casa  $Rev:20696$
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
ISMBucket.h
Go to the documentation of this file.
00001 //# ISMBucket.h: A bucket in the Incremental Storage Manager
00002 //# Copyright (C) 1996,1999,2000,2001
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //# $Id: ISMBucket.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $
00027 
00028 #ifndef TABLES_ISMBUCKET_H
00029 #define TABLES_ISMBUCKET_H
00030 
00031 //# Includes
00032 #include <casa/aips.h>
00033 #include <casa/Containers/Block.h>
00034 #include <casa/BasicSL/String.h>
00035 #include <casa/iosfwd.h>
00036 
00037 namespace casa { //# NAMESPACE CASA - BEGIN
00038 
00039 //# Forward declarations
00040 class ISMBase;
00041 
00042 // <summary>
00043 // A bucket in the Incremental Storage Manager
00044 // </summary>
00045 
00046 // <use visibility=local>
00047 
00048 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="">
00049 // </reviewed>
00050 
00051 // <prerequisite>
00052 //# Classes you should understand before using this one.
00053 //   <li> <linkto class=IncrementalStMan>IncrementalStMan</linkto>
00054 //   <li> <linkto class=BucketCache>BucketCache</linkto>
00055 // </prerequisite>
00056 
00057 // <etymology>
00058 // ISMBucket represents a bucket in the Incremental Storage Manager.
00059 // </etymology>
00060 
00061 // <synopsis>
00062 // The Incremental Storage Manager uses a <linkto class=BucketCache>
00063 // BucketCache</linkto> object to read/write/cache the buckets
00064 // containing the data. An <src>ISMBucket</src> object is the
00065 // internal representation of the contents of a bucket. <src>ISMBucket</src>
00066 // contains static callback functions which are called by
00067 // <src>BucketCache</src> when reading/writing a bucket. These callback
00068 // functions do the mapping of bucket data to <src>ISMBucket</src> object
00069 // and vice-versa.
00070 // <p>
00071 // A bucket contains the values of several rows
00072 // of all columns bound to this Incremental Storage Manager.
00073 // A bucket is split into a data part and an index part.
00074 // Each part has an arbitrary length but together they do not exceed
00075 // the fixed bucket length.
00076 // <p>
00077 // The beginning of the data part contains the values of all columns
00078 // bound. The remainder of the data part contains the values of
00079 // the rows/columns with a changed value.
00080 // <br>
00081 // The index part contains an index per column. Each index contains the
00082 // row number and an offset for a row with a stored value. The row numbers
00083 // are relative to the beginning of the bucket, so the bucket has
00084 // no knowledge about the absolute row numbers. In this way deletion of
00085 // rows is much simpler.
00086 // <p>
00087 // The contents of a bucket looks like:
00088 // <srcblock>
00089 //    -------------------------------------------------------------------
00090 //    | index offset   | data part     | index part              | free |
00091 //    -------------------------------------------------------------------
00092 //     0                4               4+length(data part)
00093 //    <--------------------------bucketsize----------------------------->
00094 // </srcblock>
00095 // The data part contains all data values belonging to the bucket.
00096 // The index part contains for each column the following data:
00097 // <srcblock>
00098 //    -----------------------------------------------------------------------
00099 //    | #values stored | row numbers of values | offset in data part of     |
00100 //    | for column i   | stored for column i   | values stored for column i |
00101 //    -----------------------------------------------------------------------
00102 //     0                4                       4+4*nrval
00103 // </srcblock>
00104 // Note that the row numbers in the bucket start at 0, thus are relative
00105 // to the beginning of the bucket. The main index kept in
00106 // <linkto class=ISMIndex>ISMIndex</linkto> knows the starting row of
00107 // each bucket. In this way bucket splitting and especially row removal
00108 // is much easier.
00109 // <p>
00110 // The bucket can be stored in canonical or local (i.e. native) data format.
00111 // When a bucket is read into memory, its data are read, converted, and
00112 // stored in the ISMBucket object. When flushed, the contents are
00113 // written. ISMBucket takes care that the values stored in its object
00114 // do not exceed the size of the bucket. When full, the user can call
00115 // a function to split it into a left and right bucket. When the new
00116 // value has to be written at the end, the split merely consists of
00117 // creating a new bucket. In any case, care is taken that a row is
00118 // not split. Thus a row is always entirely contained in one bucket.
00119 // <p>
00120 // Class <linkto class=ISMColumn>ISMColumn</linkto> does the actual
00121 // writing of data in a bucket and uses the relevant ISMBucket functions.
00122 // </synopsis>
00123 // <motivation>
00124 // ISMBucket encapsulates the data of a bucket.
00125 // </motivation>
00126 
00127 //# <todo asof="$DATE:$">
00128 //# A List of bugs, limitations, extensions or planned refinements.
00129 //# </todo>
00130 
00131 
00132 class ISMBucket
00133 {
00134 public:
00135 
00136     // Create a bucket with the given parent.
00137     // When <src>bucketStorage</src> is non-zero, reconstruct the
00138     // object from it.
00139     // It keeps the pointer to its parent (but does not own it).
00140     ISMBucket (ISMBase* parent, const char* bucketStorage);
00141 
00142     ~ISMBucket();
00143 
00144     // Get the row-interval for the given column and row.
00145     // It sets the start and end of the interval to which the row belongs
00146     // and the offset of its current value.
00147     // It returns the index where the row number can be put in the
00148     // bucket index.
00149     uInt getInterval (uInt colnr, uInt rownr, uInt bucketNrrow,
00150                       uInt& start, uInt& end, uInt& offset) const;
00151 
00152     // Is the bucket large enough to add a value?
00153     Bool canAddData (uInt leng) const;
00154 
00155     // Add the data to the data part.
00156     // It updates the bucket index at the given index.
00157     // An exception is thrown if the bucket is too small.
00158     void addData (uInt colnr, uInt rownr, uInt index,
00159                   const char* data, uInt leng);
00160 
00161     // Is the bucket large enough to replace a value?
00162     Bool canReplaceData (uInt newLeng, uInt oldLeng) const;
00163 
00164     // Replace a data item.
00165     // When its length is variable (indicated by fixedLength=0), the old
00166     // value will be removed and the new one appended at the end.
00167     // An exception is thrown if the bucket is too small.
00168     void replaceData (uInt& offset, const char* data, uInt newLeng,
00169                       uInt fixedLength);
00170 
00171     // Get a pointer to the data for the given offset.
00172     const char* get (uInt offset) const;
00173 
00174     // Get the length of the data value.
00175     // It is <src>fixedLength</src> when non-zero,
00176     // otherwise read it from the data value.
00177     uInt getLength (uInt fixedLength, const char* data) const;
00178 
00179     // Get access to the offset of the data for the given column and row.
00180     // It allows the caller to change it (used for example by replaceData).
00181     uInt& getOffset (uInt colnr, uInt rownr);
00182 
00183     // Get access to the index information for the given column.
00184     // This is used by ISMColumn when putting the data.
00185     // <group>
00186     // Return the row numbers with a stored value.
00187     Block<uInt>& rowIndex (uInt colnr);
00188     // Return the offsets of the values stored in the data part.
00189     Block<uInt>& offIndex (uInt colnr);
00190     // Return the number of values stored.
00191     uInt& indexUsed (uInt colnr);
00192     // </group>
00193 
00194     // Split the bucket in the middle.
00195     // It returns the row number where the bucket was split and the
00196     // new left and right bucket. The caller is responsible for
00197     // deleting the newly created buckets.
00198     // When possible a simple split is done.
00199     // <br>
00200     // The starting values in the right bucket may be copies of the
00201     // values in the left bucket. The duplicated Block contains a switch
00202     // per column indicating if the value is copied.
00203     uInt split (ISMBucket*& left, ISMBucket*& right, Block<Bool>& duplicated,
00204                 uInt bucketStartRow, uInt bucketNrrow,
00205                 uInt colnr, uInt rownr, uInt lengToAdd);
00206 
00207     // Determine whether a simple split is possible. If so, do it.
00208     // This is possible if the new row is at the end of the last bucket,
00209     // which will often be the case.
00210     // <br>A simple split means adding a new bucket for the new row.
00211     // If the old bucket already contains values for that row, those
00212     // values are moved to the new bucket.
00213     // <br>This function is only called by split, which created the
00214     // left and right bucket.
00215     Bool simpleSplit (ISMBucket* left, ISMBucket* right,
00216                       Block<Bool>& duplicated,
00217                       uInt& splitRownr, uInt rownr);
00218 
00219     // Return the index where the bucket should be split to get
00220     // two parts with almost identical length.
00221     uInt getSplit (uInt totLeng, const Block<uInt>& rowLeng,
00222                    const Block<uInt>& cumLeng);
00223 
00224     // Remove <src>nr</src> items from data and index part by shifting
00225     // to the left. The <src>rowIndex</src>, <src>offIndex</src>, and
00226     // <src>nused</src> get updated. The caller is responsible for
00227     // removing data when needed (e.g. <src>ISMIndColumn</src> removes
00228     // the indirect arrays from its file).
00229     void shiftLeft (uInt index, uInt nr, Block<uInt>& rowIndex,
00230                     Block<uInt>& offIndex, uInt& nused, uInt leng);
00231 
00232     // Copy the contents of that bucket to this bucket.
00233     // This is used after a split operation.
00234     void copy (const ISMBucket& that);
00235 
00236     // Callback function when BucketCache reads a bucket.
00237     // It creates an ISMBucket object and converts the raw bucketStorage
00238     // to that object.
00239     // It returns the pointer to the ISMBucket object which becomes part of the cache.
00240     // The object gets deleted by the deleteCallBack function.
00241     static char* readCallBack (void* owner, const char* bucketStorage);
00242 
00243     // Callback function when BucketCache writes a bucket.
00244     // It converts the ISMBucket bucket object to the raw bucketStorage.
00245     static void writeCallBack (void* owner, char* bucketStorage,
00246                                const char* bucket);
00247 
00248     // Callback function when BucketCache adds a new bucket to the data file.
00249     // This function creates an empty ISMBucket object.
00250     // It returns the pointer to the ISMBucket object which becomes part of the cache.
00251     // The object gets deleted by the deleteCallBack function.
00252     static char* initCallBack (void* owner);
00253 
00254     // Callback function when BucketCache removes a bucket from the cache.
00255     // This function deletes the ISMBucket bucket object.
00256     static void deleteCallBack (void*, char* bucket);
00257 
00258     // Show the layout of the bucket.
00259     void show (ostream& os) const;
00260 
00261 private:
00262     // Forbid copy construction (declared private).
00263     ISMBucket (const ISMBucket&);
00264 
00265     // Forbid assignment (declared private).
00266     ISMBucket& operator= (const ISMBucket&);
00267 
00268     // Remove a data item with the given length.
00269     // If the length is zero, its variable length is read first.
00270     void removeData (uInt offset, uInt leng);
00271 
00272     // Insert a data value by appending it to the end.
00273     // It returns the offset of the data value.
00274     uInt insertData (const char* data, uInt leng);
00275 
00276     // Copy a data item from this bucket to the other bucket.
00277     uInt copyData (ISMBucket& other, uInt colnr, uInt toRownr,
00278                    uInt fromIndex, uInt toIndex) const;
00279 
00280     // Read the data from the storage into this bucket.
00281     void read (const char* bucketStorage);
00282 
00283     // Write the bucket into the storage.
00284     void write (char* bucketStorage) const;
00285 
00286 
00287     //# Declare member variables.
00288     // Pointer to the parent storage manager.
00289     ISMBase*          stmanPtr_p;
00290     // The size (in bytes) of a uInt (used in index, etc.).
00291     uInt              uIntSize_p;
00292     // The size (in bytes) of the data.
00293     uInt              dataLeng_p;
00294     // The size (in bytes) of the index.
00295     uInt              indexLeng_p;
00296     // The row index per column; each index contains the row number
00297     // of each value stored in the bucket (for that column).
00298     PtrBlock<Block<uInt>*> rowIndex_p;
00299     // The offset index per column; each index contains the offset (in bytes)
00300     // of each value stored in the bucket (for that column).
00301     PtrBlock<Block<uInt>*> offIndex_p;
00302     // Nr of used elements in each index; i.e. the number of stored values
00303     // per column.
00304     Block<uInt>       indexUsed_p;
00305     // The data space (in external (e.g. canonical) format).
00306     char*             data_p;
00307 };
00308 
00309 
00310 inline const char* ISMBucket::get (uInt offset) const {
00311     // The offset is counted in chars from the start of the data space.
00312     return &data_p[offset];
00313 }
00314 inline Block<uInt>& ISMBucket::rowIndex (uInt colnr) {
00315     // rowIndex_p holds one Block pointer per column; hand out the Block itself.
00316     return *rowIndex_p[colnr];
00317 }
00318 inline Block<uInt>& ISMBucket::offIndex (uInt colnr) {
00319     // offIndex_p holds one Block pointer per column; hand out the Block itself.
00320     return *offIndex_p[colnr];
00321 }
00322 inline uInt& ISMBucket::indexUsed (uInt colnr) {
00323     // Returned by reference so the caller can update the count in place.
00324     return indexUsed_p[colnr];
00325 }
00326 
00327 
00328 
00329 } //# NAMESPACE CASA - END
00330 
00331 #endif