casa
$Rev:20696$
|
00001 //# ISMBucket.h: A bucket in the Incremental Storage Manager 00002 //# Copyright (C) 1996,1999,2000,2001 00003 //# Associated Universities, Inc. Washington DC, USA. 00004 //# 00005 //# This library is free software; you can redistribute it and/or modify it 00006 //# under the terms of the GNU Library General Public License as published by 00007 //# the Free Software Foundation; either version 2 of the License, or (at your 00008 //# option) any later version. 00009 //# 00010 //# This library is distributed in the hope that it will be useful, but WITHOUT 00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00012 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00013 //# License for more details. 00014 //# 00015 //# You should have received a copy of the GNU Library General Public License 00016 //# along with this library; if not, write to the Free Software Foundation, 00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00018 //# 00019 //# Correspondence concerning AIPS++ should be addressed as follows: 00020 //# Internet email: aips2-request@nrao.edu. 00021 //# Postal address: AIPS++ Project Office 00022 //# National Radio Astronomy Observatory 00023 //# 520 Edgemont Road 00024 //# Charlottesville, VA 22903-2475 USA 00025 //# 00026 //# $Id: ISMBucket.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $ 00027 00028 #ifndef TABLES_ISMBUCKET_H 00029 #define TABLES_ISMBUCKET_H 00030 00031 //# Includes 00032 #include <casa/aips.h> 00033 #include <casa/Containers/Block.h> 00034 #include <casa/BasicSL/String.h> 00035 #include <casa/iosfwd.h> 00036 00037 namespace casa { //# NAMESPACE CASA - BEGIN 00038 00039 //# Forward declarations 00040 class ISMBase; 00041 00042 // <summary> 00043 // A bucket in the Incremental Storage Manager 00044 // </summary> 00045 00046 // <use visibility=local> 00047 00048 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests=""> 00049 // </reviewed> 00050 00051 // <prerequisite> 00052 //# Classes you should understand before using this one. 00053 // <li> <linkto class=IncrementalStMan>IncrementalStMan</linkto> 00054 // <li> <linkto class=BucketCache>BucketCache</linkto> 00055 // </prerequisite> 00056 00057 // <etymology> 00058 // ISMBucket represents a bucket in the Incremental Storage Manager. 00059 // </etymology> 00060 00061 // <synopsis> 00062 // The Incremental Storage Manager uses a <linkto class=BucketCache> 00063 // BucketCache</linkto> object to read/write/cache the buckets 00064 // containing the data. An <src>ISMBucket</src> object is the 00065 // internal representation of the contents of a bucket. <src>ISMBucket</src> 00066 // contains static callback functions which are called by 00067 // <src>BucketCache</src> when reading/writing a bucket. These callback 00068 // functions do the mapping of bucket data to <src>ISMBucket</src> object 00069 // and vice-versa. 00070 // <p> 00071 // A bucket contains the values of several rows 00072 // of all columns bound to this Incremental Storage Manager. 00073 // A bucket is split into a data part and an index part. 00074 // Each part has an arbitrary length but together they do not exceed 00075 // the fixed bucket length. 00076 // <p> 00077 // The beginning of the data part contains the values of all columns 00078 // bound. The remainder of the data part contains the values of 00079 // the rows/columns with a changed value. 00080 // <br> 00081 // The index part contains an index per column. Each index contains the 00082 // row number and an offset for a row with a stored value. The row numbers 00083 // are relative to the beginning of the bucket, so the bucket has 00084 // no knowledge about the absolute row numbers. In this way deletion of 00085 // rows is much simpler. 00086 // <p> 00087 // The contents of a bucket looks like: 00088 // <srcblock> 00089 // ------------------------------------------------------------------- 00090 // | index offset | data part | index part | free | 00091 // ------------------------------------------------------------------- 00092 // 0 4 4+length(data part) 00093 // <--------------------------bucketsize-----------------------------> 00094 // </srcblock> 00095 // The data part contains all data value belonging to the bucket. 00096 // The index part contains for each column the following data: 00097 // <srcblock> 00098 // ----------------------------------------------------------------------- 00099 // | #values stored | row numbers of values | offset in data part of | 00100 // | for column i | stored for column i | values stored for column i | 00101 // ----------------------------------------------------------------------- 00102 // 0 4 4+4*nrval 00103 // </srcblock> 00104 // Note that the row numbers in the bucket start at 0, thus are relative 00105 // to the beginning of the bucket. The main index kept in 00106 // <linkto class=ISMIndex>ISMIndex</linkto> knows the starting row of 00107 // each bucket. In this way bucket splitiing and especially row removal 00108 // is much easier. 00109 // <p> 00110 // The bucket can be stored in canonical or local (i.e. native) data format. 00111 // When a bucket is read into memory, its data are read, converted, and 00112 // stored in the ISMBucket object. When flushed, the contents are 00113 // written. ISMBucket takes care that the values stored in its object 00114 // do not exceed the size of the bucket. When full, the user can call 00115 // a function to split it into a left and right bucket. When the new 00116 // value has to be written at the end, the split merely consist of 00117 // creating a new bucket. In any case, care is taken that a row is 00118 // not split. Thus a row is always entirely contained in one bucket. 00119 // <p> 00120 // Class <linkto class=ISMColumn>ISMColumn</linkto> does the actual 00121 // writing of data in a bucket and uses the relevant ISMBucket functions. 00122 00123 // <motivation> 00124 // ISMBucket encapsulates the data of a bucket. 00125 // </motivation> 00126 00127 //# <todo asof="$DATE:$"> 00128 //# A List of bugs, limitations, extensions or planned refinements. 00129 //# </todo> 00130 00131 00132 class ISMBucket 00133 { 00134 public: 00135 00136 // Create a bucket with the given parent. 00137 // When <src>bucketStorage</src> is non-zero, reconstruct the 00138 // object from it. 00139 // It keeps the pointer to its parent (but does not own it). 00140 ISMBucket (ISMBase* parent, const char* bucketStorage); 00141 00142 ~ISMBucket(); 00143 00144 // Get the row-interval for given column and row. 00145 // It sets the start and end of the interval to which the row belongs 00146 // and the offset of its current value. 00147 // It returns the index where the row number can be put in the 00148 // bucket index. 00149 uInt getInterval (uInt colnr, uInt rownr, uInt bucketNrrow, 00150 uInt& start, uInt& end, uInt& offset) const; 00151 00152 // Is the bucket large enough to add a value? 00153 Bool canAddData (uInt leng) const; 00154 00155 // Add the data to the data part. 00156 // It updates the bucket index at the given index. 00157 // An exception is thrown if the bucket is too small. 00158 void addData (uInt colnr, uInt rownr, uInt index, 00159 const char* data, uInt leng); 00160 00161 // Is the bucket large enough to replace a value? 00162 Bool canReplaceData (uInt newLeng, uInt oldLeng) const; 00163 00164 // Replace a data item. 00165 // When its length is variable (indicated by fixedLength=0), the old 00166 // value will be removed and the new one appended at the end. 00167 // An exception is thrown if the bucket is too small. 00168 void replaceData (uInt& offset, const char* data, uInt newLeng, 00169 uInt fixedLength); 00170 00171 // Get a pointer to the data for the given offset. 00172 const char* get (uInt offset) const; 00173 00174 // Get the length of the data value. 00175 // It is <src>fixedLength</src> when non-zero, 00176 // otherwise read it from the data value. 00177 uInt getLength (uInt fixedLength, const char* data) const; 00178 00179 // Get access to the offset of the data for given column and row. 00180 // It allows to change it (used for example by replaceData). 00181 uInt& getOffset (uInt colnr, uInt rownr); 00182 00183 // Get access to the index information for the given column. 00184 // This is used by ISMColumn when putting the data. 00185 // <group> 00186 // Return the row numbers with a stored value. 00187 Block<uInt>& rowIndex (uInt colnr); 00188 // Return the offsets of the values stored in the data part. 00189 Block<uInt>& offIndex (uInt colnr); 00190 // Return the number of values stored. 00191 uInt& indexUsed (uInt colnr); 00192 // </group> 00193 00194 // Split the bucket in the middle. 00195 // It returns the row number where the bucket was split and the 00196 // new left and right bucket. The caller is responsible for 00197 // deleting the newly created buckets. 00198 // When possible a simple split is done. 00199 // <br> 00200 // The starting values in the right bucket may be copies of the 00201 // values in the left bucket. The duplicated Block contains a switch 00202 // per column indicating if the value is copied. 00203 uInt split (ISMBucket*& left, ISMBucket*& right, Block<Bool>& duplicated, 00204 uInt bucketStartRow, uInt bucketNrrow, 00205 uInt colnr, uInt rownr, uInt lengToAdd); 00206 00207 // Determine whether a simple split is possible. If so, do it. 00208 // This is possible if the new row is at the end of the last bucket, 00209 // which will often be the case. 00210 // <br>A simple split means adding a new bucket for the new row. 00211 // If the old bucket already contains values for that row, those 00212 // values are moved to the new bucket. 00213 // <br>This fuction is only called by split, which created the 00214 // left and right bucket. 00215 Bool simpleSplit (ISMBucket* left, ISMBucket* right, 00216 Block<Bool>& duplicated, 00217 uInt& splitRownr, uInt rownr); 00218 00219 // Return the index where the bucket should be split to get 00220 // two parts with almost identical length. 00221 uInt getSplit (uInt totLeng, const Block<uInt>& rowLeng, 00222 const Block<uInt>& cumLeng); 00223 00224 // Remove <src>nr</src> items from data and index part by shifting 00225 // to the left. The <src>rowIndex</src>, <src>offIndex</src>, and 00226 // <src>nused</src> get updated. The caller is responsible for 00227 // removing data when needed (e.g. <src>ISMIndColumn</src> removes 00228 // the indirect arrays from its file). 00229 void shiftLeft (uInt index, uInt nr, Block<uInt>& rowIndex, 00230 Block<uInt>& offIndex, uInt& nused, uInt leng); 00231 00232 // Copy the contents of that bucket to this bucket. 00233 // This is used after a split operation. 00234 void copy (const ISMBucket& that); 00235 00236 // Callback function when BucketCache reads a bucket. 00237 // It creates an ISMBucket object and converts the raw bucketStorage 00238 // to that object. 00239 // It returns the pointer to ISMBucket object which gets part of the cache. 00240 // The object gets deleted by the deleteCallBack function. 00241 static char* readCallBack (void* owner, const char* bucketStorage); 00242 00243 // Callback function when BucketCache writes a bucket. 00244 // It converts the ISMBucket bucket object to the raw bucketStorage. 00245 static void writeCallBack (void* owner, char* bucketStorage, 00246 const char* bucket); 00247 00248 // Callback function when BucketCache adds a new bucket to the data file. 00249 // This function creates an empty ISMBucket object. 00250 // It returns the pointer to ISMBucket object which gets part of the cache. 00251 // The object gets deleted by the deleteCallBack function. 00252 static char* initCallBack (void* owner); 00253 00254 // Callback function when BucketCache removes a bucket from the cache. 00255 // This function dletes the ISMBucket bucket object. 00256 static void deleteCallBack (void*, char* bucket); 00257 00258 // Show the layout of the bucket. 00259 void show (ostream& os) const; 00260 00261 private: 00262 // Forbid copy constructor. 00263 ISMBucket (const ISMBucket&); 00264 00265 // Forbid assignment. 00266 ISMBucket& operator= (const ISMBucket&); 00267 00268 // Remove a data item with the given length. 00269 // If the length is zero, its variable length is read first. 00270 void removeData (uInt offset, uInt leng); 00271 00272 // Insert a data value by appending it to the end. 00273 // It returns the offset of the data value. 00274 uInt insertData (const char* data, uInt leng); 00275 00276 // Copy a data item from this bucket to the other bucket. 00277 uInt copyData (ISMBucket& other, uInt colnr, uInt toRownr, 00278 uInt fromIndex, uInt toIndex) const; 00279 00280 // Read the data from the storage into this bucket. 00281 void read (const char* bucketStorage); 00282 00283 // Write the bucket into the storage. 00284 void write (char* bucketStorage) const; 00285 00286 00287 //# Declare member variables. 00288 // Pointer to the parent storage manager. 00289 ISMBase* stmanPtr_p; 00290 // The size (in bytes) of an uInt (used in index, etc.). 00291 uInt uIntSize_p; 00292 // The size (in bytes) of the data. 00293 uInt dataLeng_p; 00294 // The size (in bytes) of the index. 00295 uInt indexLeng_p; 00296 // The row index per column; each index contains the row number 00297 // of each value stored in the bucket (for that column). 00298 PtrBlock<Block<uInt>*> rowIndex_p; 00299 // The offset index per column; each index contains the offset (in bytes) 00300 // of each value stored in the bucket (for that column). 00301 PtrBlock<Block<uInt>*> offIndex_p; 00302 // Nr of used elements in each index; i.e. the number of stored values 00303 // per column. 00304 Block<uInt> indexUsed_p; 00305 // The data space (in external (e.g. canonical) format). 00306 char* data_p; 00307 }; 00308 00309 00310 inline const char* ISMBucket::get (uInt offset) const 00311 { 00312 return data_p + offset; 00313 } 00314 inline Block<uInt>& ISMBucket::rowIndex (uInt colnr) 00315 { 00316 return *(rowIndex_p[colnr]); 00317 } 00318 inline Block<uInt>& ISMBucket::offIndex (uInt colnr) 00319 { 00320 return *(offIndex_p[colnr]); 00321 } 00322 inline uInt& ISMBucket::indexUsed (uInt colnr) 00323 { 00324 return indexUsed_p[colnr]; 00325 } 00326 00327 00328 00329 } //# NAMESPACE CASA - END 00330 00331 #endif