casa
$Rev:20696$
|
00001 //# SSMBase.h: Base class of the Standard Storage Manager 00002 //# Copyright (C) 2000,2001,2002 00003 //# Associated Universities, Inc. Washington DC, USA. 00004 //# 00005 //# This library is free software; you can redistribute it and/or modify it 00006 //# under the terms of the GNU Library General Public License as published by 00007 //# the Free Software Foundation; either version 2 of the License, or (at your 00008 //# option) any later version. 00009 //# 00010 //# This library is distributed in the hope that it will be useful, but WITHOUT 00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00012 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00013 //# License for more details. 00014 //# 00015 //# You should have received a copy of the GNU Library General Public License 00016 //# along with this library; if not, write to the Free Software Foundation, 00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00018 //# 00019 //# Correspondence concerning AIPS++ should be addressed as follows: 00020 //# Internet email: aips2-request@nrao.edu. 00021 //# Postal address: AIPS++ Project Office 00022 //# National Radio Astronomy Observatory 00023 //# 520 Edgemont Road 00024 //# Charlottesville, VA 22903-2475 USA 00025 //# 00026 //# $Id: SSMBase.h 21014 2011-01-06 08:57:49Z gervandiepen $ 00027 00028 #ifndef TABLES_SSMBASE_H 00029 #define TABLES_SSMBASE_H 00030 00031 00032 //# Includes 00033 #include <casa/aips.h> 00034 #include <tables/Tables/DataManager.h> 00035 #include <casa/Containers/Block.h> 00036 00037 namespace casa { //# NAMESPACE CASA - BEGIN 00038 00039 //# Forward declarations 00040 class BucketCache; 00041 class BucketFile; 00042 class StManArrayFile; 00043 class SSMIndex; 00044 class SSMColumn; 00045 class SSMStringHandler; 00046 00047 // <summary> 00048 // Base class of the Standard Storage Manager 00049 // </summary> 00050 00051 // <use visibility=local> 00052 00053 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="tStandardStMan.cc"> 00054 // </reviewed> 00055 00056 // <prerequisite> 00057 //# Classes you should understand before using this one. 00058 // <li> <linkto class=StandardStMan>StandardStMan</linkto> 00059 // <li> <linkto class=SSMColumn>SSMColumn</linkto> 00060 // </prerequisite> 00061 00062 // <etymology> 00063 // SSMBase is the base class of the Standard Storage Manager. 00064 // </etymology> 00065 00066 // <synopsis> 00067 // The global principles of this class are described in 00068 // <linkto class="StandardStMan:description">StandardStMan</linkto>. 00069 // <p> 00070 // The Standard Storage Manager divides the data file in equally sized 00071 // chunks called buckets. There are 3 types of buckets: 00072 // <ul> 00073 // <li> Data buckets containing the fixed length data (scalars and 00074 // direct arrays of data type Int, Float, Bool, etc.). 00075 // For variable shaped data (strings and indirect arrays) they 00076 // contain references to the actual data position in the 00077 // string buckets or in an external file. 00078 // <li> String buckets containing strings and array of strings. 00079 // <li> Index buckets containing the index info for the data buckets. 00080 // </ul> 00081 // Bucket access is handled by class 00082 // <linkto class=BucketCache>BucketCache</linkto>. 00083 // It also keeps a list of free buckets. A bucket is freed when it is 00084 // not needed anymore (e.g. all data from it are deleted). 00085 // <p> 00086 // Data buckets form the main part of the SSM. The data can be viewed as 00087 // a few streams of buckets, where each stream contains the data of 00088 // a given number of columns. Each stream has an 00089 // <linkto class=SSMIndex>SSMIndex</linkto> object describing the 00090 // number of rows stored in each data bucket of the stream. 00091 // The SSM starts with a single bucket stream (holding all columns), 00092 // but when columns are added, new bucket streams might be created. 00093 // <p> 00094 // For example, we have an SSM with a bucket size of 100 bytes. 00095 // There are 5 Int columns (A,B,C,D,E) each taking 4 bytes per row. 00096 // Column A, B, C, and D are stored in bucket stream 1, while column 00097 // E is stored in bucket stream 2. So in stream 1 each bucket can hold 00098 // 6 rows, while in stream 2 each bucket can hold 25 rows. 00099 // For a 100 row table it will result in 17+4 data buckets. 00100 // <p> 00101 // A few classes collaborate to make it work: 00102 // <ul> 00103 // <li> Each bucket stream has an <linkto class=SSMIndex>SSMIndex</linkto> 00104 // object to map row number to bucket number. 00105 // Note that in principle each bucket in a stream contains the same 00106 // number of rows. However, when a row is deleted it is removed 00107 // from its bucket shifting the remainder to the left. Data in the 00108 // next buckets is not shifted, so that bucket has now one row less. 00109 // <li> For each column SSMBase knows to which bucket stream it belongs 00110 // and at which offset the column starts in a bucket. 00111 // Note that column data in a bucket are adjacent, which is done 00112 // to make it easier to use the 00113 // <linkto class=ColumnCache>ColumnCache</linkto> object in SSMColumn 00114 // and to be able to efficiently store Bool values as bits. 00115 // <li> Each column has an <linkto class=SSMColumn>SSMColumn</linkto> 00116 // object knowing how many bits each data cell takes in a bucket. 00117 // The SSMColumn objects handle all access to data in the columns 00118 // (using SSMBase and SSMIndex). 00119 // </ul> 00120 // <p> 00121 // String buckets are used by class 00122 // <linkto class=SSMStringHandler>SSMStringHandler</linkto> to 00123 // store scalar strings and fixed and variable shaped arrays of strings. 00124 // The bucketnr, offset, and length of such string (arrays) are stored 00125 // in the data buckets. 00126 // <br> 00127 // Indirect arrays of other data types are also stored indirectly 00128 // and their offset is stored in the data buckets. Such arrays are 00129 // handled by class <linkto class=StIndArray>StIndArray</linkto> 00130 // which uses an extra file to store the arrays. 00131 // <p> 00132 // Index buckets are used by SSMBase to make the SSMIndex data persistent. 00133 // It uses alternately 2 sets of index buckets. In that way there is 00134 // always an index availanle in case the system crashes. 00135 // If possible 2 halfs of a single bucket are used alternately, otherwise 00136 // separate buckets are used. 00137 // </synopsis> 00138 00139 // <motivation> 00140 // The public interface of SSMBase is quite large, because the other 00141 // internal SSM classes need these functions. To have a class with a 00142 // minimal interface for the normal user, class <src>StandardStMan</src> 00143 // is derived from it. 00144 // <br>StandardStMan needs an isA- instead of hasA-relation to be 00145 // able to bind columns to it in class <linkto class=SetupNewTable> 00146 // SetupNewTable</linkto>. 00147 // </motivation> 00148 00149 // <todo asof="$DATE:$"> 00150 //# A List of bugs, limitations, extensions or planned refinements. 00151 // <li> Remove AipsIO argument from open and close. 00152 // <li> When only 1 bucket in use addcolumn can check if there's enough 00153 // room to fit the new column (so rearange the bucket) in the free 00154 // row space. 00155 // </todo> 00156 00157 00158 class SSMBase: public DataManager 00159 { 00160 public: 00161 // Create a Standard storage manager with default name SSM. 00162 explicit SSMBase (Int aBucketSize=0, 00163 uInt aCacheSize=1); 00164 00165 // Create a Standard storage manager with the given name. 00166 explicit SSMBase (const String& aDataManName, 00167 Int aBucketSize=0, 00168 uInt aCacheSize=1); 00169 00170 // Create a Standard storage manager with the given name. 00171 // The specifications are part of the record (as created by dataManagerSpec). 00172 SSMBase (const String& aDataManName, 00173 const Record& spec); 00174 00175 ~SSMBase(); 00176 00177 // Clone this object. 00178 // It does not clone SSMColumn objects possibly used. 00179 // The caller has to delete the newly created object. 00180 virtual DataManager* clone() const; 00181 00182 // Get the type name of the data manager (i.e. StandardStMan). 00183 virtual String dataManagerType() const; 00184 00185 // Get the name given to the storage manager (in the constructor). 00186 virtual String dataManagerName() const; 00187 00188 // Record a record containing data manager specifications. 00189 virtual Record dataManagerSpec() const; 00190 00191 // Get data manager properties that can be modified. 00192 // It is only ActualCacheSize (the actual cache size in buckets). 00193 // It is a subset of the data manager specification. 00194 virtual Record getProperties() const; 00195 00196 // Modify data manager properties. 00197 // Only ActualCacheSize can be used. It is similar to function setCacheSize 00198 // with <src>canExceedNrBuckets=False</src>. 00199 virtual void setProperties (const Record& spec); 00200 00201 // Get the version of the class. 00202 uInt getVersion() const; 00203 00204 // Set the cache size (in buckets). 00205 // If <src>canExceedNrBuckets=True</src>, the given cache size can be 00206 // larger than the nr of buckets in the file. In this way the cache can 00207 // be made large enough for a future file extension. 00208 // Otherwise, it is limited to the actual number of buckets. This is useful 00209 // if one wants the entire file to be cached. 00210 void setCacheSize (uInt aCacheSize, Bool canExceedNrBuckets=True); 00211 00212 // Get the current cache size (in buckets). 00213 uInt getCacheSize() const; 00214 00215 // Clear the cache used by this storage manager. 00216 // It will flush the cache as needed and remove all buckets from it. 00217 void clearCache(); 00218 00219 // Show the statistics of all caches used. 00220 void showCacheStatistics (ostream& anOs) const; 00221 00222 // Show Statistics of all indices used. 00223 void showIndexStatistics (ostream & anOs) const; 00224 00225 // Show Statistics of the Base offsets/index etc. 00226 void showBaseStatistics (ostream & anOs) const; 00227 00228 // Get the bucket size. 00229 uInt getBucketSize() const; 00230 00231 // Get the number of rows in this storage manager. 00232 uInt getNRow() const; 00233 00234 // The storage manager can add rows. 00235 virtual Bool canAddRow() const; 00236 00237 // The storage manager can delete rows. 00238 virtual Bool canRemoveRow() const; 00239 00240 // The storage manager can add columns. 00241 virtual Bool canAddColumn() const; 00242 00243 // The storage manager can delete columns. 00244 virtual Bool canRemoveColumn() const; 00245 00246 // Make the object from the type name string. 00247 // This function gets registered in the DataManager "constructor" map. 00248 // The caller has to delete the object. 00249 static DataManager* makeObject (const String& aDataManType, 00250 const Record& spec); 00251 00252 // Get access to the given column. 00253 SSMColumn& getColumn (uInt aColNr); 00254 00255 // Get access to the given Index. 00256 SSMIndex& getIndex (uInt anIdxNr); 00257 00258 // Make the current bucket in the cache dirty (i.e. something has been 00259 // changed in it and it needs to be written when removed from the cache). 00260 // (used by SSMColumn::putValue). 00261 void setBucketDirty(); 00262 00263 // Open (if needed) the file for indirect arrays with the given mode. 00264 // Return a pointer to the object. 00265 StManArrayFile* openArrayFile (ByteIO::OpenOption anOpt); 00266 00267 // Find the bucket containing the column and row and return the pointer 00268 // to the beginning of the column data in that bucket. 00269 // It also fills in the start and end row for the column data. 00270 char* find (uInt aRowNr, uInt aColNr, 00271 uInt& aStartRow, uInt& anEndRow); 00272 00273 // Add a new bucket and get its bucket number. 00274 uInt getNewBucket(); 00275 00276 // Read the bucket (if needed) and return the pointer to it. 00277 char* getBucket (uInt aBucketNr); 00278 00279 // Remove a bucket from the bucket cache. 00280 void removeBucket (uInt aBucketNr); 00281 00282 // Get rows per bucket for the given column. 00283 uInt getRowsPerBucket (uInt aColumn) const; 00284 00285 // Return a pointer to the (one and only) StringHandler object. 00286 SSMStringHandler* getStringHandler(); 00287 00288 // <group> 00289 // Callbacks for BucketCache access. 00290 static char* readCallBack (void* anOwner, const char* aBucketStorage); 00291 static void writeCallBack (void* anOwner, char* aBucketStorage, 00292 const char* aBucket); 00293 static void deleteCallBack (void*, char* aBucket); 00294 static char* initCallBack (void* anOwner); 00295 // </group> 00296 00297 private: 00298 // Copy constructor (only meant for clone function). 00299 SSMBase (const SSMBase& that); 00300 00301 // Assignment cannot be used. 00302 SSMBase& operator= (const SSMBase& that); 00303 00304 // (Re)create the index, file, and cache object. 00305 // It is used when all rows are deleted from the table. 00306 void recreate(); 00307 00308 // Flush and optionally fsync the data. 00309 // It returns a True status if it had to flush (i.e. if data have changed). 00310 virtual Bool flush (AipsIO&, Bool doFsync); 00311 00312 // Let the storage manager create files as needed for a new table. 00313 // This allows a column with an indirect array to create its file. 00314 virtual void create (uInt aNrRows); 00315 00316 // Open the storage manager file for an existing table, read in 00317 // the data, and let the SSMColumn objects read their data. 00318 virtual void open (uInt aRowNr, AipsIO&); 00319 00320 // Resync the storage manager with the new file contents. 00321 // This is done by clearing the cache. 00322 virtual void resync (uInt aRowNr); 00323 00324 // Reopen the storage manager files for read/write. 00325 virtual void reopenRW(); 00326 00327 // The data manager will be deleted (because all its columns are 00328 // requested to be deleted). 00329 // So clean up the things needed (e.g. delete files). 00330 virtual void deleteManager(); 00331 00332 // Let the storage manager initialize itself (upon creation). 00333 // It determines the bucket size and fills the index. 00334 void init(); 00335 00336 // Determine and set the bucket size. 00337 // It returns the number of rows per bucket. 00338 uInt setBucketSize(); 00339 00340 // Get the number of indices in use. 00341 uInt getNrIndices() const; 00342 00343 // Add rows to the storage manager. 00344 // Per column it extends number of rows. 00345 virtual void addRow (uInt aNrRows); 00346 00347 // Delete a row from all columns. 00348 virtual void removeRow (uInt aRowNr); 00349 00350 // Do the final addition of a column. 00351 virtual void addColumn (DataManagerColumn*); 00352 00353 // Remove a column from the data file. 00354 virtual void removeColumn (DataManagerColumn*); 00355 00356 // Create a column in the storage manager on behalf of a table column. 00357 // The caller has to delete the newly created object. 00358 // <group> 00359 // Create a scalar column. 00360 virtual DataManagerColumn* makeScalarColumn (const String& aName, 00361 int aDataType, 00362 const String& aDataTypeID); 00363 // Create a direct array column. 00364 virtual DataManagerColumn* makeDirArrColumn (const String& aName, 00365 int aDataType, 00366 const String& aDataTypeID); 00367 // Create an indirect array column. 00368 virtual DataManagerColumn* makeIndArrColumn (const String& aName, 00369 int aDataType, 00370 const String& aDataTypeID); 00371 // </group> 00372 00373 // Get the cache object. 00374 // This will construct the cache object if not present yet. 00375 // The cache object will be deleted by the destructor. 00376 BucketCache& getCache(); 00377 00378 // Construct the cache object (if not constructed yet). 00379 void makeCache(); 00380 00381 // Read the header. 00382 void readHeader(); 00383 00384 // Read the index from its buckets. 00385 void readIndexBuckets(); 00386 00387 // Write the header and the indices. 00388 void writeIndex(); 00389 00390 00391 //# Declare member variables. 00392 // Name of data manager. 00393 String itsDataManName; 00394 00395 // The file containing the indirect arrays. 00396 StManArrayFile* itsIosFile; 00397 00398 // The number of rows in the columns. 00399 uInt itsNrRows; 00400 00401 // Column offset 00402 Block<uInt> itsColumnOffset; 00403 00404 // Row Index ID containing all the columns in a bucket 00405 Block<uInt> itsColIndexMap; 00406 00407 // Will contain all indices 00408 PtrBlock<SSMIndex*> itsPtrIndex; 00409 00410 // The cache with the SSM buckets. 00411 BucketCache* itsCache; 00412 00413 // The file containing all data. 00414 BucketFile* itsFile; 00415 00416 // String handler class 00417 SSMStringHandler* itsStringHandler; 00418 00419 // The persistent cache size. 00420 uInt itsPersCacheSize; 00421 00422 // The actual cache size. 00423 uInt itsCacheSize; 00424 00425 // The initial number of buckets in the cache. 00426 uInt itsNrBuckets; 00427 00428 // Nr of buckets needed for index. 00429 uInt itsNrIdxBuckets; 00430 00431 // Number of the first index bucket 00432 Int itsFirstIdxBucket; 00433 00434 // Offset of index in first bucket. 00435 // If >0, the index fits in a single bucket. 00436 uInt itsIdxBucketOffset; 00437 00438 // Number of the first String Bucket 00439 Int itsLastStringBucket; 00440 00441 // length of index memoryblock 00442 uInt itsIndexLength; 00443 00444 // The nr of free buckets. 00445 uInt itsFreeBucketsNr; 00446 00447 // The first free bucket. 00448 Int itsFirstFreeBucket; 00449 00450 // The bucket size. 00451 uInt itsBucketSize; 00452 uInt itsBucketRows; 00453 00454 // The assembly of all columns. 00455 PtrBlock<SSMColumn*> itsPtrColumn; 00456 00457 // Has the data changed since the last flush? 00458 Bool isDataChanged; 00459 }; 00460 00461 00462 inline uInt SSMBase::getNrIndices() const 00463 { 00464 return itsPtrIndex.nelements(); 00465 } 00466 00467 inline uInt SSMBase::getCacheSize() const 00468 { 00469 return itsCacheSize; 00470 } 00471 00472 inline uInt SSMBase::getNRow() const 00473 { 00474 return itsNrRows; 00475 } 00476 00477 inline uInt SSMBase::getBucketSize() const 00478 { 00479 return itsBucketSize; 00480 } 00481 00482 inline BucketCache& SSMBase::getCache() 00483 { 00484 if (itsCache == 0) { 00485 makeCache(); 00486 } 00487 return *itsCache; 00488 } 00489 00490 inline SSMColumn& SSMBase::getColumn (uInt aColNr) 00491 { 00492 return *(itsPtrColumn[aColNr]); 00493 } 00494 00495 inline SSMIndex& SSMBase::getIndex (uInt anIdxNr) 00496 { 00497 return *(itsPtrIndex[anIdxNr]); 00498 } 00499 00500 inline SSMStringHandler* SSMBase::getStringHandler() 00501 { 00502 return itsStringHandler; 00503 } 00504 00505 00506 00507 } //# NAMESPACE CASA - END 00508 00509 #endif