casa  $Rev:20696$
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
SSMBase.h
Go to the documentation of this file.
00001 //# SSMBase.h: Base class of the Standard Storage Manager
00002 //# Copyright (C) 2000,2001,2002
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //# $Id: SSMBase.h 21014 2011-01-06 08:57:49Z gervandiepen $
00027 
00028 #ifndef TABLES_SSMBASE_H
00029 #define TABLES_SSMBASE_H
00030 
00031 
00032 //# Includes
00033 #include <casa/aips.h>
00034 #include <tables/Tables/DataManager.h>
00035 #include <casa/Containers/Block.h>
00036 
00037 namespace casa { //# NAMESPACE CASA - BEGIN
00038 
00039 //# Forward declarations
00040 class BucketCache;
00041 class BucketFile;
00042 class StManArrayFile;
00043 class SSMIndex;
00044 class SSMColumn;
00045 class SSMStringHandler;
00046 
00047 // <summary>
00048 // Base class of the Standard Storage Manager
00049 // </summary>
00050 
00051 // <use visibility=local>
00052 
00053 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="tStandardStMan.cc">
00054 // </reviewed>
00055 
00056 // <prerequisite>
00057 //# Classes you should understand before using this one.
00058 //   <li> <linkto class=StandardStMan>StandardStMan</linkto>
00059 //   <li> <linkto class=SSMColumn>SSMColumn</linkto>
00060 // </prerequisite>
00061 
00062 // <etymology>
00063 // SSMBase is the base class of the Standard Storage Manager.
00064 // </etymology>
00065 
00066 // <synopsis>
00067 // The global principles of this class are described in
00068 // <linkto class="StandardStMan:description">StandardStMan</linkto>.
00069 // <p>
00070 // The Standard Storage Manager divides the data file in equally sized
00071 // chunks called buckets. There are 3 types of buckets:
00072 // <ul>
00073 //  <li> Data buckets containing the fixed length data (scalars and
00074 //       direct arrays of data type Int, Float, Bool, etc.).
00075 //       For variable shaped data (strings and indirect arrays) they
00076 //       contain references to the actual data position in the
00077 //       string buckets or in an external file.
00078 //  <li> String buckets containing strings and arrays of strings.
00079 //  <li> Index buckets containing the index info for the data buckets.
00080 // </ul>
00081 // Bucket access is handled by class
00082 // <linkto class=BucketCache>BucketCache</linkto>.
00083 // It also keeps a list of free buckets. A bucket is freed when it is
00084 // not needed anymore (e.g. all data from it are deleted).
00085 // <p>
00086 // Data buckets form the main part of the SSM. The data can be viewed as
00087 // a few streams of buckets, where each stream contains the data of
00088 // a given number of columns. Each stream has an
00089 // <linkto class=SSMIndex>SSMIndex</linkto> object describing the
00090 // number of rows stored in each data bucket of the stream.
00091 // The SSM starts with a single bucket stream (holding all columns),
00092 // but when columns are added, new bucket streams might be created.
00093 // <p>
00094 // For example, we have an SSM with a bucket size of 100 bytes.
00095 // There are 5 Int columns (A,B,C,D,E) each taking 4 bytes per row.
00096 // Column A, B, C, and D are stored in bucket stream 1, while column
00097 // E is stored in bucket stream 2. So in stream 1 each bucket can hold
00098 // 6 rows, while in stream 2 each bucket can hold 25 rows.
00099 // For a 100 row table it will result in 17+4 data buckets.
00100 // <p>
00101 // A few classes collaborate to make it work:
00102 // <ul>
00103 //  <li> Each bucket stream has an <linkto class=SSMIndex>SSMIndex</linkto>
00104 //       object to map row number to bucket number.
00105 //       Note that in principle each bucket in a stream contains the same
00106 //       number of rows. However, when a row is deleted it is removed
00107 //       from its bucket shifting the remainder to the left. Data in the
00108 //       next buckets is not shifted, so that bucket now has one row fewer.
00109 //  <li> For each column SSMBase knows to which bucket stream it belongs
00110 //       and at which offset the column starts in a bucket.
00111 //       Note that column data in a bucket are adjacent, which is done
00112 //       to make it easier to use the
00113 //       <linkto class=ColumnCache>ColumnCache</linkto> object in SSMColumn
00114 //       and to be able to efficiently store Bool values as bits.
00115 //  <li> Each column has an <linkto class=SSMColumn>SSMColumn</linkto>
00116 //       object knowing how many bits each data cell takes in a bucket.
00117 //       The SSMColumn objects handle all access to data in the columns
00118 //       (using SSMBase and SSMIndex).
00119 // </ul>
00120 // <p>
00121 // String buckets are used by class
00122 // <linkto class=SSMStringHandler>SSMStringHandler</linkto> to
00123 // store scalar strings and fixed and variable shaped arrays of strings.
00124 // The bucketnr, offset, and length of such strings (or string arrays) are stored
00125 // in the data buckets.
00126 // <br>
00127 // Indirect arrays of other data types are also stored indirectly
00128 // and their offset is stored in the data buckets. Such arrays are
00129 // handled by class <linkto class=StIndArray>StIndArray</linkto>
00130 // which uses an extra file to store the arrays.
00131 // <p>
00132 // Index buckets are used by SSMBase to make the SSMIndex data persistent.
00133 // It uses alternately 2 sets of index buckets. In that way there is
00134 // always an index available in case the system crashes.
00135 // If possible 2 halves of a single bucket are used alternately, otherwise 
00136 // separate buckets are used.
00137 // </synopsis>
00138 
00139 // <motivation>
00140 // The public interface of SSMBase is quite large, because the other
00141 // internal SSM classes need these functions. To have a class with a
00142 // minimal interface for the normal user, class <src>StandardStMan</src>
00143 // is derived from it.
00144 // <br>StandardStMan needs an isA- instead of hasA-relation to be
00145 // able to bind columns to it in class <linkto class=SetupNewTable>
00146 // SetupNewTable</linkto>.
00147 // </motivation>
00148 
00149 // <todo asof="$DATE:$">
00150 //# A List of bugs, limitations, extensions or planned refinements.
00151 //   <li> Remove AipsIO argument from open and close.
00152 //   <li> When only 1 bucket is in use, addColumn can check if there's enough
00153 //        room to fit the new column (so rearrange the bucket) in the free
00154 //        row space.
00155 // </todo>
00156 
00157 
00158 class SSMBase: public DataManager
00159 {
00160 public:
00161   // Create a Standard storage manager with default name SSM.
00162   explicit SSMBase (Int aBucketSize=0,
00163                     uInt aCacheSize=1);
00164   
00165   // Create a Standard storage manager with the given name.
00166   explicit SSMBase (const String& aDataManName,
00167                     Int aBucketSize=0,
00168                     uInt aCacheSize=1);
00169   
00170   // Create a Standard storage manager with the given name.
00171   // The specifications are part of the record (as created by dataManagerSpec).
00172   SSMBase (const String& aDataManName,
00173            const Record& spec);
00174   
00175   ~SSMBase();
00176   
00177   // Clone this object.
00178   // It does not clone SSMColumn objects possibly used.
00179   // The caller has to delete the newly created object.
00180   virtual DataManager* clone() const;
00181   
00182   // Get the type name of the data manager (i.e. StandardStMan).
00183   virtual String dataManagerType() const;
00184   
00185   // Get the name given to the storage manager (in the constructor).
00186   virtual String dataManagerName() const;
00187   
00188   // Return a record containing data manager specifications.
00189   virtual Record dataManagerSpec() const;
00190 
00191   // Get data manager properties that can be modified.
00192   // It is only ActualCacheSize (the actual cache size in buckets).
00193   // It is a subset of the data manager specification.
00194   virtual Record getProperties() const;
00195 
00196   // Modify data manager properties.
00197   // Only ActualCacheSize can be used. It is similar to function setCacheSize
00198   // with <src>canExceedNrBuckets=False</src>.
00199   virtual void setProperties (const Record& spec);
00200 
00201   // Get the version of the class.
00202   uInt getVersion() const;
00203   
00204   // Set the cache size (in buckets).
00205   // If <src>canExceedNrBuckets=True</src>, the given cache size can be
00206   // larger than the nr of buckets in the file. In this way the cache can
00207   // be made large enough for a future file extension.
00208   // Otherwise, it is limited to the actual number of buckets. This is useful
00209   // if one wants the entire file to be cached.
00210   void setCacheSize (uInt aCacheSize, Bool canExceedNrBuckets=True);
00211 
00212   // Get the current cache size (in buckets).
00213   uInt getCacheSize() const;
00214   
00215   // Clear the cache used by this storage manager.
00216   // It will flush the cache as needed and remove all buckets from it.
00217   void clearCache();
00218 
00219   // Show the statistics of all caches used.
00220   void showCacheStatistics (ostream& anOs) const;
00221 
00222   // Show the statistics of all indices used.
00223   void showIndexStatistics (ostream & anOs) const;
00224 
00225   // Show the statistics of the base offsets/index etc.
00226   void showBaseStatistics (ostream & anOs) const;
00227 
00228   // Get the bucket size.
00229   uInt getBucketSize() const;
00230   
00231   // Get the number of rows in this storage manager.
00232   uInt getNRow() const;
00233   
00234   // The storage manager can add rows.
00235   virtual Bool canAddRow() const;
00236   
00237   // The storage manager can delete rows.
00238   virtual Bool canRemoveRow() const;
00239   
00240   // The storage manager can add columns.
00241   virtual Bool canAddColumn() const;
00242   
00243   // The storage manager can delete columns.
00244   virtual Bool canRemoveColumn() const;
00245   
00246   // Make the object from the type name string.
00247   // This function gets registered in the DataManager "constructor" map.
00248   // The caller has to delete the object.
00249   static DataManager* makeObject (const String& aDataManType,
00250                                   const Record& spec);
00251   
00252   // Get access to the given column.
00253   SSMColumn& getColumn (uInt aColNr);
00254   
00255   // Get access to the given Index.
00256   SSMIndex& getIndex (uInt anIdxNr);
00257   
00258   // Make the current bucket in the cache dirty (i.e. something has been
00259   // changed in it and it needs to be written when removed from the cache).
00260   // (used by SSMColumn::putValue).
00261   void setBucketDirty();
00262   
00263   // Open (if needed) the file for indirect arrays with the given mode.
00264   // Return a pointer to the object.
00265   StManArrayFile* openArrayFile (ByteIO::OpenOption anOpt);
00266 
00267   // Find the bucket containing the column and row and return the pointer
00268   // to the beginning of the column data in that bucket.
00269   // It also fills in the start and end row for the column data.
00270   char* find (uInt aRowNr,     uInt aColNr, 
00271               uInt& aStartRow, uInt& anEndRow);
00272 
00273   // Add a new bucket and get its bucket number.
00274   uInt getNewBucket();
00275 
00276   // Read the bucket (if needed) and return the pointer to it.
00277   char* getBucket (uInt aBucketNr);
00278 
00279   // Remove a bucket from the bucket cache.
00280   void removeBucket (uInt aBucketNr);
00281 
00282   // Get rows per bucket for the given column.
00283   uInt getRowsPerBucket (uInt aColumn) const;
00284 
00285   // Return a pointer to the (one and only) StringHandler object.
00286   SSMStringHandler* getStringHandler();
00287 
00288   // <group>
00289   // Callbacks for BucketCache access.
00290   static char* readCallBack (void* anOwner, const char* aBucketStorage);
00291   static void writeCallBack (void* anOwner, char* aBucketStorage,
00292                              const char* aBucket);
00293   static void deleteCallBack (void*, char* aBucket);
00294   static char* initCallBack (void* anOwner);
00295   // </group>
00296 
00297 private:
00298   // Copy constructor (only meant for clone function).
00299   SSMBase (const SSMBase& that);
00300   
00301   // Assignment cannot be used (declared private for that reason).
00302   SSMBase& operator= (const SSMBase& that);
00303   
00304   // (Re)create the index, file, and cache object.
00305   // It is used when all rows are deleted from the table.
00306   void recreate();
00307   
00308   // Flush and optionally fsync the data.
00309   // It returns a True status if it had to flush (i.e. if data have changed).
00310   virtual Bool flush (AipsIO&, Bool doFsync);
00311   
00312   // Let the storage manager create files as needed for a new table.
00313   // This allows a column with an indirect array to create its file.
00314   virtual void create (uInt aNrRows);
00315   
00316   // Open the storage manager file for an existing table, read in
00317   // the data, and let the SSMColumn objects read their data.
00318   virtual void open (uInt aRowNr, AipsIO&);
00319   
00320   // Resync the storage manager with the new file contents.
00321   // This is done by clearing the cache.
00322   virtual void resync (uInt aRowNr);
00323   
00324   // Reopen the storage manager files for read/write.
00325   virtual void reopenRW();
00326   
00327   // The data manager will be deleted (because all its columns are
00328   // requested to be deleted).
00329   // So clean up the things needed (e.g. delete files).
00330   virtual void deleteManager();
00331 
00332   // Let the storage manager initialize itself (upon creation).
00333   // It determines the bucket size and fills the index.
00334   void init();
00335 
00336   // Determine and set the bucket size.
00337   // It returns the number of rows per bucket.
00338   uInt setBucketSize();
00339   
00340   // Get the number of indices in use.
00341   uInt getNrIndices() const;
00342   
00343   // Add rows to the storage manager.
00344   // Per column it extends the number of rows.
00345   virtual void addRow (uInt aNrRows);
00346   
00347   // Delete a row from all columns.
00348   virtual void removeRow (uInt aRowNr);
00349   
00350   // Do the final addition of a column.
00351   virtual void addColumn (DataManagerColumn*);
00352   
00353   // Remove a column from the data file.
00354   virtual void removeColumn (DataManagerColumn*);
00355   
00356   // Create a column in the storage manager on behalf of a table column.
00357   // The caller has to delete the newly created object.
00358   // <group>
00359   // Create a scalar column.
00360   virtual DataManagerColumn* makeScalarColumn (const String& aName,
00361                                                int aDataType,
00362                                                const String& aDataTypeID);
00363   // Create a direct array column.
00364   virtual DataManagerColumn* makeDirArrColumn (const String& aName,
00365                                                int aDataType,
00366                                                const String& aDataTypeID);
00367   // Create an indirect array column.
00368   virtual DataManagerColumn* makeIndArrColumn (const String& aName,
00369                                                int aDataType,
00370                                                const String& aDataTypeID);
00371   // </group>
00372   
00373   // Get the cache object.
00374   // This will construct the cache object if not present yet.
00375   // The cache object will be deleted by the destructor.
00376   BucketCache& getCache();
00377   
00378   // Construct the cache object (if not constructed yet).
00379   void makeCache();
00380   
00381   // Read the header.
00382   void readHeader();
00383   
00384   // Read the index from its buckets.
00385   void readIndexBuckets();
00386 
00387   // Write the header and the indices.
00388   void writeIndex();
00389 
00390 
00391   //# Declare member variables.
00392   // Name of data manager.
00393   String       itsDataManName;
00394   
00395   // The file containing the indirect arrays.
00396   StManArrayFile* itsIosFile;
00397   
00398   // The number of rows in the columns.
00399   uInt         itsNrRows;
00400   
00401   // Column offset
00402   Block<uInt> itsColumnOffset;
00403 
00404   // Row Index ID containing all the columns in a bucket
00405   Block<uInt> itsColIndexMap;
00406 
00407   // Holds all SSMIndex objects (see getIndex and getNrIndices).
00408   PtrBlock<SSMIndex*>  itsPtrIndex;
00409   
00410   // The cache with the SSM buckets.
00411   BucketCache* itsCache;
00412   
00413   // The file containing all data.
00414   BucketFile*  itsFile;
00415   
00416   // String handler class
00417   SSMStringHandler* itsStringHandler;
00418 
00419   // The persistent cache size.
00420   uInt itsPersCacheSize;
00421   
00422   // The actual cache size.
00423   uInt itsCacheSize;
00424   
00425   // The initial number of buckets in the cache.
00426   uInt itsNrBuckets;
00427 
00428   // Nr of buckets needed for index.
00429   uInt itsNrIdxBuckets;
00430 
00431   // Number of the first index bucket
00432   Int itsFirstIdxBucket;
00433 
00434   // Offset of index in first bucket.
00435   // If >0, the index fits in a single bucket.
00436   uInt itsIdxBucketOffset;
00437 
00438   // Number of the last String Bucket (NOTE(review): comment used to say "first"; confirm).
00439   Int itsLastStringBucket;
00440 
00441   // Length of the index memory block.
00442   uInt itsIndexLength;
00443 
00444   // The nr of free buckets.
00445   uInt itsFreeBucketsNr;
00446   
00447   // The first free bucket.
00448   Int itsFirstFreeBucket;
00449   
00450   // The bucket size, and (presumably, going by the name) the rows per bucket.
00451   uInt itsBucketSize;
00452   uInt itsBucketRows;
00453   
00454   // The assembly of all columns.
00455   PtrBlock<SSMColumn*> itsPtrColumn;
00456   
00457   // Has the data changed since the last flush?
00458   Bool isDataChanged;
00459 };
00460 
00461 // Return the number of elements in itsPtrIndex (the indices in use).
00462 inline uInt SSMBase::getNrIndices() const
00463 {
00464   return itsPtrIndex.nelements();
00465 }
00466 // Return the actual cache size (itsCacheSize), which the class documents as a bucket count.
00467 inline uInt SSMBase::getCacheSize() const
00468 {
00469   return itsCacheSize;
00470 }
00471 // Return the number of rows recorded in itsNrRows.
00472 inline uInt SSMBase::getNRow() const
00473 {
00474   return itsNrRows;
00475 }
00476 // Return the bucket size recorded in itsBucketSize.
00477 inline uInt SSMBase::getBucketSize() const
00478 {
00479   return itsBucketSize;
00480 }
00481 // Return the bucket cache, calling makeCache() first if no cache object exists yet.
00482 inline BucketCache& SSMBase::getCache()
00483 {
00484   if (itsCache == 0) {  // null pointer: cache not constructed yet
00485     makeCache();
00486   }
00487   return *itsCache;
00488 }
00489 // Return the SSMColumn stored at aColNr; no range or null check is done in this function.
00490 inline SSMColumn& SSMBase::getColumn (uInt aColNr)
00491 {
00492   return *(itsPtrColumn[aColNr]);
00493 }
00494 // Return the SSMIndex stored at anIdxNr; no range or null check is done in this function.
00495 inline SSMIndex& SSMBase::getIndex (uInt anIdxNr)
00496 {
00497   return *(itsPtrIndex[anIdxNr]);
00498 }
00499 // Return the stored SSMStringHandler pointer as is (no null check is done here).
00500 inline SSMStringHandler* SSMBase::getStringHandler()
00501 {
00502   return itsStringHandler;
00503 }
00504 
00505 
00506 
00507 } //# NAMESPACE CASA - END
00508 
00509 #endif