casa  $Rev:20696$
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
TiledStMan.h
Go to the documentation of this file.
00001 //# TiledStMan.h: Base class for Tiled Storage Managers
00002 //# Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //# $Id: TiledStMan.h 21014 2011-01-06 08:57:49Z gervandiepen $
00027 
00028 #ifndef TABLES_TILEDSTMAN_H
00029 #define TABLES_TILEDSTMAN_H
00030 
00031 
00032 //# Includes
00033 #include <casa/aips.h>
00034 #include <tables/Tables/DataManager.h>
00035 #include <casa/Containers/Block.h>
00036 #include <casa/Arrays/IPosition.h>
00037 #include <casa/OS/Conversion.h>
00038 #include <casa/BasicSL/String.h>
00039 
00040 namespace casa { //# NAMESPACE CASA - BEGIN
00041 
00042 //# Forward Declarations
00043 class TSMColumn;
00044 class TSMDataColumn;
00045 class TSMCube;
00046 class TSMFile;
00047 class TableDesc;
00048 class Record;
00049 template<class T> class Vector;
00050 
00051 
00052 // <summary>
00053 // Base class for Tiled Storage Manager classes
00054 // </summary>
00055 
00056 // <use visibility=export>
00057 
00058 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="">
00059 // </reviewed>
00060 
00061 // <prerequisite>
00062 //# Classes you should understand before using this one.
00063 //   <li> Description of Tiled Storage Manager in module file
00064 //        <linkto module=Tables:TiledStMan>Tables.h</linkto>
00065 //   <li> <linkto class=DataManager>DataManager</linkto>
00066 //   <li> <linkto class=TSMColumn>TSMColumn</linkto>
00067 //   <li> <linkto class=ROTiledStManAccessor>ROTiledStManAccessor</linkto>
00068 //        for a discussion of the maximum cache size
00069 // </prerequisite>
00070 
00071 // <synopsis> 
00072 // TiledStMan is the base class for Tiled Storage Managers.
00073 // A tiled storage manager is capable of storing a hypercolumn
00074 // (as defined by <linkto file="TableDesc.h#defineHypercolumn">
00075 // TableDesc::defineHypercolumn</linkto>)
00076 // in one or more hypercubes.
00077 // <br>It is not necessary to define a hypercolumn. If not defined,
00078 // it is assumed that all columns bound to this storage manager are
00079 // data columns. At least one of the columns must have a fixed
00080 // dimensionality and is used to determine the hypercube dimensionality.
00081 // <br>The general concept of these storage managers is explained in the
00082 // <linkto module="Tables:TiledStMan">Tables module description</linkto>.
00083 // <p>
00084 // TiledStMan contains all common functions for the different tiled
00085 // storage managers. In particular, it contains functions
00086 // to check if the definitions of the shapes of hypercubes, coordinates, and
00087 // data cells are consistent.
00088 // It also contains various data members and functions to make them
00089 // persistent by writing them into an AipsIO stream.
00090 // </synopsis> 
00091 
00092 // <motivation>
00093 // This base class contains the common functionality of all
00094 // tiled storage managers. The base class is still abstract.
00095 // Only concrete tiled storage managers derived from it can
00096 // be instantiated.
00097 // <p>
00098 // Tiled storage managers make access to array data possible with
00099 // more or less the same efficiency for access along different axes.
00100 // </motivation>
00101 
00102 //# <todo asof="$DATE:$">
00103 //# A List of bugs, limitations, extensions or planned refinements.
00104 //# </todo>
00105 
00106 
00107 class TiledStMan : public DataManager
00108 {
00109 public:
00110     // Create a TiledStMan.
00111     TiledStMan();
00112 
00113     // Create a TiledStMan storage manager.
00114     // The given maximum cache size is persistent,
00115     // thus will be reused when the table is read back. Note that the class
00116     // <linkto class=ROTiledStManAccessor>ROTiledStManAccessor</linkto>
00117     // allows one to overwrite the maximum cache size temporarily.
00118     // Its description contains a discussion about the effects of
00119     // setting a maximum cache.
00120     TiledStMan (const String& hypercolumnName, uInt maximumCacheSize);
00121 
00122     virtual ~TiledStMan();
00123 
00124     // Get the name given to the storage manager.
00125     // This is the name of the hypercolumn.
00126     virtual String dataManagerName() const;
00127     // Presumably sets that name (the hypercolumn name); body not shown here.
00128     void setDataManagerName (const String& newHypercolumnName);
00129 
00130     // Return a record containing data manager specifications.
00131     virtual Record dataManagerSpec() const;
00132 
00133     // Get data manager properties that can be modified.
00134     // That is only ActualCacheSize (the actual cache size in buckets).
00135     // It is a subset of the data manager specification.
00136     virtual Record getProperties() const;
00137 
00138     // Modify data manager properties.
00139     // Only ActualCacheSize can be used. It is similar to function setCacheSize
00140     // with <src>canExceedNrBuckets=False</src>.
00141     virtual void setProperties (const Record& spec);
00142 
00143     // Set the flag to "data has changed since last flush".
00144     void setDataChanged();
00145 
00146     // Derive the tile shape from the hypercube shape for the given
00147     // number of pixels per tile. It tries to get the same number
00148     // of tiles for each dimension.
00149     // When a weight vector is given, the number of tiles for a dimension
00150     // is proportional to the weight.
00151     // <br>After the initial guess it tries to optimize it by trying
00152     // to waste as little space as possible, while trying to keep as close
00153     // to the initial guess. The given tolerance (possibly per axis)
00154     // gives the minimum and maximum possible length of a tile axis
00155     // (minimum = initial_guess*tolerance; maximum = initial_guess/tolerance).
00156     // The heuristic is such that a tile axis length dividing the cube length
00157     // exactly is always favoured.
00158     // The test program <src>tTiledStMan</src> can be used to see how
00159     // the algorithm works out for a given tile size and cube shape.
00160     // <group>
00161     static IPosition makeTileShape (const IPosition& hypercubeShape,
00162                                     Double tolerance = 0.5,
00163                                     uInt maxNrPixelsPerTile = 32768);
00164     static IPosition makeTileShape (const IPosition& hypercubeShape,
00165                                     const Vector<double>& weight,
00166                                     const Vector<double>& tolerance,
00167                                     uInt maxNrPixelsPerTile = 32768);
00168     // </group>
00169 
00170     // Set the maximum cache size (in bytes) in a non-persistent way.
00171     virtual void setMaximumCacheSize (uInt nbytes);
00172 
00173     // Get the current maximum cache size (in bytes).
00174     uInt maximumCacheSize() const;
00175 
00176     // Get the current cache size (in buckets) for the hypercube in
00177     // the given row.
00178     uInt cacheSize (uInt rownr) const;
00179 
00180     // Get the hypercube shape of the data in the given row.
00181     const IPosition& hypercubeShape (uInt rownr) const;
00182 
00183     // Get the tile shape of the data in the given row.
00184     const IPosition& tileShape (uInt rownr) const;
00185 
00186     // Get the bucket size (in bytes) of the hypercube in the given row.
00187     uInt bucketSize (uInt rownr) const;
00188 
00189     // Can the tiled storage manager handle changing array shapes?
00190     // The default is no (but TiledCellStMan can).
00191     virtual Bool canChangeShape() const;
00192 
00193     // Can the tiled storage manager access an entire column?
00194     // TiledColumnStMan can always do that.
00195     // The others might be able to do it (for this time).
00196     // The default implementation returns True if there is only 1 hypercube.
00197     // reask is set to True (because next time things might be different).
00198     virtual Bool canAccessColumn (Bool& reask) const;
00199 
00200     // Calculate the cache size (in buckets) for accessing the hypercube
00201     // containing the given row. It takes the maximum cache size into
00202     // account (allowing an overdraft of 10%).
00203     // It uses the given axisPath (i.e. traversal order) to determine
00204     // the optimum size. A window can be specified to indicate that only
00205     // the given subset of the hypercube will be accessed.
00206     // <br>
00207     // The length of the slice and window arguments and <src>axisPath</src>
00208     // must be less than or equal to the dimensionality of the hypercube.
00209     // The non-specified <src>windowStart</src> parts default to 0.
00210     // The non-specified <src>windowLength</src> parts default to
00211     // the hypercube shape.
00212     // The non-specified <src>sliceShape</src> parts default to 1.
00213     // <br>
00214     // Axispath = [2,0,1] indicates that the z-axis changes most rapidly,
00215     // thereafter x and y. An axis can occur only once in the axisPath.
00216     // The non-specified <src>axisPath</src> parts get the natural order.
00217     // E.g. in the previous example axisPath=[2] defines the same path.
00218     // <br>When forceSmaller (an argument of <src>setCacheSize</src> only)
00219     // is False, the cache is not resized when the new size is smaller.
00220     // <br>A flag is set indicating that the TSMDataColumn
00221     // access functions do not need to size the cache.
00222     uInt calcCacheSize (uInt rownr, const IPosition& sliceShape,
00223                         const IPosition& windowStart,
00224                         const IPosition& windowLength,
00225                         const IPosition& axisPath) const;
00226 
00227     // Set the cache size using the <src>calcCacheSize</src>
00228     // function mentioned above.
00229     void setCacheSize (uInt rownr, const IPosition& sliceShape,
00230                        const IPosition& windowStart,
00231                        const IPosition& windowLength,
00232                        const IPosition& axisPath,
00233                        Bool forceSmaller);
00234 
00235     // Set the cache size for accessing the hypercube containing the given row.
00236     // When the given cache size exceeds the maximum cache size by more
00237     // than 10%, the maximum cache size is used instead.
00238     // <br>When forceSmaller is False, the cache is not resized when the
00239     // new size is smaller.
00240     // <br>A flag is set indicating that the TSMDataColumn
00241     // access functions do not need to size the cache.
00242     void setCacheSize (uInt rownr, uInt nbuckets, Bool forceSmaller);
00243 
00244     // Determine if the user set the cache size (using setCacheSize).
00245     Bool userSetCache (uInt rownr) const;
00246 
00247     // Empty the caches used by the hypercubes in this storage manager.
00248     // It will flush the caches as needed and remove all buckets from them
00249     // resulting in a possibly large drop in memory used.
00250     // It also clears the userSetCache flag.
00251     void emptyCaches();
00252 
00253     // Show the statistics of all caches used.
00254     void showCacheStatistics (ostream& os) const;
00255 
00256     // Get the length of the data for the given number of pixels.
00257     // This can be used to calculate the length of a tile.
00258     uInt getLengthOffset (uInt nrPixels, Block<uInt>& dataOffset,
00259                           Block<uInt>& localOffset,
00260                           uInt& localTileLength) const;
00261 
00262     // Get the number of coordinate vectors.
00263     uInt nrCoordVector() const;
00264 
00265     // Get the nr of rows in this storage manager.
00266     uInt nrow() const;
00267 
00268     // Does the storage manager allow to add rows? (yes)
00269     Bool canAddRow() const;
00270 
00271     // Get the default tile shape.
00272     // By default it returns a zero-length IPosition.
00273     virtual IPosition defaultTileShape() const;
00274 
00275     // Return the number of hypercubes.
00276     uInt nhypercubes() const;
00277 
00278     // Test if only one hypercube is used by this storage manager.
00279     // If not, throw an exception. Otherwise return the hypercube.
00280     virtual TSMCube* singleHypercube();
00281 
00282     // Get the given hypercube.
00283     // <group>
00284     const TSMCube* getTSMCube (uInt hypercube) const;
00285     TSMCube* getTSMCube (uInt hypercube);
00286     // </group>
00287     
00288     // Get the hypercube in which the given row is stored.
00289     // <group>
00290     const TSMCube* getHypercube (uInt rownr) const;
00291     virtual TSMCube* getHypercube (uInt rownr) = 0;
00292     // </group>
00293 
00294     // Get the hypercube in which the given row is stored.
00295     // It also returns the position of the row in that hypercube.
00296     virtual TSMCube* getHypercube (uInt rownr, IPosition& position) = 0;
00297 
00298     // Make the correct TSMCube type (depending on tsmOption()).
00299     TSMCube* makeTSMCube (TSMFile* file, const IPosition& cubeShape,
00300                           const IPosition& tileShape,
00301                           const Record& values, Int64 fileOffset=-1);
00302 
00303     // Read a tile and convert the data to local format.
00304     void readTile (char* local, const Block<uInt>& localOffset,
00305                    const char* external, const Block<uInt>& externalOffset,
00306                    uInt nrpixels);
00307 
00308     // Write a tile after converting the data to external format.
00309     void writeTile (char* external, const Block<uInt>& externalOffset,
00310                     const char* local, const Block<uInt>& localOffset,
00311                     uInt nrpixels);
00312 
00313     // Get the TSMFile object with the given sequence number.
00314     TSMFile* getFile (uInt sequenceNumber);
00315 
00316     // Open the storage manager for an existing table.
00317     virtual void open (uInt nrrow, AipsIO&);
00318 
00319     // Resync the storage manager with the new file contents.
00320     virtual void resync (uInt nrrow);
00321 
00322     // Reopen all files used in this storage manager for read/write access.
00323     virtual void reopenRW();
00324 
00325     // The data manager will be deleted (because all its columns are
00326     // requested to be deleted).
00327     // So clean up the things needed (e.g. delete files).
00328     virtual void deleteManager();
00329 
00330     // Create a column in the storage manager on behalf of a table column.
00331     // <group>
00332     // Create a scalar column.
00333     DataManagerColumn* makeScalarColumn (const String& name, int dataType,
00334                                          const String& dataTypeID);
00335     // Create a direct array column.
00336     DataManagerColumn* makeDirArrColumn (const String& name, int dataType,
00337                                          const String& dataTypeID);
00338     // Create an indirect array column.
00339     DataManagerColumn* makeIndArrColumn (const String& name, int dataType,
00340                                          const String& dataTypeID);
00341     // </group>
00342 
00343     // The TiledStMan wants to do reallocateColumn.
00344     Bool canReallocateColumns() const;
00345 
00346     // Reallocate the column object if it is part of this data manager.
00347     // It returns a pointer to the new column object.
00348     // It is used to remove the indirection of the TSMColumn objects
00349     // resulting in only one instead of two virtual column calls to get the data.
00350     DataManagerColumn* reallocateColumn (DataManagerColumn* column);
00351 
00352     // Set the shape and tile shape of a hypercube.
00353     // By default it throws an "impossible" exception.
00354     virtual void setShape (uInt rownr, TSMCube* hypercube,
00355                            const IPosition& shape,
00356                            const IPosition& tileShape);
00357 
00358     // Check the shape to be set for a hypercube.
00359     // It checks if it matches predefined (fixed shape) columns
00360     // and the shape of already defined coordinate columns.
00361     void checkCubeShape (const TSMCube* hypercube,
00362                          const IPosition& cubeShape) const;
00363 
00364     // Get the data type of the coordinate column with the given name.
00365     // An exception is thrown when the column is unknown.
00366     int coordinateDataType (const String& columnName) const;
00367 
00368     // Initialize the new coordinates for the given cube.
00369     void initCoordinates (TSMCube* hypercube);
00370 
00371     // Get pointer to data column object.
00372     const TSMDataColumn* getDataColumn (uInt colnr) const
00373       { return dataCols_p[colnr]; }
00374 
00375 protected:
00376     // Set the persistent maximum cache size.
00377     void setPersMaxCacheSize (uInt nbytes);
00378 
00379     // Get the bindings of the columns with the given names.
00380     // If bound, the pointer to the TSMColumn object is stored in the block.
00381     // If mustExist is True, an exception is thrown if the column
00382     // is not bound.
00383     // It returns the number of bound columns.
00384     uInt getBindings (const Vector<String>& columnNames,
00385                       PtrBlock<TSMColumn*>& colSet,
00386                       Bool mustExist) const;
00387 
00388     // Function setup calls this function to allow the derived class
00389     // to check specific information. In case of errors, an exception
00390     // should be thrown.
00391     // By default it does nothing.
00392     virtual void setupCheck (const TableDesc& tableDesc,
00393                              const Vector<String>& dataNames) const;
00394 
00395     // Get the table description needed for the hypercolumn description.
00396     virtual const TableDesc& getDesc() const;
00397 
00398     // Check if values are given in the record for all columns in
00399     // the block. Also check if the data types are correct.
00400     // An exception is thrown if something is incorrect.
00401     void checkValues (const PtrBlock<TSMColumn*>& colSet,
00402                       const Record& values) const;
00403 
00404     // Check if the coordinate values are correct.
00405     // This calls checkValues and checks if their shapes match the
00406     // hypercube shape.
00407     // An exception is thrown if invalid.
00408     void checkCoordinates (const PtrBlock<TSMColumn*>& coordColSet,
00409                            const IPosition& cubeShape,
00410                            const Record& values) const;
00411 
00412     // Check if the shapes of FixedShape data and coordinate columns match.
00413     // An exception is thrown if not.
00414     void checkShapeColumn (const IPosition& shape) const;
00415 
00416     // Check if the cube shape matches that of defined coordinates.
00417     void checkCoordinatesShapes (const TSMCube* hypercube,
00418                                  const IPosition& cubeShape) const;
00419 
00420     // Check if the hypercube to be added is correctly defined.
00421     void checkAddHypercube (const IPosition& cubeShape,
00422                             const Record& values) const;
00423 
00424     // Make a new TSMCube object.
00425     TSMCube* makeHypercube (const IPosition& cubeShape,
00426                             const IPosition& tileShape,
00427                             const Record& values);
00428 
00429     // Get the index of the hypercube with the given id-values.
00430     // If not found, -1 is returned.
00431     Int getCubeIndex (const Record& idValues) const;
00432     
00433     // Determine how many rows need to be added for an extension
00434     // (in the last dimension) of a hypercube with the given shape.
00435     uInt addedNrrow (const IPosition& shape, uInt incrInLastDim) const;
00436 
00437     // Flush the caches of all hypercubes.
00438     // If data have been put and fsync is set, fsync all files.
00439     Bool flushCaches (Bool fsync);
00440 
00441     // Let a derived class read the header info.
00442     // This is used by the open and resync function.
00443     virtual void readHeader (uInt nrrow, Bool firstTime) = 0;
00444 
00445     // Create the TSM header file.
00446     // It creates an AipsIO object for it.
00447     AipsIO* headerFileCreate();
00448 
00449     // Open the TSM header file.
00450     // It creates an AipsIO object for it.
00451     AipsIO* headerFileOpen();
00452 
00453     // Write the data into the header file.
00454     // The given number of TSMCube objects have to be written.
00455     void headerFilePut (AipsIO& headerFile, uInt nrCube);
00456 
00457     // Read the data from the header file.
00458     // When done for the first time, setup() is called to initialize
00459     // the various variables (using the extraNdim variable).
00460     void headerFileGet (AipsIO& headerFile, uInt tabNrrow, Bool firstTime,
00461                         Int extraNdim);
00462 
00463     // Close the header file.
00464     // It deletes the AipsIO object.
00465     void headerFileClose (AipsIO* headerFile);
00466 
00467     // Set up the TiledStMan variables from the table description.
00468     // The argument specifies the number of extra dimensions for the
00469     // hypercube compared to the data array (usually 0 or 1).
00470     // It is only used if no hypercolumn definition exists.
00471     // -1 means that the hypercolumn definition has to be present.
00472     void setup (Int extraNdim=-1);
00473 
00474     // Create a TSMFile object and store its pointer at the given index
00475     // in the block.
00476     void createFile (uInt index);
00477 
00478     // Convert the scalar data type to an array data type.
00479     // This function is temporary and can disappear when the ColumnDesc
00480     // classes use type TpArray*.
00481     int arrayDataType (int dataType) const;
00482 
00483 
00484     //# Declare all data members.
00485     // The name of the hypercolumn.
00486     String hypercolumnName_p;
00487     // The number of rows in the columns.
00488     uInt  nrrow_p;
00489     // The assembly of all columns.
00490     PtrBlock<TSMColumn*>  colSet_p;
00491     // The assembly of all data columns.
00492     PtrBlock<TSMDataColumn*> dataCols_p;
00493     PtrBlock<TSMColumn*>  dataColSet_p;
00494     // The assembly of all id columns.
00495     PtrBlock<TSMColumn*>  idColSet_p;
00496     // The assembly of all coordinate columns.
00497     PtrBlock<TSMColumn*>  coordColSet_p;
00498     // The assembly of all TSMFile objects.
00499     // The first file is for all non-extensible cubes, while the others
00500     // are for one file per extensible cube.
00501     PtrBlock<TSMFile*> fileSet_p;
00502     // The assembly of all TSMCube objects.
00503     PtrBlock<TSMCube*> cubeSet_p;
00504     // The persistent maximum cache size for a hypercube.
00505     uInt      persMaxCacheSize_p;
00506     // The actual maximum cache size for a hypercube.
00507     uInt      maxCacheSize_p;
00508     // The dimensionality of the hypercolumn.
00509     uInt      nrdim_p;
00510     // The number of vector coordinates.
00511     uInt      nrCoordVector_p;
00512     // The fixed cell shape.
00513     IPosition fixedCellShape_p;
00514     // Has any data changed since the last flush?
00515     Bool      dataChanged_p;
00516 
00517 private:
00518     // Forbid copy constructor (declared private; no definition in this header).
00519     TiledStMan (const TiledStMan&);
00520 
00521     // Forbid assignment (declared private; no definition in this header).
00522     TiledStMan& operator= (const TiledStMan&);
00523 };
00524 
00525 
00526 inline uInt TiledStMan::maximumCacheSize() const
00527     { return maxCacheSize_p; }   // the actual maximum, not persMaxCacheSize_p
00528 
00529 inline uInt TiledStMan::nrCoordVector() const
00530     { return nrCoordVector_p; }  // stored number of vector coordinates
00531 
00532 inline uInt TiledStMan::nrow() const
00533     { return nrrow_p; }          // stored number of rows in the columns
00534 
00535 inline uInt TiledStMan::nhypercubes() const
00536     { return cubeSet_p.nelements(); }  // element count of the block of TSMCube pointers
00537 
00538 inline void TiledStMan::setDataChanged()
00539     { dataChanged_p = True; }    // only raises the flag; nothing here resets it
00540 
00541 inline const TSMCube* TiledStMan::getTSMCube (uInt hypercube) const
00542     { return const_cast<TiledStMan*>(this)->getTSMCube (hypercube); }  // forwards to the non-const overload; result is returned as pointer-to-const
00543 
00544 inline const TSMCube* TiledStMan::getHypercube (uInt rownr) const
00545     { return const_cast<TiledStMan*>(this)->getHypercube (rownr); }  // forwards to the non-const (pure virtual) overload; result is returned as pointer-to-const
00546 
00547 inline void TiledStMan::setPersMaxCacheSize (uInt nbytes)
00548 {
00549     persMaxCacheSize_p = nbytes;   // the persistent maximum cache size
00550     maxCacheSize_p = nbytes;       // the actual maximum is set to the same value
00551 }
00552 
00553 
00554 
00555 
00556 } //# NAMESPACE CASA - END
00557 
00558 #endif