casa  $Rev:20696$
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
IncrementalStMan.h
Go to the documentation of this file.
00001 //# IncrementalStMan.h: The Incremental Storage Manager
00002 //# Copyright (C) 1996,1997,1999
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //# $Id: IncrementalStMan.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $
00027 
00028 #ifndef TABLES_INCREMENTALSTMAN_H
00029 #define TABLES_INCREMENTALSTMAN_H
00030 
00031 //# Includes
00032 #include <casa/aips.h>
00033 #include <tables/Tables/ISMBase.h>
00034 
00035 
00036 namespace casa { //# NAMESPACE CASA - BEGIN
00037 
00038 // <summary>
00039 // The Incremental Storage Manager
00040 // </summary>
00041 
00042 // <use visibility=export>
00043 
00044 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="tIncrementalStMan.cc">
00045 // </reviewed>
00046 
00047 // <prerequisite>
00048 //# Classes you should understand before using this one.
00049 //   <li> The Table Data Managers concept as described in module file
00050 //        <linkto module="Tables:Data Managers">Tables.h</linkto>
00051 //   <li> <linkto class=ROIncrementalStManAccessor>
00052 //        ROIncrementalStManAccessor</linkto>
00053 //        for a discussion of the cache size
00054 // </prerequisite>
00055 
00056 // <etymology>
00057 // IncrementalStMan is the data manager storing values in an incremental way
00058 // (similar to an incremental backup). A value is only stored when it
00059 // differs from the previous value.
00060 // </etymology>
00061 
00062 // <synopsis>
00063 // IncrementalStMan stores the data in a way that a value is only stored
00064 // when it is different from the value in the previous row. This storage
00065 // manager is very well suited for columns with slowly changing values,
00066 // because the resulting file can be much smaller. It is not suited at
00067 // all for columns with continuously changing data.
00068 // <p>
00069 // In general it can be advantageous to use this storage manager when
00070 // a value changes at most every 4 rows (although it depends on the length
00071 // of the data values themselves). The following simple example 
00072 // shows the approximate savings that can be achieved when storing a column
00073 // with double values changing every CH rows.
00074 // <srcblock>
00075 //   #rows    CH     normal length      ISM length      compress ratio
00076 //   50000     5        4000000          1606000               2.5
00077 //   50000    50        4000000           164000              24.5
00078 //   50000   500        4000000            32800             122
00079 // </srcblock>
00080 // There is a special test program <src>nISMBucket</src> in the Tables module
00081 // doing a simple, but usually adequate, simulation of the amount of
00082 // storage needed for a scenario.
00083 // <p>
00084 // IncrementalStMan stores the values (and associated indices) in
00085 // fixed-length buckets. A <linkto class=BucketCache>BucketCache</linkto>
00086 // object is used to read/write
00087 // the buckets. The default cache size is 1 bucket (which is fine for
00088 // sequential access), but for random access it can make sense to
00089 // increase the size of the cache. This can be done using
00090 // the class <linkto class=ROIncrementalStManAccessor>
00091 // ROIncrementalStManAccessor</linkto>.
00092 // <p>
00093 // The IncrementalStMan can hold values of any standard data type (thus
00094 // from Bool to String). It can handle scalars, direct and indirect
00095 // arrays. It can support an arbitrary number of columns. The values in
00096 // each of them can vary at its own speed.
00097 // <br>
00098 // A bucket contains the values of several consecutive rows.
00099 // At the beginning of a bucket the values of the starting row of all
00100 // columns for this storage manager are repeated. In this way the value
00101 // of a cell can always be found in the bucket and no references
00102 // to previous buckets are needed.
00103 // <br>A bucket should be big enough to hold all starting values and
00104 // a reasonable number of other values. As a rule of thumb it should be
00105 // big enough to hold at least 100 values of each column. In general the
00106 // default bucket size will do. Only in special cases (e.g. when storing
00107 // large variable length strings) the bucket size should be set explicitly.
00108 // Giving a zero bucket size means that a suitable default bucket size
00109 // will be calculated.
00110 // <br>
00111 // When a table is filled sequentially each bucket can be filled as
00112 // much as possible. When writing in a random way, buckets can contain
00113 // some unused space, because a bucket in the middle of the file
00114 // has to be split when a new value has to be put in it.
00115 // <p>
00116 // Each column in the IncrementalStMan has the following properties to
00117 // achieve the "store-different-values-only" behaviour.
00118 // <ul>
00119 // <li> When a row is not explicitly put, it has the same value as the
00120 //      previous row.
00121 //      The first row gets the standard undefined values when not put.
00122 //      The order of put's and addRow's is not important.
00123 //      <br>E.g. when a table has N rows and row N and the following M rows
00124 //      have the same value, the following schematic code has the same effect:
00125 //      <br><src>  add 1 row; put value in row N; add M rows;</src>
00126 //      <br><src>  add M+1 rows; put value in row N;</src>
00127 // <li> When putting a scalar or direct array, it is tested if it matches
00128 //      the previous row. If so, it is not stored again.
00129 //      This test is not done for indirect arrays, because those can
00130 //      be (very) big and it would be too time-consuming. So the only
00131 //      way to save space for indirect arrays is by not putting them
00132 //      as explained in the previous item.
00133 // <li> For indirect arrays the buckets contain a pointer only. The
00134 //      arrays themselves are stored in a separate file.
00135 // <li> When a value of an existing row is updated, only that one row is
00136 //      updated. The next row(s) keep their value, even if it was
00137 //      shared with the row being updated. 
00138 //      <br>For scalars and direct arrays it will be tested if the
00139 //      new value matches the value in the previous and/or next row.
00140 //      If so, those rows will be combined to save storage.  
00141 // <li> The IncrementalStMan is optimized for sequential access to a table.
00142 //      <br>- A bucket is accessed only once, because a bucket contains
00143 //            consecutive rows.
00144 //      <br>- For each column a copy is kept of the last value read.
00145 //            So the value for the next rows (with that same value)
00146 //            is immediately available.
00147 //      <br>For random access the performance can be improved by setting
00148 //          the cache size using class
00149 //          <linkto class=ROIncrementalStManAccessor>
00150 //          ROIncrementalStManAccessor</linkto>.
00151 // </ul>
00152 //
00153 // <note>This class contains many public functions which are only used
00154 // by other ISM classes. The only useful function for the user is the
00155 // constructor.
00156 // </note>
00157 
00158 // <motivation>
00159 // IncrementalStMan can save a lot of storage space.
00160 // Unlike the old StManMirAIO it stores the values directly in the
00161 // file to save on memory usage.
00162 // </motivation>
00163 
00164 // <example>
00165 // This example shows how to create a table and how to attach
00166 // the storage manager to some columns.
00167 // <srcblock>
00168 //   SetupNewTable newtab("name.data", tableDesc, Table::New);
00169 //   IncrementalStMan stman;                  // define storage manager
00170 //   newtab.bindColumn ("column1", stman);    // bind column to st.man.
00171 //   newtab.bindColumn ("column2", stman);    // bind column to st.man.
00172 //   Table tab(newtab);                       // actually create table
00173 // </srcblock>
00174 // </example>
00175 
00176 //# <todo asof="$DATE:$">
00177 //# A List of bugs, limitations, extensions or planned refinements.
00178 //# </todo>
00179 
00180 
00181 class IncrementalStMan : public ISMBase
00182 {
00183 public:
00184     // Create an incremental storage manager with the given name.
00185     // If no name is given, it is set to an empty string.
00186     // The name can be used to construct a
00187     // <linkto class=ROIncrementalStManAccessor>ROIncrementalStManAccessor
00188     // </linkto> object (e.g. to set the cache size).
00189     // <br>
00190     // The bucket size has to be given in bytes and the cache size in buckets.
00191     // A bucket size of 0 means that the storage manager will set the bucket
00192     // size such that it can contain about 100 rows
00193     // (with a minimum size of 32768 bytes). However, if that results
00194     // in a very large bucket size (>327680), it will be made smaller.
00195     // Note that it uses 32 bytes for the size of variable length strings,
00196     // so this heuristic may fail when a column contains large strings.
00197     // When <src>checkBucketSize</src> is set and the bucket size is > 0,
00198     // the storage manager throws an exception
00199     // when the size is too small to hold the values of at least 2 rows.
00200     // For this check it uses 0 for the length of variable length strings.
00201     // <group>
00202     explicit IncrementalStMan (uInt bucketSize = 0,
00203                                Bool checkBucketSize = True,
00204                                uInt cacheSize = 1);
00205     explicit IncrementalStMan (const String& dataManagerName,
00206                                uInt bucketSize = 0,
00207                                Bool checkBucketSize = True,
00208                                uInt cacheSize = 1);
00209     // </group>
00210 
00211     ~IncrementalStMan();
00212 
00213 private:
00214     // Copy constructor cannot be used (it is declared in the private section).
00215     IncrementalStMan (const IncrementalStMan& that);
00216 
00217     // Assignment cannot be used (it is declared in the private section).
00218     IncrementalStMan& operator= (const IncrementalStMan& that);
00219 };
00220 
00221 
00222 
00223 } //# NAMESPACE CASA - END
00220 
00221 
00222 
00223 } //# NAMESPACE CASA - END
00224 
00225 #endif