casa
$Rev:20696$
|
//# IncrementalStMan.h: The Incremental Storage Manager
//# Copyright (C) 1996,1997,1999
//# Associated Universities, Inc. Washington DC, USA.
//#
//# This library is free software; you can redistribute it and/or modify it
//# under the terms of the GNU Library General Public License as published by
//# the Free Software Foundation; either version 2 of the License, or (at your
//# option) any later version.
//#
//# This library is distributed in the hope that it will be useful, but WITHOUT
//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
//# License for more details.
//#
//# You should have received a copy of the GNU Library General Public License
//# along with this library; if not, write to the Free Software Foundation,
//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
//#
//# Correspondence concerning AIPS++ should be addressed as follows:
//#        Internet email: aips2-request@nrao.edu.
//#        Postal address: AIPS++ Project Office
//#                        National Radio Astronomy Observatory
//#                        520 Edgemont Road
//#                        Charlottesville, VA 22903-2475 USA
//#
//# $Id: IncrementalStMan.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $

#ifndef TABLES_INCREMENTALSTMAN_H
#define TABLES_INCREMENTALSTMAN_H

//# Includes
#include <casa/aips.h>
#include <tables/Tables/ISMBase.h>


namespace casa { //# NAMESPACE CASA - BEGIN

// <summary>
// The Incremental Storage Manager
// </summary>

// <use visibility=export>

// <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="tIncrementalStMan.cc">
// </reviewed>

// <prerequisite>
//# Classes you should understand before using this one.
//   <li> The Table Data Managers concept as described in module file
//        <linkto module="Tables:Data Managers">Tables.h</linkto>
//   <li> <linkto class=ROIncrementalStManAccessor>
//        ROIncrementalStManAccessor</linkto>
//        for a discussion of the cache size
// </prerequisite>

// <etymology>
// IncrementalStMan is the data manager storing values in an incremental way
// (similar to an incremental backup). A value is only stored when it
// differs from the previous value.
// </etymology>

// <synopsis>
// IncrementalStMan stores the data in a way that a value is only stored
// when it is different from the value in the previous row. This storage
// manager is very well suited for columns with slowly changing values,
// because the resulting file can be much smaller. It is not suited at
// all for columns with continuously changing data.
// <p>
// In general it can be advantageous to use this storage manager when
// a value changes at most every 4 rows (although it depends on the length
// of the data values themselves). The following simple example
// shows the approximate savings that can be achieved when storing a column
// with double values changing every CH rows.
// <srcblock>
// #rows     CH    normal length    ISM length    compress ratio
// 50000      5          4000000       1606000               2.5
// 50000     50          4000000        164000              24.5
// 50000    500          4000000         32800             122
// </srcblock>
// There is a special test program <src>nISMBucket</src> in the Tables module
// doing a simple, but usually adequate, simulation of the amount of
// storage needed for a scenario.
// <p>
// IncrementalStMan stores the values (and associated indices) in
// fixed-length buckets. A <linkto class=BucketCache>BucketCache</linkto>
// object is used to read/write
// the buckets. The default cache size is 1 bucket (which is fine for
// sequential access), but for random access it can make sense to
// increase the size of the cache. This can be done using
// the class <linkto class=ROIncrementalStManAccessor>
// ROIncrementalStManAccessor</linkto>.
// <p>
// The IncrementalStMan can hold values of any standard data type (thus
// from Bool to String). It can handle scalars, direct and indirect
// arrays. It can support an arbitrary number of columns. The values in
// each of them can vary at its own speed.
// <br>
// A bucket contains the values of several consecutive rows.
// At the beginning of a bucket the values of the starting row of all
// columns for this storage manager are repeated. In this way the value
// of a cell can always be found in the bucket and no references
// to previous buckets are needed.
// <br>A bucket should be big enough to hold all starting values and
// a reasonable number of other values. As a rule of thumb it should be
// big enough to hold at least 100 values of each column. In general the
// default bucket size will do. Only in special cases (e.g. when storing
// large variable length strings) the bucket size should be set explicitly.
// Giving a zero bucket size means that a suitable default bucket size
// will be calculated.
// <br>
// When a table is filled sequentially each bucket can be filled as
// much as possible. When writing in a random way, buckets can contain
// some unused space, because a bucket in the middle of the file
// has to be split when a new value has to be put in it.
// <p>
// Each column in the IncrementalStMan has the following properties to
// achieve the "store-different-values-only" behaviour.
// <ul>
// <li> When a row is not explicitly put, it has the same value as the
//      previous row.
//      The first row gets the standard undefined values when not put.
//      The order of put's and addRow's is not important.
//      <br>E.g. when a table has N rows and row N and the following M rows
//      have the same value, the following schematic code has the same effect:
//      <br><src> add 1 row; put value in row N; add M rows;</src>
//      <br><src> add M+1 rows; put value in row N;</src>
// <li> When putting a scalar or direct array, it is tested if it matches
//      the previous row. If so, it is not stored again.
//      This test is not done for indirect arrays, because those can
//      be (very) big and it would be too time-consuming. So the only
//      way to save space for indirect arrays is by not putting them
//      as explained in the previous item.
// <li> For indirect arrays the buckets contain a pointer only. The
//      arrays themselves are stored in a separate file.
// <li> When a value of an existing row is updated, only that one row is
//      updated. The next row(s) keep their value, even if it was
//      shared with the row being updated.
//      <br>For scalars and direct arrays it will be tested if the
//      new value matches the value in the previous and/or next row.
//      If so, those rows will be combined to save storage.
// <li> The IncrementalStMan is optimized for sequential access to a table.
//      <br>- A bucket is accessed only once, because a bucket contains
//            consecutive rows.
//      <br>- For each column a copy is kept of the last value read.
//            So the value for the next rows (with that same value)
//            is immediately available.
//      <br>For random access the performance can be improved by setting
//      the cache size using class
//      <linkto class=ROIncrementalStManAccessor>
//      ROIncrementalStManAccessor</linkto>.
// </ul>
//
// <note>This class contains many public functions which are only used
// by other ISM classes. The only useful function for the user is the
// constructor.
// </note>
// </synopsis>

// <motivation>
// IncrementalStMan can save a lot of storage space.
// Unlike the old StManMirAIO it stores the values directly in the
// file to save on memory usage.
// </motivation>

// <example>
// This example shows how to create a table and how to attach
// the storage manager to some columns.
// <srcblock>
//   SetupNewTable newtab("name.data", tableDesc, Table::New);
//   IncrementalStMan stman;                  // define storage manager
//   newtab.bindColumn ("column1", stman);    // bind column to st.man.
//   newtab.bindColumn ("column2", stman);    // bind column to st.man.
//   Table tab(newtab);                       // actually create table
// </srcblock>
// </example>

//# <todo asof="$DATE:$">
//# A List of bugs, limitations, extensions or planned refinements.
//# </todo>


class IncrementalStMan : public ISMBase
{
public:
    // Create an incremental storage manager with the given name.
    // If no name is used, it is set to an empty string.
    // The name can be used to construct a
    // <linkto class=ROIncrementalStManAccessor>ROIncrementalStManAccessor
    // </linkto> object (e.g. to set the cache size).
    // <br>
    // The bucket size has to be given in bytes and the cache size in buckets.
    // Bucket size 0 means that the storage manager will set the bucket
    // size such that it can contain about 100 rows
    // (with a minimum size of 32768 bytes). However, if that results
    // in a very large bucket size (>327680 bytes) it'll make it smaller.
    // Note it uses 32 bytes for the size of variable length strings,
    // so this heuristic may fail when a column contains large strings.
    // When <src>checkBucketSize</src> is set and Bucket size > 0
    // the storage manager throws an exception
    // when the size is too small to hold the values of at least 2 rows.
    // For this check it uses 0 for the length of variable length strings.
    // <br>
    // When omitted, the arguments default to <src>bucketSize = 0</src>
    // (let the storage manager choose), <src>checkBucketSize = True</src>
    // and <src>cacheSize = 1</src> (one bucket).
    // Both constructors are explicit, so neither an integer nor a String
    // is implicitly converted to an IncrementalStMan.
    // <group>
    explicit IncrementalStMan (uInt bucketSize = 0,
                               Bool checkBucketSize = True,
                               uInt cacheSize = 1);
    explicit IncrementalStMan (const String& dataManagerName,
                               uInt bucketSize = 0,
                               Bool checkBucketSize = True,
                               uInt cacheSize = 1);
    // </group>

    // Destructor.
    ~IncrementalStMan();

private:
    // Copy constructor cannot be used: it is declared private, so
    // client code cannot copy-construct an IncrementalStMan.
    IncrementalStMan (const IncrementalStMan& that);

    // Assignment cannot be used: it is declared private, so
    // client code cannot assign one IncrementalStMan to another.
    IncrementalStMan& operator= (const IncrementalStMan& that);
};



} //# NAMESPACE CASA - END

#endif