00001 //# Tables.h: The Tables module - AIPS++ data storage 00002 //# Copyright (C) 1994,1995,1996,1997,1998,1999,2000,2001,2002,2003 00003 //# Associated Universities, Inc. Washington DC, USA. 00004 //# 00005 //# This library is free software; you can redistribute it and/or modify it 00006 //# under the terms of the GNU Library General Public License as published by 00007 //# the Free Software Foundation; either version 2 of the License, or (at your 00008 //# option) any later version. 00009 //# 00010 //# This library is distributed in the hope that it will be useful, but WITHOUT 00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00012 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00013 //# License for more details. 00014 //# 00015 //# You should have received a copy of the GNU Library General Public License 00016 //# along with this library; if not, write to the Free Software Foundation, 00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00018 //# 00019 //# Correspondence concerning AIPS++ should be addressed as follows: 00020 //# Internet email: aips2-request@nrao.edu. 
00021 //# Postal address: AIPS++ Project Office 00022 //# National Radio Astronomy Observatory 00023 //# 520 Edgemont Road 00024 //# Charlottesville, VA 22903-2475 USA 00025 //# 00026 //# $Id$ 00027 00028 #ifndef TABLES_TABLES_H 00029 #define TABLES_TABLES_H 00030 00031 //# Includes 00032 //# table description 00033 #include <tables/Tables/TableDesc.h> 00034 #include <tables/Tables/ColumnDesc.h> 00035 #include <tables/Tables/ScaColDesc.h> 00036 #include <tables/Tables/ArrColDesc.h> 00037 #include <tables/Tables/ScaRecordColDesc.h> 00038 00039 //# storage managers 00040 #include <tables/Tables/StManAipsIO.h> 00041 #include <tables/Tables/StandardStMan.h> 00042 #include <tables/Tables/StandardStManAccessor.h> 00043 #include <tables/Tables/IncrementalStMan.h> 00044 #include <tables/Tables/IncrStManAccessor.h> 00045 #include <tables/Tables/TiledDataStMan.h> 00046 #include <tables/Tables/TiledDataStManAccessor.h> 00047 #include <tables/Tables/TiledCellStMan.h> 00048 #include <tables/Tables/TiledColumnStMan.h> 00049 #include <tables/Tables/TiledShapeStMan.h> 00050 #include <tables/Tables/MemoryStMan.h> 00051 00052 //# virtual column engines 00053 #include <tables/Tables/RetypedArrayEngine.h> 00054 #include <tables/Tables/RetypedArraySetGet.h> 00055 #include <tables/Tables/ScaledArrayEngine.h> 00056 #include <tables/Tables/MappedArrayEngine.h> 00057 #include <tables/Tables/ForwardCol.h> 00058 #include <tables/Tables/ForwardColRow.h> 00059 #include <tables/Tables/CompressComplex.h> 00060 #include <tables/Tables/CompressFloat.h> 00061 #include <tables/Tables/VirtualTaQLColumn.h> 00062 00063 //# table access 00064 #include <tables/Tables/Table.h> 00065 #include <tables/Tables/TableLock.h> 00066 #include <tables/Tables/SetupNewTab.h> 00067 #include <tables/Tables/ScalarColumn.h> 00068 #include <tables/Tables/ArrayColumn.h> 00069 #include <tables/Tables/TableRow.h> 00070 #include <tables/Tables/TableCopy.h> 00071 #include <casa/Arrays/Array.h> 00072 #include 
<casa/Arrays/Slicer.h> 00073 #include <casa/Arrays/Slice.h> 00074 00075 //# keywords 00076 #include <tables/Tables/TableRecord.h> 00077 #include <casa/Containers/RecordField.h> 00078 00079 //# table lookup 00080 #include <tables/Tables/ColumnsIndex.h> 00081 #include <tables/Tables/ColumnsIndexArray.h> 00082 00083 //# table expressions (for selection of rows) 00084 #include <tables/Tables/ExprNode.h> 00085 #include <tables/Tables/ExprNodeSet.h> 00086 #include <tables/Tables/TableParse.h> 00087 00088 //# table vectors 00089 #include <tables/Tables/TableVector.h> 00090 #include <tables/Tables/TabVecMath.h> 00091 #include <tables/Tables/TabVecLogic.h> 00092 00093 00094 namespace casa { //# NAMESPACE CASA - BEGIN 00095 00096 // <module> 00097 00098 // <summary> 00099 // Tables are the data storage mechanism for AIPS++ 00100 // </summary> 00101 00102 // <use visibility=export> 00103 00104 // <reviewed reviewer="jhorstko" date="1994/08/30" tests="" demos=""> 00105 // </reviewed> 00106 00107 // <prerequisite> 00108 // <li> <linkto class="Record:description">Record</linkto> class 00109 // </prerequisite> 00110 00111 // <etymology> 00112 // "Table" is a formal term from relational database theory: 00113 // <cite> "The organizing principle in a relational database is the TABLE, 00114 // a rectangular, row/column arrangement of data values."</cite> 00115 // AIPS++ tables are extensions to traditional tables, but are similar 00116 // enough that we use the same name. There is also a strong resemblance 00117 // between the uses of AIPS++ tables, and FITS binary tables, which 00118 // provides another reason to use "Tables" to describe the AIPS++ data 00119 // storage mechanism. 00120 // </etymology> 00121 00122 // <synopsis> 00123 // Tables are the fundamental storage mechanism for AIPS++. 
This document 00124 // explains <A HREF="#Tables:motivation">why</A> they had to be made, 00125 // <A HREF="#Tables:properties">what</A> their properties are, and 00126 // <A HREF="#Tables:open">how</A> to use them. The last subject is 00127 // discussed and illustrated in a sequence of sections: 00128 // <UL> 00129 // <LI> <A HREF="#Tables:open">opening</A> an existing table, 00130 // <LI> <A HREF="#Tables:read">reading</A> from a table, 00131 // <LI> <A HREF="#Tables:creation">creating</A> a new table, 00132 // <LI> <A HREF="#Tables:write">writing</A> into a table, 00133 // <LI> <A HREF="#Tables:row-access">accessing rows</A> in a table, 00134 // <LI> <A HREF="#Tables:select and sort">selection and sorting</A> 00135 // (see also <A HREF=../../notes/199/199.html>Table Query Language</A>), 00136 // <LI> <A HREF="#Tables:iterate">iterating</A> through a table, 00137 // <LI> <A HREF="#Tables:LockSync">locking/synchronization</A> 00138 // for concurrent access, 00139 // <LI> <A HREF="#Tables:KeyLookup">indexing</A> a table for faster lookup, 00140 // <LI> <A HREF="#Tables:vectors">vector operations</A> on a column. 00141 // <LI> <A HREF="#Tables:performance">performance and robustness</A> considerations. 00142 // </UL> 00143 00144 00145 // <A NAME="Tables:motivation"> 00146 // <motivation></A> 00147 // 00148 // The AIPS++ tables are mainly based upon the ideas of Allen Farris, 00149 // as laid out in the 00150 // <A HREF="http://aips2.cv.nrao.edu/aips++/docs/reference/Database.ps.gz"> 00151 // AIPS++ Database document</A>, from where the following paragraph is taken: 00152 // 00153 // <BLOCKQUOTE> 00154 // Traditional relational database tables have two features that 00155 // decisively limit their applicability to scientific data. First, an item of 00156 // data in a column of a table must be atomic -- it must have no internal 00157 // structure. A consequence of this restriction is that relational 00158 // databases are unable to deal with arrays of data items. 
Second, an 00159 // item of data in a column of a table must not have any direct or 00160 // implied linkages to other items of data or data aggregates. This 00161 // restriction makes it difficult to model complex relationships between 00162 // collections of data. While these restrictions may make it easy to 00163 // define a mathematically complete set of data manipulation operations, 00164 // they are simply intolerable in a scientific data-handling context. 00165 // Multi-dimensional arrays are frequently the most natural modes in 00166 // which to discuss and think about scientific data. In addition, 00167 // scientific data often requires complex calibration operations that 00168 // must draw on large bodies of data about equipment and its performance 00169 // in various states. The restrictions imposed by the relational model 00170 // make it very difficult to deal with complex problems of this nature. 00171 // </BLOCKQUOTE> 00172 // 00173 // In response to these limitations, and other needs, the AIPS++ tables were 00174 // designed. 00175 // </motivation> 00176 00177 // <A NAME="Tables:properties"> 00178 // <h3>Table Properties</h3></A> 00179 // 00180 // AIPS++ tables have the following properties: 00181 // <ul> 00182 // <li> A table consists of a number of rows and columns. 00183 // <A HREF="#Tables:keywords">Keyword/value pairs</A> may be defined 00184 // for the table as a whole and for individual columns. A keyword/value 00185 // pair for a column could, for instance, define its unit. 00186 // <li> Each table has a <A HREF="#Tables:Table Description">description</A> 00187 // which specifies the number and type of columns, and maybe initial 00188 // keyword sets and default values for the columns. 
00189 // <li> A cell in a column may contain 00190 // <UL> 00191 // <LI> a scalar; 00192 // <LI> a "direct" array -- which must have the same shape in all 00193 // cells of a column, is usually small, and is stored in the 00194 // table itself; or 00195 // <LI> an "indirect" array -- which may have different shapes in 00196 // different cells of the same column, is arbitrarily large, 00197 // and is stored in a separate file. 00198 // </UL> 00199 // <li> A column may be 00200 // <UL> 00201 // <LI> "filled" -- containing actual data, or 00202 // <LI> "virtual" -- containing a recipe telling how the data will 00203 // be generated dynamically 00204 // </UL> 00205 // <li> Only the standard AIPS++ data types can be used in filled 00206 // columns, be they scalars or arrays: Bool, uChar, Short, uShort, 00207 // Int, uInt, float, double, Complex, DComplex and String. 00208 // Furthermore scalars containing 00209 // <linkto class=TableRecord>record</linkto> values are possible. 00210 // <li> A column can have a default value, which will automatically be stored 00211 // in a cell of the column, when a row is added to the table. 00212 // <li> <A HREF="#Tables:Data Managers">Data managers</A> handle the 00213 // reading, writing and generation of data. Each column in a table can 00214 // be assigned its own data manager, which allows for optimization of 00215 // the data storage per column. The choice of data manager determines 00216 // whether a column is filled or virtual. 00217 // <li> Table data are stored in a canonical format, so they can be read 00218 // on any machine. To avoid needless swapping of bytes, the data can 00219 // be stored in big endian (as used on e.g. SUN) or little endian 00220 // (as used on Intel PCs) canonical format. 00221 // By default it uses the format specified in the aipsrc variable 00222 // <code>table.endianformat</code> which defaults to 00223 // <code>Table::LocalEndian</code> (thus the endian format of the 00224 // machine being used). 
00225 // <li> The SQL-like 00226 // <a href="../../notes/199/199.html">Table Query Language</a> (TaQL) 00227 // can be used to do operations on tables like 00228 // select, sort, update, insert, delete, and create. 00229 // </ul> 00230 // 00231 // Tables can be in one of three forms: 00232 // <ul> 00233 // <li> A plain table is a table stored on disk. 00234 // It can be shared by multiple processes. 00235 // <li> A memory table is a table held in memory. 00236 // It is a process specific table, thus not sharable. 00237 // The <linkto class=Table>Table::copy</linkto> function can be used 00238 // to turn a memory table into a plain table. 00239 // <li> A reference table is a table referencing a plain or memory table. 00240 // It is the result of a selection or sort on another table. 00241 // A reference table references the data in the other table, thus 00242 // changing data in a reference table means that the data in the 00243 // original table are changed. 00244 // The <linkto class=Table>Table::deepCopy</linkto> function can be 00245 // used to turn a reference table into a plain table. 00246 // </ul> 00247 // Concurrent access from different processes to the same plain table is 00248 // fully supported by means of a <A HREF="#Tables:LockSync"> 00249 // locking/synchronization</A> mechanism. Concurrent access over NFS is also 00250 // supported. 00251 // <p> 00252 // A (somewhat primitive) mechanism is available to do a 00253 // <A HREF="#Tables:KeyLookup">table lookup</A> based on the contents 00254 // of a key. In the future this might be replaced by a proper B+-tree index 00255 // mechanism. 
00256 00257 // <A NAME="Tables:open"> 00258 // <h3>Opening an Existing Table</h3></A> 00259 // 00260 // To open an existing table you just create a 00261 // <linkto class="Table:description">Table</linkto> object giving 00262 // the name of the table, like: 00263 // 00264 // <srcblock> 00265 // Table readonly_table ("tableName"); 00266 // // or 00267 // Table read_and_write_table ("tableName", Table::Update); 00268 // </srcblock> 00269 // 00270 // The constructor option determines whether the table will be opened as 00271 // readonly or as read/write. A readonly table file must be opened 00272 // as readonly, otherwise an exception is thrown. The functions 00273 // <linkto class="Table">Table::isWritable(...)</linkto> 00274 // can be used to determine if a table is writable. 00275 // 00276 // When the table is opened, the data managers are reinstantiated 00277 // according to their definition at table creation. 00278 00279 // <A NAME="Tables:read"> 00280 // <h3>Reading from a Table</h3></A> 00281 // 00282 // You can read data from a table column with the "get" functions 00283 // in the classes 00284 // <linkto class="ROScalarColumn:description">ROScalarColumn<T></linkto> 00285 // and 00286 // <linkto class="ROArrayColumn:description">ROArrayColumn<T></linkto>. 00287 // For scalars of a standard data type (i.e. Bool, uChar, Int, Short, 00288 // uShort, uInt, float, double, Complex, DComplex and String) you could 00289 // instead use 00290 // <linkto class="ROTableColumn">ROTableColumn::getScalar(...)</linkto> or 00291 // <linkto class="ROTableColumn">ROTableColumn::asXXX(...)</linkto>. 00292 // These functions offer an extra: they do automatic data type promotion; 00293 // so that you can, for example, get a double value from a float column. 00294 // 00295 // These "get" functions are used in the same way as the simple "put" 00296 // functions described in the <A HREF="#Tables:write">section on writing</A>. 
00297 // <p> 00298 // <linkto class="ScalarColumn:description">ScalarColumn<T></linkto> 00299 // is derived from ROScalarColumn<T>, and 00300 // therefore has the same "get" functions. However, if a 00301 // ScalarColumn<T> object is constructed for a non-writable column, 00302 // an exception is thrown. Only ROScalarColumn<T> objects can be 00303 // constructed for non-writable columns. 00304 // The same is true for 00305 // <linkto class="ArrayColumn:description">ArrayColumn<T></linkto> and 00306 // <linkto class="TableColumn:description">TableColumn</linkto>. 00307 // <p> 00308 // A typical program could look like: 00309 // <srcblock> 00310 // #include <tables/Tables/Table.h> 00311 // #include <tables/Tables/ScalarColumn.h> 00312 // #include <tables/Tables/ArrayColumn.h> 00313 // #include <casa/Arrays/Vector.h> 00314 // #include <casa/Arrays/Slicer.h> 00315 // #include <casa/Arrays/ArrayMath.h> 00316 // #include <iostream> 00317 // 00318 // main() 00319 // { 00320 // // Open the table (readonly). 00321 // Table tab ("some.name"); 00322 // 00323 // // Construct the various column objects. 00324 // // Their data type has to match the data type in the table description. 00325 // ROScalarColumn<Int> acCol (tab, "ac"); 00326 // ROArrayColumn<Float> arr2Col (tab, "arr2"); 00327 // 00328 // // Loop through all rows in the table. 00329 // uInt nrrow = tab.nrow(); 00330 // for (uInt i=0; i<nrrow; i++) { 00331 // // Read the row for both columns. 00332 // cout << "Column ac in row i = " << acCol(i) << endl; 00333 // Array<Float> array = arr2Col.get (i); 00334 // } 00335 // 00336 // // Show the entire column ac, 00337 // // and show the 10th element of arr2 in each row. 
00338 // cout << acCol.getColumn(); 00339 // cout << arr2Col.getColumn (Slicer(Slice(10))); 00340 // } 00341 // </srcblock> 00342 00343 // <A NAME="Tables:creation"> 00344 // <h3>Creating a Table</h3></A> 00345 // 00346 // The creation of a table is a multi-step process: 00347 // <ol> 00348 // <li> 00349 // Create a <A HREF="#Tables:Table Description">table description</A>. 00350 // <li> 00351 // Create a <linkto class="SetupNewTable:description">SetupNewTable</linkto> 00352 // object with the name of the new table. 00353 // <li> 00354 // Create the necessary <A HREF="#Tables:Data Managers">data managers</A>. 00355 // <li> 00356 // Bind each column to the appropriate data manager. 00357 // The system will bind unbound columns to data managers which 00358 // are created internally using the default data manager name 00359 // defined in the column description. 00360 // <li> 00361 // Define the shape of direct columns (if that was not already done in the 00362 // column description). 00363 // <li> 00364 // Create the <linkto class="Table:description">Table</linkto> 00365 // object from the SetupNewTable object. Here, a final check is performed 00366 // and the necessary files are created. 00367 // </ol> 00368 // The recipe above is meant for the creation of a plain table, but the 00369 // creation of a memory table is exactly the same. The only difference 00370 // is that in the call to construct the Table object the Table::Memory 00371 // type has to be given. Note that in the SetupNewTable object the columns 00372 // can be bound to any data manager. <src>MemoryTable</src> will rebind 00373 // stored columns to the <linkto class=MemoryStMan>MemoryStMan</linkto> 00374 // storage manager, but virtual column bindings are not changed. 00375 00376 // 00377 // The following example shows how you can create a table. An example 00378 // specifically illustrating the creation of the 00379 // <A HREF="#Tables:Table Description">table description</A> is given 00380 // in that section. 
Other sections discuss the access to the table. 00381 // 00382 // <srcblock> 00383 // #include <tables/Tables/TableDesc.h> 00384 // #include <tables/Tables/SetupNewTab.h> 00385 // #include <tables/Tables/Table.h> 00386 // #include <tables/Tables/ScaColDesc.h> 00387 // #include <tables/Tables/ScaRecordColDesc.h> 00388 // #include <tables/Tables/ArrColDesc.h> 00389 // #include <tables/Tables/StandardStMan.h> 00390 // #include <tables/Tables/IncrementalStMan.h> 00391 // 00392 // main() 00393 // { 00394 // // Step1 -- Build the table description. 00395 // TableDesc td("tTableDesc", "1", TableDesc::Scratch); 00396 // td.comment() = "A test of class SetupNewTable"; 00397 // td.addColumn (ScalarColumnDesc<Int> ("ab" ,"Comment for column ab")); 00398 // td.addColumn (ScalarColumnDesc<Int> ("ac")); 00399 // td.addColumn (ScalarColumnDesc<uInt> ("ad","comment for ad")); 00400 // td.addColumn (ScalarColumnDesc<Float> ("ae")); 00401 // td.addColumn (ScalarRecordColumnDesc ("arec")); 00402 // td.addColumn (ArrayColumnDesc<Float> ("arr1",3,ColumnDesc::Direct)); 00403 // td.addColumn (ArrayColumnDesc<Float> ("arr2",0)); 00404 // td.addColumn (ArrayColumnDesc<Float> ("arr3",0,ColumnDesc::Direct)); 00405 // 00406 // // Step 2 -- Setup a new table from the description. 00407 // SetupNewTable newtab("newtab.data", td, Table::New); 00408 // 00409 // // Step 3 -- Create storage managers for it. 00410 // StandardStMan stmanStand_1; 00411 // StandardStMan stmanStand_2; 00412 // IncrementalStMan stmanIncr; 00413 // 00414 // // Step 4 -- First, bind all columns to the first storage 00415 // // manager. Then, bind a few columns to another storage manager 00416 // // (which will overwrite the previous bindings). 00417 // newtab.bindAll (stmanStand_1); 00418 // newtab.bindColumn ("ab", stmanStand_2); 00419 // newtab.bindColumn ("ae", stmanIncr); 00420 // newtab.bindColumn ("arr3", stmanIncr); 00421 // 00422 // // Step 5 -- Define the shape of the direct columns. 
00423 // // (this could have been done in the column description). 00424 // newtab.setShapeColumn( "arr1", IPosition(3,2,3,4)); 00425 // newtab.setShapeColumn( "arr3", IPosition(3,3,4,5)); 00426 // 00427 // // Step 6 -- Finally, create the table consisting of 10 rows. 00428 // Table tab(newtab, 10); 00429 // 00430 // // Now we can fill the table, which is shown in a next section. 00431 // // The Table destructor will flush the table to the files. 00432 // } 00433 // </srcblock> 00434 // To create a table in memory, only step 6 has to be modified slightly to: 00435 // <srcblock> 00436 // Table tab(newtab, Table::Memory, 10); 00437 // </srcblock> 00438 00439 // <A NAME="Tables:write"> 00440 // <h3>Writing into a Table</h3></A> 00441 // 00442 // Once a table has been created or has been opened for read/write, 00443 // you want to write data into it. Before doing that you may have 00444 // to add one or more rows to the table. 00445 // <note role=tip> When a table was created with a given number of rows, you 00446 // do not need to add rows; you may not even be able to do so. 00447 // </note> 00448 // 00449 // When adding new rows to the table, either via the 00450 // <linkto class="Table">Table(...) constructor</linkto> 00451 // or via the 00452 // <linkto class="Table">Table::addRow(...)</linkto> 00453 // function, you can choose to have those rows initialized with the 00454 // default values given in the description. 00455 // 00456 // To actually write the data into the table you need the classes 00457 // <linkto class="ScalarColumn:description">ScalarColumn<T></linkto> and 00458 // <linkto class="ArrayColumn:description">ArrayColumn<T></linkto>. 00459 // For each column you can construct one or 00460 // more of these objects. Their put(...) functions 00461 // let you write a value at a time or the entire column in one go. 00462 // For arrays you can "put" subsections of the arrays. 00463 // 00464 // As an alternative for scalars of a standard data type (i.e. 
Bool, 00465 // uChar, Int, Short, uShort, uInt, float, double, Complex, DComplex 00466 // and String) you could use the functions 00467 // <linkto class="TableColumn">TableColumn::putScalar(...)</linkto>. 00468 // These functions offer an extra: automatic data type promotion; so that 00469 // you can, for example, put a float value in a double column. 00470 // 00471 // A typical program could look like: 00472 // <srcblock> 00473 // #include <tables/Tables/TableDesc.h> 00474 // #include <tables/Tables/SetupNewTab.h> 00475 // #include <tables/Tables/Table.h> 00476 // #include <tables/Tables/ScaColDesc.h> 00477 // #include <tables/Tables/ArrColDesc.h> 00478 // #include <tables/Tables/ScalarColumn.h> 00479 // #include <tables/Tables/ArrayColumn.h> 00480 // #include <casa/Arrays/Vector.h> 00481 // #include <casa/Arrays/Slicer.h> 00482 // #include <casa/Arrays/ArrayMath.h> 00483 // #include <iostream> 00484 // 00485 // main() 00486 // { 00487 // // First build the table description. 00488 // TableDesc td("tTableDesc", "1", TableDesc::Scratch); 00489 // td.comment() = "A test of class SetupNewTable"; 00490 // td.addColumn (ScalarColumnDesc<Int> ("ac")); 00491 // td.addColumn (ArrayColumnDesc<Float> ("arr2",0)); 00492 // 00493 // // Setup a new table from the description, 00494 // // and create the (still empty) table. 00495 // // Note that since we do not explicitly bind columns to 00496 // // data managers, all columns will be bound to the default 00497 // // standard storage manager StandardStMan. 00498 // SetupNewTable newtab("newtab.data", td, Table::New); 00499 // Table tab(newtab); 00500 // 00501 // // Construct the various column objects. 00502 // // Their data type has to match the data type in the description. 00503 // ScalarColumn<Int> ac (tab, "ac"); 00504 // ArrayColumn<Float> arr2 (tab, "arr2"); 00505 // Vector<Float> vec2(100); 00506 // 00507 // // Write the data into the columns. 00508 // // In each cell arr2 will be a vector of length 100. 
00509 // // Since its shape is not set explicitly, it is done implicitly. 00510 // for (uInt i=0; i<10; i++) { 00511 // tab.addRow(); // First add a row. 00512 // ac.put (i, i+10); // value is i+10 in row i 00513 // indgen (vec2, float(i+20)); // vec2 gets i+20, i+21, ..., i+119 00514 // arr2.put (i, vec2); 00515 // } 00516 // 00517 // // Finally, show the entire column ac, 00518 // // and show the 10th element of arr2. 00519 // cout << ac.getColumn(); 00520 // cout << arr2.getColumn (Slicer(Slice(10))); 00521 // 00522 // // The Table destructor writes the table. 00523 // } 00524 // </srcblock> 00525 // 00526 // In this example we added rows in the for loop, but we could also have 00527 // created 10 rows straightaway by constructing the Table object as: 00528 // <srcblock> 00529 // Table tab(newtab, 10); 00530 // </srcblock> 00531 // in which case we would not include 00532 // <srcblock> 00533 // tab.addRow() 00534 // </srcblock> 00535 // 00536 // The classes 00537 // <linkto class="TableColumn:description">TableColumn</linkto>, 00538 // <linkto class="ScalarColumn:description">ScalarColumn<T></linkto>, and 00539 // <linkto class="ArrayColumn:description">ArrayColumn<T></linkto> 00540 // contain several functions to put values into a single cell or into the 00541 // whole column. This may look confusing, but is actually quite simple. 00542 // The functions can be divided in two groups: 00543 // <ol> 00544 // <li> 00545 // Put the given value into the column cell(s). 00546 // <ul> 00547 // <li> 00548 // The simplest put functions, 00549 // <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto> and 00550 // <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>, 00551 // put a value into the given column cell. For convenience, there is an 00552 // <linkto class="ArrayColumn">ArrayColumn::putSlice(...)</linkto> 00553 // to put only a part of the array. 
00554 // <li> 00555 // <linkto class="ScalarColumn">ScalarColumn::fillColumn(...)</linkto> and 00556 // <linkto class="ArrayColumn">ArrayColumn::fillColumn(...)</linkto> 00557 // fill an entire column by putting the given value into all the cells 00558 // of the column. 00559 // <li> 00560 // The simplest putColumn functions, 00561 // <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto> and 00562 // <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>, 00563 // put an array of values into the column. There is a special 00564 // <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto> 00565 // version which puts only a part of the arrays. 00566 // </ul> 00567 // 00568 // <li> 00569 // Copy values from another column to this column.<BR> 00570 // These functions have the advantage that the 00571 // data type of the input and/or output column can be unknown. 00572 // The generic (RO)TableColumn objects can be used for this purpose. 00573 // The put(Column) function checks the data types and, if possible, 00574 // converts them. If the conversion is not possible, it throws an 00575 // exception. 00576 // <ul> 00577 // <li> 00578 // The put functions copy the value in a cell of the input column 00579 // to a cell in the output column. The row numbers of the cells 00580 // in the columns can be different. 00581 // <li> 00582 // The putColumn functions copy the entire contents of the input column 00583 // to the output column. The lengths of the columns must be equal. 00584 // </ul> 00585 // Each class has its own set of these functions. 00586 // <ul> 00587 // <li> 00588 // <linkto class="TableColumn">TableColumn::put(...)</linkto> and 00589 // <linkto class="TableColumn">TableColumn::putColumn(...)</linkto> 00590 // are the most generic. They can be 00591 // used when the data types of both input and output column are unknown. 00592 // Note that these functions are virtual. 
00593 // <li> 00594 // <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto>, 00595 // <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>, 00596 // <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto>, and 00597 // <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto> 00598 // are less generic and therefore potentially more efficient. 00599 // The most efficient variants are the ones taking a 00600 // ROScalar/ArrayColumn<T>, because they require no data type 00601 // conversion. 00602 // </ul> 00603 // </ol> 00604 00605 // <A NAME="Tables:row-access"> 00606 // <h3>Accessing rows in a Table</h3></A> 00607 // 00608 // Apart from accessing a table column-wise as described in the 00609 // previous two sections, it is also possible to access a table row-wise. 00610 // The <linkto class=TableRow>TableRow</linkto> class makes it possible 00611 // to access multiple fields in a table row as a whole. Note that like the 00612 // XXColumn classes described above, there is also an ROTableRow class 00613 // for access to readonly tables. 00614 // <p> 00615 // On construction of a TableRow object it has to be specified which 00616 // fields (i.e. columns) are part of the row. For these fields a 00617 // fixed structured <linkto class=TableRecord>TableRecord</linkto> 00618 // object is constructed as part of the TableRow object. The TableRow::get 00619 // function will fill this record with the table data for the given row. 00620 // The user has access to the record and can use 00621 // <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> objects for 00622 // speedier access to the record. 00623 // <p> 00624 // The class could be used as shown in the following example. 00625 // <srcblock> 00626 // // Open the table as readonly and define a row object to contain 00627 // // the given columns. 00628 // // Note that the function stringToVector is a very convenient 00629 // // way to construct a Vector<String>. 
00630 // // Show the description of the fields in the row. 00631 // Table table("Some.table"); 00632 // ROTableRow row (table, stringToVector("col1,col2,col3")); 00633 // cout << row.record().description(); 00634 // // Since the structure of the record is known, the RecordFieldPtr 00635 // // objects could be used to allow for easy and fast access to 00636 // // the record which is refilled for each get. 00637 // RORecordFieldPtr<String> col1(row.record(), "col1"); 00638 // RORecordFieldPtr<Double> col2(row.record(), "col2"); 00639 // RORecordFieldPtr<Array<Int> > col3(row.record(), "col3"); 00640 // for (uInt i=0; i<table.nrow(); i++) { 00641 // row.get (i); 00642 // someString = *col1; 00643 // somedouble = *col2; 00644 // someArrayInt = *col3; 00645 // } 00646 // </srcblock> 00647 // The description of TableRow contains some more extensive examples. 00648 00649 // <A NAME="Tables:select and sort"> 00650 // <h3>Table Selection and Sorting</h3></A> 00651 // 00652 // The result of a select and sort of a table is another table, 00653 // which references the original table. This means that an update 00654 // of a sorted or selected table results in the update of the original 00655 // table. The result is, however, a table in itself, so all table 00656 // functions (including select and sort) can be used with it. 00657 // Note that a true copy of such a reference table can be made with 00658 // the <linkto class=Table>Table::deepCopy</linkto> function. 00659 // <p> 00660 // Rows or columns can be selected from a table. Columns can be selected 00661 // by the 00662 // <linkto class="Table">Table::project(...)</linkto> 00663 // function, while rows can be selected by the various 00664 // <linkto class="Table">Table operator()</linkto> functions. 00665 // Usually a row is selected by giving a select expression with 00666 // <linkto class="TableExprNode:description">TableExprNode</linkto> 00667 // objects. 
These objects represent the various nodes 00668 // in an expression, e.g. a constant, a column, or a subexpression. 00669 // The Table function 00670 // <linkto class="Table">Table::col(...)</linkto> 00671 // creates a TableExprNode object for a column. The function 00672 // <linkto class="Table">Table::key(...)</linkto> 00673 // does the same for a keyword by reading 00674 // the keyword value and storing it as a constant in an expression node. 00675 // All column nodes in an expression must belong to the same table, 00676 // otherwise an exception is thrown. 00677 // In the following example we select all rows with RA>10: 00678 // <srcblock> 00679 // #include <tables/Tables/ExprNode.h> 00680 // Table table ("Table.name"); 00681 // Table result = table (table.col("RA") > 10); 00682 // </srcblock> 00683 // while in the next one we select rows with RA and DEC in the given 00684 // intervals: 00685 // <srcblock> 00686 // Table result = table (table.col("RA") > 10 00687 // && table.col("RA") < 14 00688 // && table.col("DEC") >= -10 00689 // && table.col("DEC") <= 10); 00690 // </srcblock> 00691 // The following operators can be used to form arbitrarily 00692 // complex expressions: 00693 // <ul> 00694 // <li> Relational operators ==, !=, >, >=, < and <=. 00695 // <li> Logical operators &&, || and !. 00696 // <li> Arithmetic operators +, -, *, /, %, ^, and unary + and -. 00697 // <li> Operator() to take a subsection of an array. 00698 // </ul> 00699 // Many functions (like sin, max, conj) can be used in an expression. 00700 // Class <linkto class=TableExprNode>TableExprNode</linkto> shows 00701 // the available functions. 00702 // E.g. 00703 // <srcblock> 00704 // Table result = table (sin (table.col("RA")) > 0.5); 00705 // </srcblock> 00706 // Function <src>in</src> can be used to select from a set of values. 00707 // A value set can be constructed using class 00708 // <linkto class=TableExprNodeSet>TableExprNodeSet</linkto>. 
00709 // <srcblock> 00710 // TableExprNodeSet set; 00711 // set.add (TableExprNodeSetElem ("abc")); 00712 // set.add (TableExprNodeSetElem ("defg")); 00713 // set.add (TableExprNodeSetElem ("h")); 00714 // Table result = table (table.col("NAME").in (set)); 00715 // </srcblock> 00716 // selects rows with a NAME equal to <src>abc</src>, 00717 // <src>defg</src>, or <src>h</src>. 00718 // 00719 // <p> 00720 // You can sort a table on one or more columns containing scalars. 00721 // In this example we simply sort on column RA (default is ascending): 00722 // <srcblock> 00723 // Table table ("Table.name"); 00724 // Table result = table.sort ("RA"); 00725 // </srcblock> 00726 // Multiple 00727 // <linkto class="Table">Table::sort(...)</linkto> 00728 // functions exist which allow for more flexible control over the sort order. 00729 // In the next example we sort first on RA in descending order 00730 // and then on DEC in ascending order: 00731 // <srcblock> 00732 // Table table ("Table.name"); 00733 // Block<String> sortKeys(2); 00734 // Block<int> sortOrders(2); 00735 // sortKeys(0) = "RA"; 00736 // sortOrders(0) = Sort::Descending; 00737 // sortKeys(1) = "DEC"; 00738 // sortOrders(1) = Sort::Ascending; 00739 // Table result = table.sort (sortKeys, sortOrders); 00740 // </srcblock> 00741 // 00742 // Tables stemming from the same root can be combined in several 00743 // ways with the help of the various logical 00744 // <linkto class="Table">Table operators</linkto> (operator|, etc.). 00745 00746 // <h4>Table Query Language</h4> 00747 // The selection and sorting mechanism described above can only be used 00748 // in a hard-coded way in a C++ program. 00749 // There is, however, another way. Strings containing selection and 00750 // sorting commands can be used. 00751 // The syntax of these commands is based on SQL and is described in the 00752 // <a href="../../notes/199/199.html">Table Query Language</a> (TaQL).
00753 // <br>Such a command can be executed with the static function 00754 // <src>TableParse::tableCommand</src> defined in class 00755 // <linkto class=TableParse>TableParse</linkto>. 00756 00757 // <A NAME="Tables:iterate"> 00758 // <h3>Table Iterators</h3></A> 00759 // 00760 // You can iterate through a table in an arbitrary order by getting 00761 // a subset of the table consisting of the rows in which the iteration 00762 // columns have the same value. 00763 // An iterator object is created by constructing a 00764 // <linkto class="TableIterator:description">TableIterator</linkto> 00765 // object with the appropriate column names. 00766 // 00767 // In the next example we define an iteration on the columns Time and 00768 // Baseline. Each iteration step returns a table subset in which Time and 00769 // Baseline have the same value. 00770 // 00771 // <srcblock> 00772 // // Iterate over Time and Baseline (by default in ascending order). 00773 // // Time is the main iteration order, thus the first column specified. 00774 // Table t; 00775 // Table tab ("UV_Table.data"); 00776 // Block<String> iv0(2); 00777 // iv0[0] = "Time"; 00778 // iv0[1] = "Baseline"; 00779 // // 00780 // // Create the iterator. This will prepare the first subtable. 00781 // TableIterator iter(tab, iv0); 00782 // Int nr = 0; 00783 // while (!iter.pastEnd()) { 00784 // // Get the first subtable. 00785 // // This will contain rows with equal Time and Baseline. 00786 // t = iter.table(); 00787 // cout << t.nrow() << " "; 00788 // nr++; 00789 // // Prepare the next subtable with the next Time,Baseline value. 00790 // iter.next(); 00791 // } 00792 // cout << endl << nr << " iteration steps" << endl; 00793 // </srcblock> 00794 // 00795 // You can define more than one iterator on the same table; they operate 00796 // independently. 00797 // 00798 // Note that the result of each iteration step is a table in itself which 00799 // references the original table, just as in the case of a sort or select. 
00800 // This means that the resulting table can be used again in a sort, select, 00801 // iteration, etc. 00802 00803 // <A NAME="Tables:vectors"> 00804 // <h3>Table Vectors</h3></A> 00805 // 00806 // A table vector makes it possible to treat a column in a table 00807 // as a vector. Almost all operators and functions defined for normal 00808 // vectors are also defined for table vectors. So it is, for instance, 00809 // possible to add a constant to a table vector. This has the effect 00810 // that the underlying column gets changed. 00811 // 00812 // You can use the templated classes 00813 // <linkto class="ROTableVector:description">ROTableVector</linkto> and 00814 // <linkto class="TableVector:description">TableVector</linkto> 00815 // to define a table vector (readonly and read/write, respectively) for 00816 // a scalar column. Columns containing arrays or tables are not supported. 00817 // The data type of the (RO)TableVector object must match the 00818 // data type of the column. 00819 // A table vector can also hold a normal vector so that (temporary) 00820 // results of table vector operations can be handled. 00821 // 00822 // In the following example we double the data in column COL1 and 00823 // store the result in a temporary table vector. 00824 // <srcblock> 00825 // // Create a table vector for column COL1. 00826 // // It has to be a ROTableVector, because the table is opened 00827 // // as readonly. 00828 // Table tab ("Table.data"); 00829 // ROTableVector<Int> tabvec(tab, "COL1"); 00830 // // Multiply it by a constant. 00831 // // The result has to be stored in a TableVector, 00832 // // since a ROTableVector cannot be written to. 00833 // TableVector<Int> temp = 2 * tabvec; 00834 // </srcblock> 00835 // 00836 // In the next example we double the data in COL1 and put the result back 00837 // in the column. 00838 // <srcblock> 00839 // // Create a table vector for column COL1.
00840 // // It has to be a TableVector to be able to change the column. 00841 // Table tab ("Table.data", Table::Update); 00842 // TableVector<Int> tabvec(tab, "COL1"); 00843 // // Multiply it by a constant. 00844 // tabvec *= 2; 00845 // </srcblock> 00846 00847 // <A NAME="Tables:keywords"> 00848 // <h3>Table Keywords</h3></A> 00849 // 00850 // Any number of keyword/value pairs may be attached to the table as a whole, 00851 // or to any individual column. They may be freely added, retrieved, 00852 // re-assigned, or deleted. They are, in essence, a self-resizing list of 00853 // values (any of the primitive types) indexed by Strings (the keyword). 00854 // 00855 // A table keyword/value pair might be 00856 // <srcblock> 00857 // Observer = Grote Reber 00858 // Date = 10 october 1942 00859 // </srcblock> 00860 // Column keyword/value pairs might be 00861 // <srcblock> 00862 // Units = mJy 00863 // Reference Pixel = 320 00864 // </srcblock> 00865 // The class 00866 // <linkto class="TableRecord:description">TableRecord</linkto> 00867 // represents the keywords in a table. 00868 // It is (indirectly) derived from the standard record classes in the class 00869 // <linkto class="Record:description">Record</linkto> 00870 00871 // <A NAME="Tables:Table Description"> 00872 // <h3>Table Description</h3></A> 00873 // 00874 // A table contains a description of itself, which defines the layout of the 00875 // columns and the keyword sets for the table and for the individual columns. 00876 // It may also define initial keyword sets and default values for the columns. 00877 // Such a default value is automatically stored in a cell in the table column, 00878 // whenever a row is added to the table. 00879 // 00880 // The creation of the table descriptor is the first step in the creation of 00881 // a new table. The description is part of the table itself, but may also 00882 // exist in a separate file. 
This is useful when you need to create a number 00883 // of tables with the same structure; in other circumstances it probably 00884 // should be avoided. 00885 // 00886 // The public classes to set up a table description are: 00887 // <ul> 00888 // <li> <linkto class="TableDesc:description">TableDesc</linkto> 00889 // -- holds the table description. 00890 // <li> <linkto class="ColumnDesc:description">ColumnDesc</linkto> 00891 // -- holds a generic column description. 00892 // <li> <linkto class="ScalarColumnDesc:description">ScalarColumnDesc&lt;T&gt; 00893 // </linkto> 00894 // -- defines a column containing a scalar value. 00895 // <li> <linkto class="ScalarRecordColumnDesc:description">ScalarRecordColumnDesc 00896 // </linkto> 00897 // -- defines a column containing a scalar record value. 00898 // <li> <linkto class="ArrayColumnDesc:description">ArrayColumnDesc&lt;T&gt; 00899 // </linkto> 00900 // -- defines a column containing an (in)direct array. 00901 // </ul> 00902 // 00903 // Here follows a typical example of the construction of a table 00904 // description. For more specialized things -- like the definition of a 00905 // default data manager -- we refer to the descriptions of the above 00906 // mentioned classes. 00907 // 00908 // <srcblock> 00909 // #include <tables/Tables/TableDesc.h> 00910 // #include <tables/Tables/ScaColDesc.h> 00911 // #include <tables/Tables/ArrColDesc.h> 00912 // #include <tables/Tables/ScaRecordColDesc.h> 00913 // #include <tables/Tables/TableRecord.h> 00914 // #include <casa/Arrays/IPosition.h> 00915 // #include <casa/Arrays/Vector.h> 00916 // 00917 // main() 00918 // { 00919 // // Create a new table description 00920 // // Define a comment for the table description. 00921 // // Define some keywords.
00922 // ColumnDesc colDesc1, colDesc2; 00923 // TableDesc td("tTableDesc", "1", TableDesc::New); 00924 // td.comment() = "A test of class TableDesc"; 00925 // td.rwKeywordSet().define ("ra", float(3.14)); 00926 // td.rwKeywordSet().define ("equinox", double(1950)); 00927 // td.rwKeywordSet().define ("aa", Int(1)); 00928 // 00929 // // Define an integer column ab. 00930 // td.addColumn (ScalarColumnDesc<Int> ("ab", "Comment for column ab")); 00931 // 00932 // // Add a scalar integer column ac, define keywords for it 00933 // // and define a default value 0. 00934 // // Overwrite the value of keyword unit. 00935 // ScalarColumnDesc<Int> acColumn("ac"); 00936 // acColumn.rwKeywordSet().define ("scale", Complex(0,0)); 00937 // acColumn.rwKeywordSet().define ("unit", ""); 00938 // acColumn.setDefault (0); 00939 // td.addColumn (acColumn); 00940 // td.rwColumnDesc("ac").rwKeywordSet().define ("unit", "DEG"); 00941 // 00942 // // Add a scalar string column ad and define its comment string. 00943 // td.addColumn (ScalarColumnDesc<String> ("ad","comment for ad")); 00944 // 00945 // // Now define array columns. 00946 // // This one is indirect and has no dimensionality mentioned yet. 00947 // td.addColumn (ArrayColumnDesc<Complex> ("Arr1","comment for Arr1")); 00948 // // This one is indirect and has 3-dim arrays. 00949 // td.addColumn (ArrayColumnDesc<Int> ("Arr2","comment for Arr2",3)); 00950 // // This one is direct and has 2-dim arrays with axes length 4 and 7. 00951 // td.addColumn (ArrayColumnDesc<uInt> ("Arr3","comment for Arr3", 00952 // IPosition(2,4,7), 00953 // ColumnDesc::Direct)); 00954 // 00955 // // Add columns containing records. 00956 // td.addColumn (ScalarRecordColumnDesc ("Rec1")); 00957 // } 00958 // </srcblock> 00959 00960 // <A NAME="Tables:Data Managers"> 00961 // <h3>Data Managers</h3></A> 00962 // 00963 // Data managers take care of the actual access to the data in a column.
00964 // There are two kinds of data managers: 00965 // <ol> 00966 // <li> <A HREF="#Tables:storage managers">Storage managers</A> -- 00967 // which store the data as such. They can only handle the standard 00968 // data types (Bool,...,String) as discussed in the section about the 00969 // <A HREF="#Tables:properties">table properties</A>. 00970 // <li> <A HREF="#Tables:virtual column engines">Virtual column engines</A> 00971 // -- which manipulate the data. 00972 // An engine could be a simple thing like scaling the data (as done 00973 // in classic AIPS to reduce data storage), but it could also be an 00974 // elaborate thing like applying corrections on-the-fly. 00975 // <br>An engine must be used to store data objects with a non-standard type. 00976 // It has to break down the object into items with standard data types 00977 // which can be stored with a storage manager. 00978 // </ol> 00979 // In general the user of a table does not need to be aware which 00980 // data managers are being used underneath. Only when the table is created 00981 // do data managers have to be bound to the columns. Thereafter it is 00982 // completely transparent. 00983 00984 // <A NAME="Tables:storage managers"> 00985 // <h3>Storage Managers</h3></A> 00986 // 00987 // Several storage managers are currently supported. 00988 // The default and preferred storage manager is <src>StandardStMan</src>. 00989 // Other storage managers should only be used when they pay off in 00990 // file space (like <src>IncrementalStMan</src> for slowly varying data) 00991 // or access speed (like the tiled storage managers for large data arrays). 00992 // <br>The storage managers store the data in a big or little endian 00993 // canonical format. The format can be specified when the table is created. 00994 // By default it uses the endian format as specified in the aipsrc variable 00995 // <code>table.endianformat</code> which can have the value local, big, 00996 // or little. The default is local.
00997 // <ol> 00998 // <li> 00999 // <linkto class="StandardStMan:description">StandardStMan</linkto> 01000 // stores all the values in so-called buckets (equally sized chunks 01001 // in the file). It requires little memory. 01002 // <br>It replaces the old <src>StManAipsIO</src>. 01003 // 01004 // <li> 01005 // <linkto class="IncrementalStMan:description">IncrementalStMan</linkto> 01006 // uses a storage mechanism resembling "incremental backups". A value 01007 // is only stored when it is different from the previous row. It is 01008 // very well suited for slowly varying data. 01009 // <br>The class <linkto class="ROIncrementalStManAccessor:description"> 01010 // ROIncrementalStManAccessor</linkto> can be used to tune the 01011 // behaviour of the <src>IncrementalStMan</src>. It contains functions 01012 // to deal with the cache size and to show the behaviour of the cache. 01013 // 01014 // <li> 01015 // The <a href="#Tables:TiledStMan">Tiled Storage Managers</a> 01016 // store the data as a tiled hypercube allowing for more or less equally 01017 // efficient data access along all main axes. They can be used for 01018 // UV-data as well as for image data. 01019 // 01020 // <li> 01021 // <linkto class="StManAipsIO:description">StManAipsIO</linkto> 01022 // uses <src>AipsIO</src> to store the data in the columns. 01023 // It supports all table functionality, but its I/O is probably not 01024 // as efficient as other storage managers. It also requires that 01025 // a large part of the table fits in memory. 01026 // <br>It should not be used anymore, because it uses a lot of memory 01027 // for larger tables and because it is not very robust in case an 01028 // application or system crashes. 01029 // 01030 // 01031 // <li> 01032 // <linkto class="MemoryStMan:description">MemoryStMan</linkto> 01033 // holds the data in memory. It means that data 'stored' with this 01034 // storage manager are NOT persistent.
01035 // <br>This storage manager is primarily meant for tables held in 01036 // memory, but it can also be useful for temporary columns in 01037 // normal tables. Note, however, that when a table is accessed 01038 // concurrently from multiple processes, MemoryStMan data cannot be 01039 // synchronized. 01040 // </ol> 01041 // 01042 // The storage manager framework makes it possible to support arbitrary files 01043 // as tables. This has been used in a case where a file is filled 01044 // by the data acquisition system of a telescope. The file is simultaneously 01045 // used as a table using a dedicated storage manager. The table 01046 // system and storage manager provide a sync function to synchronize 01047 // the processes, i.e. to make the table system aware of changes 01048 // in the file size (thus in the table size) by the filling process. 01049 // 01050 // <note role=tip> 01051 // Not all data managers support all the table functionality. So, the choice 01052 // of a data manager can greatly influence the type of operations you can do 01053 // on the table as a whole. 01054 // For example, if a column uses the tiled storage manager, 01055 // it is not possible to delete rows from the table, because that storage 01056 // manager does not support deletion of rows. 01057 // However, it is always possible to delete all columns of a data 01058 // manager in one single call. 01059 // </note> 01060 01061 // <A NAME="Tables:TiledStMan"> 01062 // <h3>Tiled Storage Manager</h3></A> 01063 // The Tiled Storage Managers allow one to store the data of 01064 // one or more columns in a tiled way. Tiling means 01065 // that the data are stored without a preferred order to make access 01066 // along the different main axes equally efficient. This is done by 01067 // storing the data in so-called tiles (i.e. equally shaped subsets of an 01068 // array) to increase data locality. The user can define the tile shape 01069 // to optimize for the most frequently used access.
01070 // <p> 01071 // The Tiled Storage Manager has the following properties: 01072 // <ul> 01073 // <li> There can be more than one Tiled Storage Manager in 01074 // a table; each with its own (unique) name. 01075 // <li> Each Tiled Storage Manager can store an 01076 // N-dimensional so-called hypercolumn (defined using 01077 // <linkto file="TableDesc.h#defineHypercolumn"> 01078 // TableDesc::defineHypercolumn</linkto>). 01079 // <br>A hypercolumn consists of up to three types 01080 // of columns: 01081 // <dl> 01082 // <dt> Data columns 01083 // <dd> contain the data to be stored in a tiled way. This will 01084 // be done in tiled hypercubes. 01085 // There must be at least one data column. 01086 // <br> For example: a table contains UV-data with 01087 // data columns "Visibility" and "Weight". 01088 // <dt> Coordinate columns 01089 // <dd> define the world coordinates of the pixels in the data columns. 01090 // Coordinate columns are optional, but when given there must 01091 // be N coordinate columns for an N-dimensional hypercolumn. 01092 // <br> 01093 // For example: the data in the example above is 4-dimensional 01094 // and has coordinate columns "Time", "Baseline", "Frequency", 01095 // and "Polarization". 01096 // <dt> Id columns 01097 // <dd> are needed when TiledDataStMan is used. 01098 // Different rows in the data columns can be stored in different 01099 // hypercubes. The values in the id column(s) uniquely identify 01100 // the hypercube a row is stored in. 01101 // <br> 01102 // For example: the line and continuum data in a MeasurementSet 01103 // table need to be stored in 2 different hypercubes (because 01104 // their shapes are different (see below)). A column containing 01105 // the type (line or continuum) has to be used as an id column. 01106 // </dl> 01107 // <li> When multiple data columns are used, the shape of their data 01108 // must be conforming in each individual row.
01109 // When data in different rows have different shapes, they must be 01110 // stored in different hypercubes, because a hypercube can only hold 01111 // data with conforming shapes. 01112 // <br> 01113 // Thus in the example above, rows with line data will have conforming 01114 // shapes and can be stored in one hypercube. The continuum data 01115 // will have another shape and can be stored in another hypercube. 01116 // <br> 01117 // The storage manager keeps track of the mapping of rows to/from 01118 // hypercubes. 01119 // <li> Each hypercube can be tiled in its own way. It is not required 01120 // that an integer number of tiles fits in the hypercube. The last 01121 // tiles will be padded as needed. 01122 // <li> The last axis of a hypercube can be extensible. This means that 01123 // the size of that axis does not need to be defined when the 01124 // hypercube is defined in the storage manager. Instead, the hypercube 01125 // can be extended when another chunk of data has to be stored. 01126 // This can be very useful in, for example, a (quasi-)realtime 01127 // environment where the size of the time axis is not known. 01128 // <li> When coordinate columns are defined, they describe the coordinates 01129 // of the axes of the hypercubes. Each hypercube has its own set of 01130 // coordinates. 01131 // <li> Data and id columns have to be stored with the Tiled 01132 // Storage Manager. However, coordinate columns do not need to be 01133 // stored with the Tiled Storage Manager. 01134 // Especially in the case where the coordinates for a hypercube axis 01135 // are varying (i.e. dependent on other axes), another storage manager 01136 // has to be used (because the Tiled Storage Manager can only 01137 // hold constant coordinates). 01138 // </ul> 01139 // The Tiled Storage Managers use internal caches to minimize IO. It is 01140 // possible to define a maximum cache size. 
The description of class 01141 // <linkto class=ROTiledStManAccessor>ROTiledStManAccessor</linkto> 01142 // contains a discussion about the effect of defining a maximum cache size. 01143 // <p> 01144 // The following Tiled Storage Managers are available: 01145 // <dl> 01146 // <dt> <linkto class=TiledCellStMan:description>TiledCellStMan</linkto> 01147 // <dd> creates (automatically) a new hypercube for each row. 01148 // Thus each row of the hypercolumn is stored in a separate hypercube. 01149 // Note that the row number serves as the id value. So an id column 01150 // is not needed, although there are multiple hypercubes. 01151 // <br> 01152 // This storage manager is meant for tables where the data arrays 01153 // in the different rows are not accessed together. One can think 01154 // of a column containing images. Each row contains an image and 01155 // only one image is shown at a time. 01156 // <dt> <linkto class=TiledColumnStMan:description>TiledColumnStMan</linkto> 01157 // <dd> creates one hypercube for the entire hypercolumn. Thus all cells 01158 // in the hypercube have to have the same shape and therefore this 01159 // storage manager is only possible when all columns in the hypercolumn 01160 // have the attribute FixedShape. 01161 // <br> 01162 // This storage manager could be used for a table with a column 01163 // containing images for the Stokes parameters I, Q, U, and V. 01164 // By storing them in one hypercube, it is possible to retrieve 01165 // the 4 Stokes values for a subset of the image or for an individual 01166 // pixel in a very efficient way. 01167 // <dt> <linkto class=TiledDataStMan:description>TiledDataStMan</linkto> 01168 // <dd> allows one to control the creation and extension of hypercubes. 01169 // This is done by means of the class 01170 // <linkto class=TiledDataStManAccessor:description> 01171 // TiledDataStManAccessor</linkto>. 
01172 // This makes it possible to store, say, rows 0-9 in hypercube A, 01173 // rows 10-34 in hypercube B, rows 35-54 in hypercube A again, etc. 01174 // <br> 01175 // This storage manager could be used to store UV-data with a mix 01176 // of continuum and line data. 01177 // <dt> <linkto class=TiledShapeStMan:description>TiledShapeStMan</linkto> 01178 // <dd> can be seen as a specialization of <src>TiledDataStMan</src> 01179 // by using the array shape as the id value. 01180 // Similarly to <src>TiledDataStMan</src> it can maintain multiple 01181 // hypercubes and store multiple rows in a hypercube, but it is 01182 // easier to use, because the special <src>addHypercube</src> and 01183 // <src>extendHypercube</src> functions are not needed. 01184 // A hypercube is automatically added when a new array shape is 01185 // encountered. 01186 // <br> 01187 // This storage manager could be used for a table with a column 01188 // containing line and continuum data, which will result 01189 // in 2 hypercubes. 01190 // </dl> 01191 // 01192 // For example:<br> 01193 // UV-data and weights have to be stored in a table. 01194 // The data have the coordinates Pol, Freq, Baseline and Time. 01195 // There are continuum and line data, which have to be stored in 2 separate 01196 // hypercubes. This could lead to the following scenario when creating/filling 01197 // the table: 01198 // <ul> 01199 // <li> Define a hypercolumn with data columns Data and Weight, 01200 // coordinate columns Pol, Freq, Baseline and Time and id column Id. 01201 // The id column is needed to differentiate between continuum and line. 01202 // <li> Use the storage manager TiledDataStMan to be able to drive which 01203 // hypercube is used. 01204 // <li> Add the two hypercubes (using TiledDataStManAccessor) 01205 // with their correct id values and coordinate values. 01206 // The last axis (i.e. time) is extensible. 01207 // <li> Read the data from a source (which will be in time-order).
01208 // Add rows to the table, extend the appropriate hypercube and put 01209 // the data into the row(s). 01210 // </ul> 01211 // An alternative scenario could be that the data in the source is not 01212 // in time order, but that the size of the data is known. In that case 01213 // the hypercubes can be defined with their correct shape and putColumn 01214 // (with a Slicer) can be used to put the data (and reorder them implicitly). 01215 // <br> 01216 // Another alternative is to use TiledShapeStMan, so the hypercubes are 01217 // added or extended automatically. 01218 01219 // <A NAME="Tables:virtual column engines"> 01220 // <h3>Virtual Column Engines</h3></A> 01221 // 01222 // Virtual column engines are used to implement the virtual (i.e. 01223 // calculated-on-the-fly) columns. The Table system provides 01224 // an abstract base class (or "interface class") 01225 // <linkto class="VirtualColumnEngine:description">VirtualColumnEngine</linkto> 01226 // that specifies the protocol for these engines. 01227 // The programmer must derive a concrete class to implement 01228 // the application-specific virtual column. 01229 // <p> 01230 // For example: the programmer 01231 // needs a column in a table which is the difference between two other 01232 // columns. (Perhaps these two other columns are updated periodically 01233 // during the execution of a program.) A good way to handle this would 01234 // be to have a virtual column in the table, and write a virtual column 01235 // engine which knows how to calculate the difference between corresponding 01236 // cells of the two other columns. So the result is that accessing a 01237 // particular cell of the virtual column invokes the virtual column engine, 01238 // which then gets the values from the other two columns, and returns their 01239 // difference. This particular example could be done using 01240 // <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>. 
01241 // <p> 01242 // Several virtual column engines exist: 01243 // <ol> 01244 // <li> The class 01245 // <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto> 01246 // makes it possible to define a column as an arbitrary expression of 01247 // other columns. It uses the <a href="../../notes/199/199.html">TaQL</a> 01248 // CALC command. The virtual column can be a scalar or an array and 01249 // can have one of the standard data types supported by the Table System. 01250 // <li> The class 01251 // <linkto class="CompressFloat:description">CompressFloat</linkto> 01252 // compresses a single precision floating point array by scaling the 01253 // values to shorts (16-bit integers). 01254 // <li> The class 01255 // <linkto class="CompressComplex:description">CompressComplex</linkto> 01256 // compresses a single precision complex array by scaling the 01257 // values to shorts (16-bit integers). In fact, the 2 parts of the complex 01258 // number are combined into a 32-bit integer. 01259 // <li> The class 01260 // <linkto class="CompressComplexSD:description">CompressComplexSD</linkto> 01261 // does the same as CompressComplex, but optimizes for the case where the 01262 // imaginary part is zero (which is often the case for Single Dish data). 01263 // <li> The double templated class 01264 // <linkto class="ScaledArrayEngine:description">ScaledArrayEngine</linkto> 01265 // scales the data in an array from, for example, 01266 // float to short before putting it. 01267 // <li> The double templated class 01268 // <linkto class="MappedArrayEngine:description">MappedArrayEngine</linkto> 01269 // converts the data from one data type to another. Sometimes it might be 01270 // needed to store the residual data in an MS in double precision. 01271 // Because the imaging task can only handle single precision, this engine 01272 // can be used to map the data from double to single precision.
01273 // <li> The double templated class 01274 // <linkto class="RetypedArrayEngine:description">RetypedArrayEngine</linkto> 01275 // converts the data from one data type to another with the possibility 01276 // to reduce the number of dimensions. For example, it can be used to 01277 // store a 2-d array of StokesVector objects as a 3-d array of floats 01278 // by treating the 4 data elements as an extra array axis. When the 01279 // StokesVector class is simple, it can be done very efficiently. 01280 // <li> The class 01281 // <linkto class="ForwardColumnEngine:description"> 01282 // ForwardColumnEngine</linkto> 01283 // forwards the gets and puts on a row in a column to the same row 01284 // in a column with the same name in another table. This provides 01285 // a virtual copy of the referenced column. 01286 // <li> The class 01287 // <linkto class="ForwardColumnIndexedRowEngine:description"> 01288 // ForwardColumnIndexedRowEngine</linkto> 01289 // is similar to <src>ForwardColumnEngine</src>. 01290 // However, instead of forwarding it to the same row it uses 01291 // a column to map its row number to a row number in the referenced 01292 // table. In this way multiple rows can share the same data. 01293 // This data manager only allows for get operations. 01294 // <li> The calibration module has implemented a virtual column engine 01295 // to do on-the-fly calibration in a transparent way. 01296 // </ol> 01297 // To handle arbitrary data types the templated abstract base class 01298 // <linkto class="VSCEngine:description">VSCEngine</linkto> 01299 // has been written. An example of how to use this class can be 01300 // found in the demo program <src>dVSCEngine.cc</src>. 01301 01302 // <A NAME="Tables:LockSync"> 01303 // <h3>Table locking and synchronization</h3></A> 01304 // 01305 // Multiple concurrent readers and writers (also via NFS) of a 01306 // table are supported by means of a locking/synchronization mechanism.
01307 // This mechanism is not very sophisticated in the sense that it is 01308 // very coarsely grained. When locking, the entire table gets locked. 01309 // A special lock file is used to lock the table. This lock file also 01310 // contains some synchronization data. 01311 // <p> 01312 // Five ways of locking are supported (see class 01313 // <linkto class=TableLock>TableLock</linkto>): 01314 // <dl> 01315 // <dt> TableLock::PermanentLocking(Wait) 01316 // <dd> locks the table permanently (from open till close). This means 01317 // that one writer OR multiple readers are possible. 01318 // <dt> TableLock::AutoLocking 01319 // <dd> does the locking automatically. This is the default mode. 01320 // This mode makes it possible that a table is shared amongst 01321 // processes without the user needing to write any special code. 01322 // It also means that a lock is only released when needed. 01323 // <dt> TableLock::AutoNoReadLocking 01324 // <dd> is similar to AutoLocking. However, no lock is acquired when 01325 // reading the table making it possible to read the table while 01326 // another process holds a write-lock. It also means that for read 01327 // purposes no automatic synchronization is done when the table is 01328 // updated in another process. 01329 // Explicit synchronization can be done by means of the function 01330 // <src>Table::resync</src>. 01331 // <dt> TableLock::UserLocking 01332 // <dd> requires that the programmer explicitly acquires and releases 01333 // a lock on the table. This makes some kind of transaction 01334 // processing possible. E.g. set a write lock, add a row, 01335 // write all data into the row and release the lock. 01336 // The Table functions <src>lock</src> and <src>unlock</src> 01337 // have to be used to acquire and release a (read or write) lock. 01338 // <dt> TableLock::UserNoReadLocking 01339 // <dd> is similar to UserLocking. However, similarly to AutoNoReadLocking 01340 // no lock is needed to read the table. 
01341 // </dl> 01342 // Synchronization of the processes accessing the same table is done 01343 // by means of the lock file. When a lock is released, the storage 01344 // managers flush their data into the table files. Some synchronization data 01345 // is written into the lock file telling the new number of table rows 01346 // and telling which storage managers have written data. 01347 // This information is read when another process acquires the lock 01348 // and is used to determine which storage managers have to refresh 01349 // their internal caches. 01350 // <br>Note that for the NoReadLocking modes (see above) explicit 01351 // synchronization might be needed using <src>Table::resync</src>. 01352 // <p> 01353 // The function <src>Table::hasDataChanged</src> can be used to check 01354 // if a table is (being) changed by another process. In this way 01355 // a program can react on it. E.g. the table browser can refresh its 01356 // screen when the underlying table is changed. 01357 // <p> 01358 // In general the default locking option will do. 01359 // From the above it should be clear that heavy concurrent access 01360 // results in a lot of flushing, thus will have a negative impact on 01361 // performance. When uninterrupted access to a table is needed, 01362 // the <src>PermanentLocking</src> option should be used. 01363 // When transaction-like processing is done (e.g. updating a table 01364 // containing an observation catalogue), the <src>UserLocking</src> 01365 // option is probably best. 01366 // <p> 01367 // Creation or deletion of a table is not possible when that table 01368 // is still open in another process. The function 01369 // <src>Table::isMultiUsed()</src> can be used to check if a table 01370 // is open in other processes. 01371 // <br> 01372 // The function <src>deleteTable</src> should be used to delete 01373 // a table. 
Before deleting the table it ensures that it is writable 01374 // and that it is not open in the current or another process. 01375 // <p> 01376 // The following example wants to read the table uninterrupted, thus it uses 01377 // the <src>PermanentLocking</src> option. It also wants to wait 01378 // until the lock is actually acquired. 01379 // Note that the destructor closes the table and releases the lock. 01380 // <srcblock> 01381 // // Open the table (readonly). 01382 // // Acquire a permanent (read) lock. 01383 // // It waits until the lock is acquired. 01384 // Table tab ("some.name", 01385 // TableLock(TableLock::PermanentLockingWait)); 01386 // </srcblock> 01387 // 01388 // The following example uses the automatic locking. 01389 // It tells the system to check about every 20 seconds if another 01390 // process wants access to the table. 01391 // <srcblock> 01392 // // Open the table (readonly). 01393 // Table tab ("some.name", 01394 // TableLock(TableLock::AutoLocking, 20)); 01395 // </srcblock> 01396 // 01397 // The following example gets data (say from a GUI) and writes it 01398 // as a row into the table. To lock the table as little as possible, 01399 // the lock is acquired just before writing and released immediately 01400 // thereafter. 01401 // <srcblock> 01402 // // Open the table (writable). 01403 // Table tab ("some.name", 01404 // TableLock(TableLock::UserLocking), 01405 // Table::Update); 01406 // while (True) { 01407 // get input data 01408 // tab.lock(); // Acquire a write lock and wait for it. 01409 // tab.addRow(); 01410 // write data into the row 01411 // tab.unlock(); // Release the lock. 01412 // } 01413 // </srcblock> 01414 // 01415 // The following example deletes a table when it is not used in 01416 // another process. 01417 // <srcblock> 01418 // Table tab ("some.name"); 01419 // if (!
tab.isMultiUsed()) { 01420 // tab.markForDelete(); 01421 // } 01422 // </srcblock> 01423 01424 // <A NAME="Tables:KeyLookup"> 01425 // <h3>Table lookup based on a key</h3></A> 01426 // 01427 // Class <linkto class=ColumnsIndex>ColumnsIndex</linkto> offers the 01428 // user a means to find the rows matching a given key or key range. 01429 // It is a somewhat primitive replacement of a B-tree index and in the 01430 // future it may be replaced by a proper B+-tree implementation. 01431 // <p> 01432 // The <src>ColumnsIndex</src> class makes it possible to build an 01433 // in-core index on one or more columns. Looking up a key or key range 01434 // is done using a binary search on that index. It returns a vector 01435 // containing the row numbers of the rows matching the key (range). 01436 // <p> 01437 // The class is not capable of tracing changes in the underlying column(s). 01438 // It detects a change in the number of rows and updates the index 01439 // accordingly. However, it has to be told explicitly when a value 01440 // in the underlying column(s) changes. 01441 // <p> 01442 // The following example shows how the class can be used. 01443 // <example> 01444 // Suppose one has an antenna table with key ANTENNA. 01445 // <srcblock> 01446 // // Open the table and make an index for column ANTENNA. 01447 // Table tab("antenna.tab"); 01448 // ColumnsIndex colInx(tab, "ANTENNA"); 01449 // // Make a RecordFieldPtr for the ANTENNA field in the index key record. 01450 // // Its data type has to match the data type of the column. 01451 // RecordFieldPtr<Int> antFld(colInx.accessKey(), "ANTENNA"); 01452 // // Now loop in some way and find the row for the antenna 01453 // // involved in that loop. 01454 // Bool found; 01455 // while (...) { 01456 // // Fill the key field and get the row number. 01457 // // ANTENNA is a unique key, so only one row number matches. 01458 // // Otherwise function getRowNumbers had to be used.
01459 // *antFld = antenna; 01460 // uInt antRownr = colInx.getRowNumber (found); 01461 // if (!found) { 01462 // cout << "Antenna " << antenna << " is unknown" << endl; 01463 // } else { 01464 // // antRownr can now be used to get data from that row in 01465 // // the antenna table. 01466 // } 01467 // } 01468 // </srcblock> 01469 // </example> 01470 // <linkto class=ColumnsIndex>ColumnsIndex</linkto> itself contains a more 01471 // advanced example. It shows how to use a private compare function 01472 // to adjust the lookup when the index does not contain single 01473 // key values, but intervals instead. This is useful when a row in 01474 // a (sub)table is valid for, say, a time range instead of a single 01475 // timestamp. 01476 01477 // <A NAME="Tables:performance"> 01478 // <h3>Performance and robustness considerations</h3></A> 01479 // 01480 // The Table System resembles a database system, but it is not as robust. 01481 // It lacks the transaction and logging facilities common to data base systems. 01482 // It means that in case of a crash data might be lost. 01483 // To reduce the risk of data loss to 01484 // a minimum, it is advisable to regularly do a <tt>flush</tt>, optionally 01485 // with an <tt>fsync</tt> to ensure that all data are really written. 01486 // However, that can degrade the performance because it involves extra writes. 01487 // So one should find the right balance between robustness and performance. 01488 // 01489 // To get a good feeling for the performance issues, it is important to 01490 // understand some of the internals of the Table System. 01491 // <br>The storage managers drive the performance. All storage managers use 01492 // buckets (called tiles for the TiledStMan) which contain the data. 01493 // All IO is done by bucket. The bucket/tile size is defined when creating 01494 // the storage manager objects. Sometimes the default will do, but usually 01495 // it is better to set it explicitly. 
01496 // 01497 // It is best to do a flush when a tile is full. 01498 // For example: <br> 01499 // When creating a MeasurementSet containing N antennae (thus N*(N-1)/2 baselines 01500 // or N*(N+1)/2 if auto-correlations are stored as well) it makes sense to 01501 // store, say, N/2 rows in a tile and do a flush each time all baselines 01502 // are written. In that way tiles are fully filled when doing the flush, so 01503 // no extra IO is involved. 01504 // <br>Here is some code showing this when creating a MeasurementSet. 01505 // The code should speak for itself. 01506 // <srcblock> 01507 // MS* createMS (const String& msName, int nrchan, int nrant) 01508 // { 01509 // // Get the MS main default table description. 01510 // TableDesc td = MS::requiredTableDesc(); 01511 // // Add the data column and its unit. 01512 // MS::addColumnToDesc(td, MS::DATA, 2); 01513 // td.rwColumnDesc(MS::columnName(MS::DATA)).rwKeywordSet(). 01514 // define("UNIT","Jy"); 01515 // // Store the DATA and FLAG column in two separate files. 01516 // // In this way accessing FLAG only is much cheaper than 01517 // // when combining DATA and FLAG. 01518 // // All data have the same shape, thus use TiledColumnStMan. 01519 // // Also store UVW with TiledColumnStMan. 01520 // Vector<String> tsmNames(1); 01521 // tsmNames[0] = MS::columnName(MS::DATA); 01522 // td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq)); 01523 // td.defineHypercolumn("TiledData", 3, tsmNames); 01524 // tsmNames[0] = MS::columnName(MS::FLAG); 01525 // td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq)); 01526 // td.defineHypercolumn("TiledFlag", 3, tsmNames); 01527 // tsmNames[0] = MS::columnName(MS::UVW); 01528 // td.defineHypercolumn("TiledUVW", 2, tsmNames); 01529 // // Setup the new table. 01530 // SetupNewTable newTab(msName, td, Table::New); 01531 // // Most columns vary slowly and use the IncrStMan.
01532 // IncrementalStMan incrStMan("ISMData"); 01533 // // A few columns use the StandardStMan (set an appropriate bucket size). 01534 // StandardStMan stanStMan("SSMData", 32768); 01535 // // Store all pol and freq and some rows in a single tile. 01536 // // autocorrelations are written, thus in total there are 01537 // // nrant*(nrant+1)/2 baselines. Ensure a baseline takes up an 01538 // // integer number of tiles. 01539 // TiledColumnStMan tiledData("TiledData", 01540 // IPosition(3,4,nrchan,(nrant+1)/2)); 01541 // TiledColumnStMan tiledFlag("TiledFlag", 01542 // IPosition(3,4,nrchan,8*(nrant+1)/2)); 01543 // TiledColumnStMan tiledUVW("TiledUVW", 01544 // IPosition(2,3,nrant*(nrant+1)/2)); 01545 // newTab.bindAll (incrStMan); 01546 // newTab.bindColumn(MS::columnName(MS::ANTENNA1),stanStMan); 01547 // newTab.bindColumn(MS::columnName(MS::ANTENNA2),stanStMan); 01548 // newTab.bindColumn(MS::columnName(MS::DATA),tiledData); 01549 // newTab.bindColumn(MS::columnName(MS::FLAG),tiledFlag); 01550 // newTab.bindColumn(MS::columnName(MS::UVW),tiledUVW); 01551 // // Create the MS and its subtables. 01552 // // Get access to its columns. 01553 // MS* msp = new MeasurementSet(newTab); 01554 // // Create all subtables. 01555 // // Do this after the creation of optional subtables, 01556 // // so the MS will know about those optional subtables. 01557 // msp->createDefaultSubtables (Table::New); 01558 // return msp; 01559 // } 01560 // </srcblock> 01561 01562 // <h4>Some more performance considerations</h4> 01563 // Which storage managers to use and how to use them depends heavily on 01564 // the type of data and the access patterns to the data. Here follow some 01565 // guidelines: 01566 // <ol> 01567 // <li> Scalar data can be stored with the StandardStMan (SSM) or 01568 // IncrementalStMan (ISM). For slowly varying data (e.g. the TIME column 01569 // in a MeasurementSet) it is best to use the ISM. Otherwise the SSM.
01570 // Note that very long strings (longer than the bucketsize) can only 01571 // be stored with the SSM. 01572 // <li> Any number of storage managers can be used. In fact, each column 01573 // can have a storage manager of its own resulting in column-wise 01574 // stored data which is more and more used in data base systems. 01575 // In that way a query or sort on that column is very fast, because 01576 // the buckets to read only contain data of that column. 01577 // In practice one can decide to combine a few frequently used columns 01578 // in a storage manager. 01579 // <li> Array data can be stored with any column manager. Small fixed size 01580 // arrays can be stored directly with the SSM 01581 // (or ISM if not changing much). 01582 // However, they can also be stored with a TiledStMan (TSM) as shown 01583 // for the UVW column in the example above. 01584 // <br> Large arrays should usually be stored with a TSM. However, 01585 // if it must be possible to change the shape of an array after it 01586 // was stored, the SSM (or ISM) must be used. Note that in that 01587 // case a lot of disk space can be wasted, because the SSM and ISM 01588 // store the array data at the end of the file when the array got 01589 // bigger and do not reuse the old space. The only way to 01590 // reclaim it is by making a deep copy of the entire table. 01591 // <li> When an array is stored with a TSM, it is important to decide 01592 // which TSM to use. 01593 // <ol> 01594 // <li> The TiledColumnStMan is the most efficient, but only suitable 01595 // for arrays having the same shape in the entire column. 01596 // <li> The TiledShapeStMan is suitable for columns where the arrays 01597 // can have a few shapes. 01598 // <li> The TiledCellStMan is suitable for columns where the arrays 01599 // can have many different shapes. 01600 // </ol> 01601 // This is discussed in more detail 01602 // <a href="#Tables:TiledStMan">above</a>. 
01603 // <li> When storing an array with a TSM, it can be very important to 01604 // choose the right tile shape. Not only does this define the size 01605 // of a tile, but it also defines if access in other directions 01606 // than the natural direction can be fast. It is also discussed in 01607 // more detail <a href="#Tables:TiledStMan">above</a>. 01608 // <li> Columns can be combined in a single TiledStMan. For instance, combining DATA 01609 // and FLAG is advantageous if FLAG is always used with DATA. However, if FLAG 01610 // is used on its own (e.g. in combination with CORRECTED_DATA), it is better 01611 // to separate them, otherwise tiles containing FLAG also contain DATA making the 01612 // tiles much bigger, thus more expensive to access. 01613 // </ol> 01614 01615 // </synopsis> 01616 // </module> 01617 01618 01619 01620 } //# NAMESPACE CASA - END 01621 01622 #endif
1.5.1