casa
$Rev:20696$
|
00001 //# Tables.h: The Tables module - Casacore data storage 00002 //# Copyright (C) 1994-2010 00003 //# Associated Universities, Inc. Washington DC, USA. 00004 //# 00005 //# This library is free software; you can redistribute it and/or modify it 00006 //# under the terms of the GNU Library General Public License as published by 00007 //# the Free Software Foundation; either version 2 of the License, or (at your 00008 //# option) any later version. 00009 //# 00010 //# This library is distributed in the hope that it will be useful, but WITHOUT 00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00012 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00013 //# License for more details. 00014 //# 00015 //# You should have received a copy of the GNU Library General Public License 00016 //# along with this library; if not, write to the Free Software Foundation, 00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00018 //# 00019 //# Correspondence concerning AIPS++ should be addressed as follows: 00020 //# Internet email: aips2-request@nrao.edu. 
00021 //# Postal address: AIPS++ Project Office 00022 //# National Radio Astronomy Observatory 00023 //# 520 Edgemont Road 00024 //# Charlottesville, VA 22903-2475 USA 00025 //# 00026 //# $Id: Tables.h 21014 2011-01-06 08:57:49Z gervandiepen $ 00027 00028 #ifndef TABLES_TABLES_H 00029 #define TABLES_TABLES_H 00030 00031 //# Includes 00032 //# table description 00033 #include <tables/Tables/TableDesc.h> 00034 #include <tables/Tables/ColumnDesc.h> 00035 #include <tables/Tables/ScaColDesc.h> 00036 #include <tables/Tables/ArrColDesc.h> 00037 #include <tables/Tables/ScaRecordColDesc.h> 00038 00039 //# storage managers 00040 #include <tables/Tables/StManAipsIO.h> 00041 #include <tables/Tables/StandardStMan.h> 00042 #include <tables/Tables/StandardStManAccessor.h> 00043 #include <tables/Tables/IncrementalStMan.h> 00044 #include <tables/Tables/IncrStManAccessor.h> 00045 #include <tables/Tables/TiledDataStMan.h> 00046 #include <tables/Tables/TiledDataStManAccessor.h> 00047 #include <tables/Tables/TiledCellStMan.h> 00048 #include <tables/Tables/TiledColumnStMan.h> 00049 #include <tables/Tables/TiledShapeStMan.h> 00050 #include <tables/Tables/MemoryStMan.h> 00051 00052 //# virtual column engines 00053 #include <tables/Tables/RetypedArrayEngine.h> 00054 #include <tables/Tables/RetypedArraySetGet.h> 00055 #include <tables/Tables/ScaledArrayEngine.h> 00056 #include <tables/Tables/MappedArrayEngine.h> 00057 #include <tables/Tables/ForwardCol.h> 00058 #include <tables/Tables/ForwardColRow.h> 00059 #include <tables/Tables/CompressComplex.h> 00060 #include <tables/Tables/CompressFloat.h> 00061 #include <tables/Tables/VirtualTaQLColumn.h> 00062 00063 //# table access 00064 #include <tables/Tables/Table.h> 00065 #include <tables/Tables/TableLock.h> 00066 #include <tables/Tables/SetupNewTab.h> 00067 #include <tables/Tables/ScalarColumn.h> 00068 #include <tables/Tables/ArrayColumn.h> 00069 #include <tables/Tables/TableRow.h> 00070 #include <tables/Tables/TableCopy.h> 00071 #include 
<casa/Arrays/Array.h> 00072 #include <casa/Arrays/Slicer.h> 00073 #include <casa/Arrays/Slice.h> 00074 00075 //# keywords 00076 #include <tables/Tables/TableRecord.h> 00077 #include <casa/Containers/RecordField.h> 00078 00079 //# table lookup 00080 #include <tables/Tables/ColumnsIndex.h> 00081 #include <tables/Tables/ColumnsIndexArray.h> 00082 00083 //# table expressions (for selection of rows) 00084 #include <tables/Tables/ExprNode.h> 00085 #include <tables/Tables/ExprNodeSet.h> 00086 #include <tables/Tables/TableParse.h> 00087 00088 //# table vectors 00089 #include <tables/Tables/TableVector.h> 00090 #include <tables/Tables/TabVecMath.h> 00091 #include <tables/Tables/TabVecLogic.h> 00092 00093 00094 namespace casa { //# NAMESPACE CASA - BEGIN 00095 00096 // <module> 00097 00098 // <summary> 00099 // Tables are the data storage mechanism for Casacore 00100 // </summary> 00101 00102 // <use visibility=export> 00103 00104 // <reviewed reviewer="jhorstko" date="1994/08/30" tests="" demos=""> 00105 // </reviewed> 00106 00107 // <prerequisite> 00108 // <li> <linkto class="Record:description">Record</linkto> class 00109 // </prerequisite> 00110 00111 // <etymology> 00112 // "Table" is a formal term from relational database theory: 00113 // <em> "The organizing principle in a relational database is the TABLE, 00114 // a rectangular, row/column arrangement of data values."</em> 00115 // Casacore tables are extensions to traditional tables, but are similar 00116 // enough that we use the same name. There is also a strong resemblance 00117 // between the uses of Casacore tables, and FITS binary tables, which 00118 // provides another reason to use "Tables" to describe the Casacore data 00119 // storage mechanism. 00120 // </etymology> 00121 00122 // <synopsis> 00123 // Tables are the fundamental storage mechanism for Casacore. 
This document 00124 // explains <A HREF="#Tables:motivation">why</A> they had to be made, 00125 // <A HREF="#Tables:properties">what</A> their properties are, and 00126 // <A HREF="#Tables:open">how</A> to use them. The last subject is 00127 // discussed and illustrated in a sequence of sections: 00128 // <UL> 00129 // <LI> <A HREF="#Tables:open">opening</A> an existing table, 00130 // <LI> <A HREF="#Tables:read">reading</A> from a table, 00131 // <LI> <A HREF="#Tables:creation">creating</A> a new table, 00132 // <LI> <A HREF="#Tables:write">writing</A> into a table, 00133 // <LI> <A HREF="#Tables:row-access">accessing rows</A> in a table, 00134 // <LI> <A HREF="#Tables:select and sort">selection and sorting</A> 00135 // (see also <A HREF="../notes/199.html">Table Query Language</A>), 00136 // <LI> <A HREF="#Tables:concatenation">concatenating similar tables</A> 00137 // <LI> <A HREF="#Tables:iterate">iterating</A> through a table, 00138 // <LI> <A HREF="#Tables:LockSync">locking/synchronization</A> 00139 // for concurrent access, 00140 // <LI> <A HREF="#Tables:KeyLookup">indexing</A> a table for faster lookup, 00141 // <LI> <A HREF="#Tables:vectors">vector operations</A> on a column. 00142 // <LI> <A HREF="#Tables:performance">performance and robustness</A> considerations. 00143 // </UL> 00144 00145 00146 // <ANCHOR NAME="Tables:motivation"> 00147 // <motivation></ANCHOR> 00148 // 00149 // The Casacore tables are mainly based upon the ideas of Allen Farris, 00150 // as laid out in the 00151 // <A HREF="http://aips2.cv.nrao.edu/aips++/docs/reference/Database.ps.gz"> 00152 // AIPS++ Database document</A>, from where the following paragraph is taken: 00153 // 00154 // <p> 00155 // Traditional relational database tables have two features that 00156 // decisively limit their applicability to scientific data. First, an item of 00157 // data in a column of a table must be atomic -- it must have no internal 00158 // structure. 
A consequence of this restriction is that relational 00159 // databases are unable to deal with arrays of data items. Second, an 00160 // item of data in a column of a table must not have any direct or 00161 // implied linkages to other items of data or data aggregates. This 00162 // restriction makes it difficult to model complex relationships between 00163 // collections of data. While these restrictions may make it easy to 00164 // define a mathematically complete set of data manipulation operations, 00165 // they are simply intolerable in a scientific data-handling context. 00166 // Multi-dimensional arrays are frequently the most natural modes in 00167 // which to discuss and think about scientific data. In addition, 00168 // scientific data often requires complex calibration operations that 00169 // must draw on large bodies of data about equipment and its performance 00170 // in various states. The restrictions imposed by the relational model 00171 // make it very difficult to deal with complex problems of this nature. 00172 // <p> 00173 // 00174 // In response to these limitations, and other needs, the Casacore tables were 00175 // designed. 00176 // </motivation> 00177 00178 // <ANCHOR NAME="Tables:properties"> 00179 // <h3>Table Properties</h3></ANCHOR> 00180 // 00181 // Casacore tables have the following properties: 00182 // <ul> 00183 // <li> A table consists of a number of rows and columns. 00184 // <A HREF="#Tables:keywords">Keyword/value pairs</A> may be defined 00185 // for the table as a whole and for individual columns. A keyword/value 00186 // pair for a column could, for instance, define its unit. 00187 // <li> Each table has a <A HREF="#Tables:Table Description">description</A> 00188 // which specifies the number and type of columns, and maybe initial 00189 // keyword sets and default values for the columns. 
00190 // <li> A cell in a column may contain 00191 // <UL> 00192 // <LI> a scalar; 00193 // <LI> a "direct" array -- which must have the same shape in all 00194 // cells of a column, is usually small, and is stored in the 00195 // table itself; or 00196 // <LI> an "indirect" array -- which may have different shapes in 00197 // different cells of the same column, is arbitrarily large, 00198 // and is stored in a separate file. 00199 // </UL> 00200 // <li> A column may be 00201 // <UL> 00202 // <LI> "filled" -- containing actual data, or 00203 // <LI> "virtual" -- containing a recipe telling how the data will 00204 // be generated dynamically 00205 // </UL> 00206 // <li> Only the standard Casacore data types can be used in filled 00207 // columns, be they scalars or arrays: Bool, uChar, Short, uShort, 00208 // Int, uInt, float, double, Complex, DComplex and String. 00209 // Furthermore scalars containing 00210 // <linkto class=TableRecord>record</linkto> values are possible. 00211 // <li> A column can have a default value, which will automatically be stored 00212 // in a cell of the column, when a row is added to the table. 00213 // <li> <A HREF="#Tables:Data Managers">Data managers</A> handle the 00214 // reading, writing and generation of data. Each column in a table can 00215 // be assigned its own data manager, which allows for optimization of 00216 // the data storage per column. The choice of data manager determines 00217 // whether a column is filled or virtual. 00218 // <li> Table data are stored in a canonical format, so they can be read 00219 // on any machine. To avoid needless swapping of bytes, the data can 00220 // be stored in big endian (as used on e.g. SUN) or little endian 00221 // (as used on Intel PC-s) canonical format. 
00222 // By default it uses the format specified in the aipsrc variable 00223 // <code>table.endianformat</code> which defaults to 00224 // <code>Table::LocalEndian</code> (thus the endian format of the 00225 // machine being used). 00226 // <li> The SQL-like 00227 // <a href="../notes/199.html">Table Query Language</a> (TaQL) 00228 // can be used to do operations on tables like 00229 // select, sort, update, insert, delete, and create. 00230 // </ul> 00231 // 00232 // Tables can be in one of three forms: 00233 // <ul> 00234 // <li> A plain table is a table stored on disk. 00235 // It can be shared by multiple processes. 00236 // <li> A memory table is a table held in memory. 00237 // It is a process specific table, thus not sharable. 00238 // The <linkto class=Table>Table::copy</linkto> function can be used 00239 // to turn a memory table into a plain table. 00240 // <li> A reference table is a table referencing a plain or memory table. 00241 // It is the result of a selection or sort on another table. 00242 // A reference table references the data in the other table, thus 00243 // changing data in a reference table means that the data in the 00244 // original table are changed. 00245 // The <linkto class=Table>Table::deepCopy</linkto> function can be 00246 // used to turn a reference table into a plain table. 00247 // </ul> 00248 // Concurrent access from different processes to the same plain table is 00249 // fully supported by means of a <A HREF="#Tables:LockSync"> 00250 // locking/synchronization</A> mechanism. Concurrent access over NFS is also 00251 // supported. 00252 // <p> 00253 // A (somewhat primitive) mechanism is available to do a 00254 // <A HREF="#Tables:KeyLookup">table lookup</A> based on the contents 00255 // of a key. In the future this might be replaced by a proper B+-tree index 00256 // mechanism. 
00257 00258 // <ANCHOR NAME="Tables:open"> 00259 // <h3>Opening an Existing Table</h3></ANCHOR> 00260 // 00261 // To open an existing table you just create a 00262 // <linkto class="Table:description">Table</linkto> object giving 00263 // the name of the table, like: 00264 // 00265 // <srcblock> 00266 // Table readonly_table ("tableName"); 00267 // // or 00268 // Table read_and_write_table ("tableName", Table::Update); 00269 // </srcblock> 00270 // 00271 // The constructor option determines whether the table will be opened as 00272 // readonly or as read/write. A readonly table file must be opened 00273 // as readonly, otherwise an exception is thrown. The functions 00274 // <linkto class="Table">Table::isWritable(...)</linkto> 00275 // can be used to determine if a table is writable. 00276 // 00277 // When the table is opened, the data managers are reinstantiated 00278 // according to their definition at table creation. 00279 00280 // <ANCHOR NAME="Tables:read"> 00281 // <h3>Reading from a Table</h3></ANCHOR> 00282 // 00283 // You can read data from a table column with the "get" functions 00284 // in the classes 00285 // <linkto class="ROScalarColumn:description">ROScalarColumn<T></linkto> 00286 // and 00287 // <linkto class="ROArrayColumn:description">ROArrayColumn<T></linkto>. 00288 // For scalars of a standard data type (i.e. Bool, uChar, Int, Short, 00289 // uShort, uInt, float, double, Complex, DComplex and String) you could 00290 // instead use 00291 // <linkto class="ROTableColumn">ROTableColumn::getScalar(...)</linkto> or 00292 // <linkto class="ROTableColumn">ROTableColumn::asXXX(...)</linkto>. 00293 // These functions offer an extra: they do automatic data type promotion; 00294 // so that you can, for example, get a double value from a float column. 00295 // 00296 // These "get" functions are used in the same way as the simple "put" 00297 // functions described in the section on writing into a table. 
00298 // <p> 00299 // <linkto class="ScalarColumn:description">ScalarColumn<T></linkto> 00300 // is derived from ROScalarColumn<T>, and 00301 // therefore has the same "get" functions. However, if a 00302 // ScalarColumn<T> object is constructed for a non-writable column, 00303 // an exception is thrown. Only ROScalarColumn<T> objects can be 00304 // constructed for non-writable columns. 00305 // The same is true for 00306 // <linkto class="ArrayColumn:description">ArrayColumn<T></linkto> and 00307 // <linkto class="TableColumn:description">TableColumn</linkto>. 00308 // <p> 00309 // A typical program could look like: 00310 // <srcblock> 00311 // #include <tables/Tables/Table.h> 00312 // #include <tables/Tables/ScalarColumn.h> 00313 // #include <tables/Tables/ArrayColumn.h> 00314 // #include <casa/Arrays/Vector.h> 00315 // #include <casa/Arrays/Slicer.h> 00316 // #include <casa/Arrays/ArrayMath.h> 00317 // #include <iostream> 00318 // 00319 // main() 00320 // { 00321 // // Open the table (readonly). 00322 // Table tab ("some.name"); 00323 // 00324 // // Construct the various column objects. 00325 // // Their data type has to match the data type in the table description. 00326 // ROScalarColumn<Int> acCol (tab, "ac"); 00327 // ROArrayColumn<Float> arr2Col (tab, "arr2"); 00328 // 00329 // // Loop through all rows in the table. 00330 // uInt nrrow = tab.nrow(); 00331 // for (uInt i=0; i<nrrow; i++) { 00332 // // Read the row for both columns. 00333 // cout << "Column ac in row i = " << acCol(i) << endl; 00334 // Array<Float> array = arr2Col.get (i); 00335 // } 00336 // 00337 // // Show the entire column ac, 00338 // // and show the 10th element of arr2 in each row. 
00339 // cout << acCol.getColumn(); 00340 // cout << arr2Col.getColumn (Slicer(Slice(10))); 00341 // } 00342 // </srcblock> 00343 00344 // <ANCHOR NAME="Tables:creation"> 00345 // <h3>Creating a Table</h3></ANCHOR> 00346 // 00347 // The creation of a table is a multi-step process: 00348 // <ol> 00349 // <li> 00350 // Create a <A HREF="#Tables:Table Description">table description</A>. 00351 // <li> 00352 // Create a <linkto class="SetupNewTable:description">SetupNewTable</linkto> 00353 // object with the name of the new table. 00354 // <li> 00355 // Create the necessary <A HREF="#Tables:Data Managers">data managers</A>. 00356 // <li> 00357 // Bind each column to the appropriate data manager. 00358 // The system will bind unbound columns to data managers which 00359 // are created internally using the default data manager name 00360 // defined in the column description. 00361 // <li> 00362 // Define the shape of direct columns (if that was not already done in the 00363 // column description). 00364 // <li> 00365 // Create the <linkto class="Table:description">Table</linkto> 00366 // object from the SetupNewTable object. Here, a final check is performed 00367 // and the necessary files are created. 00368 // </ol> 00369 // The recipe above is meant for the creation of a plain table, but the 00370 // creation of a memory table is exactly the same. The only difference 00371 // is that in the call to construct the Table object the Table::Memory 00372 // type has to be given. Note that in the SetupNewTable object the columns 00373 // can be bound to any data manager. <src>MemoryTable</src> will rebind 00374 // stored columns to the <linkto class=MemoryStMan>MemoryStMan</linkto> 00375 // storage manager, but virtual column bindings are not changed. 00376 00377 // 00378 // The following example shows how you can create a table. 
An example 00379 // specifically illustrating the creation of the 00380 // <A HREF="#Tables:Table Description">table description</A> is given 00381 // in that section. Other sections discuss the access to the table. 00382 // 00383 // <srcblock> 00384 // #include <tables/Tables/TableDesc.h> 00385 // #include <tables/Tables/SetupNewTab.h> 00386 // #include <tables/Tables/Table.h> 00387 // #include <tables/Tables/ScaColDesc.h> 00388 // #include <tables/Tables/ScaRecordColDesc.h> 00389 // #include <tables/Tables/ArrColDesc.h> 00390 // #include <tables/Tables/StandardStMan.h> 00391 // #include <tables/Tables/IncrementalStMan.h> 00392 // 00393 // main() 00394 // { 00395 // // Step1 -- Build the table description. 00396 // TableDesc td("tTableDesc", "1", TableDesc::Scratch); 00397 // td.comment() = "A test of class SetupNewTable"; 00398 // td.addColumn (ScalarColumnDesc<Int> ("ab" ,"Comment for column ab")); 00399 // td.addColumn (ScalarColumnDesc<Int> ("ac")); 00400 // td.addColumn (ScalarColumnDesc<uInt> ("ad","comment for ad")); 00401 // td.addColumn (ScalarColumnDesc<Float> ("ae")); 00402 // td.addColumn (ScalarRecordColumnDesc ("arec")); 00403 // td.addColumn (ArrayColumnDesc<Float> ("arr1",3,ColumnDesc::Direct)); 00404 // td.addColumn (ArrayColumnDesc<Float> ("arr2",0)); 00405 // td.addColumn (ArrayColumnDesc<Float> ("arr3",0,ColumnDesc::Direct)); 00406 // 00407 // // Step 2 -- Setup a new table from the description. 00408 // SetupNewTable newtab("newtab.data", td, Table::New); 00409 // 00410 // // Step 3 -- Create storage managers for it. 00411 // StandardStMan stmanStand_1; 00412 // StandardStMan stmanStand_2; 00413 // IncrementalStMan stmanIncr; 00414 // 00415 // // Step 4 -- First, bind all columns to the first storage 00416 // // manager. Then, bind a few columns to another storage manager 00417 // // (which will overwrite the previous bindings). 
00418 // newtab.bindAll (stmanStand_1); 00419 // newtab.bindColumn ("ab", stmanStand_2); 00420 // newtab.bindColumn ("ae", stmanIncr); 00421 // newtab.bindColumn ("arr3", stmanIncr); 00422 // 00423 // // Step 5 -- Define the shape of the direct columns. 00424 // // (this could have been done in the column description). 00425 // newtab.setShapeColumn( "arr1", IPosition(3,2,3,4)); 00426 // newtab.setShapeColumn( "arr3", IPosition(3,3,4,5)); 00427 // 00428 // // Step 6 -- Finally, create the table consisting of 10 rows. 00429 // Table tab(newtab, 10); 00430 // 00431 // // Now we can fill the table, which is shown in a next section. 00432 // // The Table destructor will flush the table to the files. 00433 // } 00434 // </srcblock> 00435 // To create a table in memory, only step 6 has to be modified slightly to: 00436 // <srcblock> 00437 // Table tab(newtab, Table::Memory, 10); 00438 // </srcblock> 00439 00440 // <ANCHOR NAME="Tables:write"> 00441 // <h3>Writing into a Table</h3></ANCHOR> 00442 // 00443 // Once a table has been created or has been opened for read/write, 00444 // you want to write data into it. Before doing that you may have 00445 // to add one or more rows to the table. 00446 // <note role=tip> If a table was created with a given number of rows, you 00447 // do not need to add rows; you may not even be able to do so. 00448 // </note> 00449 // 00450 // When adding new rows to the table, either via the 00451 // <linkto class="Table">Table(...) constructor</linkto> 00452 // or via the 00453 // <linkto class="Table">Table::addRow(...)</linkto> 00454 // function, you can choose to have those rows initialized with the 00455 // default values given in the description. 00456 // 00457 // To actually write the data into the table you need the classes 00458 // <linkto class="ScalarColumn:description">ScalarColumn<T></linkto> and 00459 // <linkto class="ArrayColumn:description">ArrayColumn<T></linkto>. 
00460 // For each column you can construct one or 00461 // more of these objects. Their put(...) functions 00462 // let you write a value at a time or the entire column in one go. 00463 // For arrays you can "put" subsections of the arrays. 00464 // 00465 // As an alternative for scalars of a standard data type (i.e. Bool, 00466 // uChar, Int, Short, uShort, uInt, float, double, Complex, DComplex 00467 // and String) you could use the functions 00468 // <linkto class="TableColumn">TableColumn::putScalar(...)</linkto>. 00469 // These functions offer an extra: automatic data type promotion; so that 00470 // you can, for example, put a float value in a double column. 00471 // 00472 // A typical program could look like: 00473 // <srcblock> 00474 // #include <tables/Tables/TableDesc.h> 00475 // #include <tables/Tables/SetupNewTab.h> 00476 // #include <tables/Tables/Table.h> 00477 // #include <tables/Tables/ScaColDesc.h> 00478 // #include <tables/Tables/ArrColDesc.h> 00479 // #include <tables/Tables/ScalarColumn.h> 00480 // #include <tables/Tables/ArrayColumn.h> 00481 // #include <casa/Arrays/Vector.h> 00482 // #include <casa/Arrays/Slicer.h> 00483 // #include <casa/Arrays/ArrayMath.h> 00484 // #include <iostream> 00485 // 00486 // main() 00487 // { 00488 // // First build the table description. 00489 // TableDesc td("tTableDesc", "1", TableDesc::Scratch); 00490 // td.comment() = "A test of class SetupNewTable"; 00491 // td.addColumn (ScalarColumnDesc<Int> ("ac")); 00492 // td.addColumn (ArrayColumnDesc<Float> ("arr2",0)); 00493 // 00494 // // Setup a new table from the description, 00495 // // and create the (still empty) table. 00496 // // Note that since we do not explicitly bind columns to 00497 // // data managers, all columns will be bound to the default 00498 // // standard storage manager StandardStMan. 00499 // SetupNewTable newtab("newtab.data", td, Table::New); 00500 // Table tab(newtab); 00501 // 00502 // // Construct the various column objects. 
00503 // // Their data type has to match the data type in the description. 00504 // ScalarColumn<Int> ac (tab, "ac"); 00505 // ArrayColumn<Float> arr2 (tab, "arr2"); 00506 // Vector<Float> vec2(100); 00507 // 00508 // // Write the data into the columns. 00509 // // In each cell arr2 will be a vector of length 100. 00510 // // Since its shape is not set explicitly, it is done implicitly. 00511 // for (uInt i=0; i<10; i++) { 00512 // tab.addRow(); // First add a row. 00513 // ac.put (i, i+10); // value is i+10 in row i 00514 // indgen (vec2, float(i+20)); // vec2 gets i+20, i+21, ..., i+119 00515 // arr2.put (i, vec2); 00516 // } 00517 // 00518 // // Finally, show the entire column ac, 00519 // // and show the 10th element of arr2. 00520 // cout << ac.getColumn(); 00521 // cout << arr2.getColumn (Slicer(Slice(10))); 00522 // 00523 // // The Table destructor writes the table. 00524 // } 00525 // </srcblock> 00526 // 00527 // In this example we added rows in the for loop, but we could also have 00528 // created 10 rows straightaway by constructing the Table object as: 00529 // <srcblock> 00530 // Table tab(newtab, 10); 00531 // </srcblock> 00532 // in which case we would not include 00533 // <srcblock> 00534 // tab.addRow() 00535 // </srcblock> 00536 // 00537 // The classes 00538 // <linkto class="TableColumn:description">TableColumn</linkto>, 00539 // <linkto class="ScalarColumn:description">ScalarColumn<T></linkto>, and 00540 // <linkto class="ArrayColumn:description">ArrayColumn<T></linkto> 00541 // contain several functions to put values into a single cell or into the 00542 // whole column. This may look confusing, but is actually quite simple. 00543 // The functions can be divided in two groups: 00544 // <ol> 00545 // <li> 00546 // Put the given value into the column cell(s). 
00547 // <ul> 00548 // <li> 00549 // The simplest put functions, 00550 // <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto> and 00551 // <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>, 00552 // put a value into the given column cell. For convenience, there is an 00553 // <linkto class="ArrayColumn">ArrayColumn::putSlice(...)</linkto> 00554 // to put only a part of the array. 00555 // <li> 00556 // <linkto class="ScalarColumn">ScalarColumn::fillColumn(...)</linkto> and 00557 // <linkto class="ArrayColumn">ArrayColumn::fillColumn(...)</linkto> 00558 // fill an entire column by putting the given value into all the cells 00559 // of the column. 00560 // <li> 00561 // The simplest putColumn functions, 00562 // <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto> and 00563 // <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>, 00564 // put an array of values into the column. There is a special 00565 // <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto> 00566 // version which puts only a part of the arrays. 00567 // </ul> 00568 // 00569 // <li> 00570 // Copy values from another column to this column.<BR> 00571 // These functions have the advantage that the 00572 // data type of the input and/or output column can be unknown. 00573 // The generic (RO)TableColumn objects can be used for this purpose. 00574 // The put(Column) function checks the data types and, if possible, 00575 // converts them. If the conversion is not possible, it throws an 00576 // exception. 00577 // <ul> 00578 // <li> 00579 // The put functions copy the value in a cell of the input column 00580 // to a cell in the output column. The row numbers of the cells 00581 // in the columns can be different. 00582 // <li> 00583 // The putColumn functions copy the entire contents of the input column 00584 // to the output column. The lengths of the columns must be equal. 00585 // </ul> 00586 // Each class has its own set of these functions. 
00587 // <ul> 00588 // <li> 00589 // <linkto class="TableColumn">TableColumn::put(...)</linkto> and 00590 // <linkto class="TableColumn">TableColumn::putColumn(...)</linkto> 00591 // are the most generic. They can be 00592 // used if the data types of both input and output column are unknown. 00593 // Note that these functions are virtual. 00594 // <li> 00595 // <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto>, 00596 // <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>, 00597 // <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto>, and 00598 // <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto> 00599 // are less generic and therefore potentially more efficient. 00600 // The most efficient variants are the ones taking a 00601 // ROScalar/ArrayColumn<T>, because they require no data type 00602 // conversion. 00603 // </ul> 00604 // </ol> 00605 00606 // <ANCHOR NAME="Tables:row-access"> 00607 // <h3>Accessing rows in a Table</h3></ANCHOR> 00608 // 00609 // Apart from accessing a table column-wise as described in the 00610 // previous two sections, it is also possible to access a table row-wise. 00611 // The <linkto class=TableRow>TableRow</linkto> class makes it possible 00612 // to access multiple fields in a table row as a whole. Note that like the 00613 // XXColumn classes described above, there is also an ROTableRow class 00614 // for access to readonly tables. 00615 // <p> 00616 // On construction of a TableRow object it has to be specified which 00617 // fields (i.e. columns) are part of the row. For these fields a 00618 // fixed structured <linkto class=TableRecord>TableRecord</linkto> 00619 // object is constructed as part of the TableRow object. The TableRow::get 00620 // function will fill this record with the table data for the given row. 00621 // The user has access to the record and can use 00622 // <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> objects for 00623 // speedier access to the record. 
00624 // <p> 00625 // The class could be used as shown in the following example. 00626 // <srcblock> 00627 // // Open the table as readonly and define a row object to contain 00628 // // the given columns. 00629 // // Note that the function stringToVector is a very convenient 00630 // // way to construct a Vector<String>. 00631 // // Show the description of the fields in the row. 00632 // Table table("Some.table"); 00633 // ROTableRow row (table, stringToVector("col1,col2,col3")); 00634 // cout << row.record().description(); 00635 // // Since the structure of the record is known, the RecordFieldPtr 00636 // // objects could be used to allow for easy and fast access to 00637 // // the record which is refilled for each get. 00638 // RORecordFieldPtr<String> col1(row.record(), "col1"); 00639 // RORecordFieldPtr<Double> col2(row.record(), "col2"); 00640 // RORecordFieldPtr<Array<Int> > col3(row.record(), "col3"); 00641 // for (uInt i=0; i<table.nrow(); i++) { 00642 // row.get (i); 00643 // someString = *col1; 00644 // somedouble = *col2; 00645 // someArrayInt = *col3; 00646 // } 00647 // </srcblock> 00648 // The description of TableRow contains some more extensive examples. 00649 00650 // <ANCHOR NAME="Tables:select and sort"> 00651 // <h3>Table Selection and Sorting</h3></ANCHOR> 00652 // 00653 // The result of a select and sort of a table is another table, 00654 // which references the original table. This means that an update 00655 // of a sorted or selected table results in the update of the original 00656 // table. The result is, however, a table in itself, so all table 00657 // functions (including select and sort) can be used with it. 00658 // Note that a true copy of such a reference table can be made with 00659 // the <linkto class=Table>Table::deepCopy</linkto> function. 00660 // <p> 00661 // Rows or columns can be selected from a table. 
Columns can be selected 00662 // by the 00663 // <linkto class="Table">Table::project(...)</linkto> 00664 // function, while rows can be selected by the various 00665 // <linkto class="Table">Table operator()</linkto> functions. 00666 // Usually a row is selected by giving a select expression with 00667 // <linkto class="TableExprNode:description">TableExprNode</linkto> 00668 // objects. These objects represent the various nodes 00669 // in an expression, e.g. a constant, a column, or a subexpression. 00670 // The Table function 00671 // <linkto class="Table">Table::col(...)</linkto> 00672 // creates a TableExprNode object for a column. The function 00673 // <linkto class="Table">Table::key(...)</linkto> 00674 // does the same for a keyword by reading 00675 // the keyword value and storing it as a constant in an expression node. 00676 // All column nodes in an expression must belong to the same table, 00677 // otherwise an exception is thrown. 00678 // In the following example we select all rows with RA>10: 00679 // <srcblock> 00680 // #include <tables/Tables/ExprNode.h> 00681 // Table table ("Table.name"); 00682 // Table result = table (table.col("RA") > 10); 00683 // </srcblock> 00684 // while in the next one we select rows with RA and DEC in the given 00685 // intervals: 00686 // <srcblock> 00687 // Table result = table (table.col("RA") > 10 00688 // && table.col("RA") < 14 00689 // && table.col("DEC") >= -10 00690 // && table.col("DEC") <= 10); 00691 // </srcblock> 00692 // The following operators can be used to form arbitrarily 00693 // complex expressions: 00694 // <ul> 00695 // <li> Relational operators ==, !=, >, >=, < and <=. 00696 // <li> Logical operators &&, || and !. 00697 // <li> Arithmetic operators +, -, *, /, %, and unary + and -. 00698 // <li> Bit operators ^, &, |, and unary ~. 00699 // <li> Operator() to take a subsection of an array. 00700 // </ul> 00701 // Many functions (like sin, max, conj) can be used in an expression. 
00702 // Class <linkto class=TableExprNode>TableExprNode</linkto> shows 00703 // the available functions. 00704 // E.g. 00705 // <srcblock> 00706 // Table result = table (sin (table.col("RA")) > 0.5); 00707 // </srcblock> 00708 // Function <src>in</src> can be used to select from a set of values. 00709 // A value set can be constructed using class 00710 // <linkto class=TableExprNodeSet>TableExprNodeSet</linkto>. 00711 // <srcblock> 00712 // TableExprNodeSet set; 00713 // set.add (TableExprNodeSetElem ("abc")); 00714 // set.add (TableExprNodeSetElem ("defg")); 00715 // set.add (TableExprNodeSetElem ("h")); 00716 // Table result = table (table.col("NAME").in (set)); 00717 // </srcblock> 00718 // This selects the rows with a NAME equal to <src>abc</src>, 00719 // <src>defg</src>, or <src>h</src>. 00720 // 00721 // <p> 00722 // You can sort a table on one or more columns containing scalars. 00723 // In this example we simply sort on column RA (default is ascending): 00724 // <srcblock> 00725 // Table table ("Table.name"); 00726 // Table result = table.sort ("RA"); 00727 // </srcblock> 00728 // Multiple 00729 // <linkto class="Table">Table::sort(...)</linkto> 00730 // functions exist which allow for more flexible control over the sort order. 00731 // In the next example we sort first on RA in descending order 00732 // and then on DEC in ascending order: 00733 // <srcblock> 00734 // Table table ("Table.name"); 00735 // Block<String> sortKeys(2); 00736 // Block<int> sortOrders(2); 00737 // sortKeys[0] = "RA"; 00738 // sortOrders[0] = Sort::Descending; 00739 // sortKeys[1] = "DEC"; 00740 // sortOrders[1] = Sort::Ascending; 00741 // Table result = table.sort (sortKeys, sortOrders); 00742 // </srcblock> 00743 // 00744 // Tables stemming from the same root can be combined in several 00745 // ways with the help of the various logical 00746 // <linkto class="Table">Table operators</linkto> (operator|, etc.).
00747 00748 // <h4>Table Query Language</h4> 00749 // The selection and sorting mechanism described above can only be used 00750 // in a hard-coded way in a C++ program. 00751 // There is, however, another way. Strings containing selection and 00752 // sorting commands can be used. 00753 // The syntax of these commands is based on SQL and is described in the 00754 // <a href="../notes/199.html">Table Query Language</a> (TaQL) note 199. 00755 // The language supports UDFs (User Defined Functions) in dynamically 00756 // loadable libraries as explained in the note. 00757 // <br>A TaQL command can be executed with the static function 00758 // <src>tableCommand</src> defined in class 00759 // <linkto class=TableParse>TableParse</linkto>. 00760 00761 // <ANCHOR NAME="Tables:concatenation"> 00762 // <h3>Table Concatenation</h3></ANCHOR> 00763 // Tables with identical descriptions can be concatenated in a virtual way 00764 // using the Table concatenation constructor. Such a Table object behaves 00765 // as any other Table object, thus any operation can be performed on it. 00766 // An identical description means that the number of columns, the column names, 00767 // and their data types of the columns must be the same. The columns do not 00768 // need to be ordered in the same way nor to be stored in the same way. 00769 // <br>Note that if tables have different column names, it is possible 00770 // to form a projection (as described in the previous section) first 00771 // to make them appear identical. 00772 // 00773 // Sometimes a MeasurementSet is partitioned, for instance in chunks of 00774 // one hour. All those chunks can be virtually concatenated this way. 00775 // Note that all tables in the concatenation will be opened, thus one might 00776 // run out of file descriptors if there are many chunks. 00777 // 00778 // Similar to reference tables, it is possible to make a concatenated Table 00779 // persistent by using the <src>rename</src> function. 
It will not copy the 00780 // data; only the names of the tables used are written. 00781 // 00782 // The keywords of a concatenated table are taken from the first table. 00783 // It is possible to change or add keywords, but that is not persistent, 00784 // not even if the concatenated table is made persistent. 00785 // <br>The keywords holding subtables can be handled in a special way. 00786 // Normally the subtables of the concatenation are the subtables of the first 00787 // table, but it is possible to concatenate subtables as well by 00788 // giving their names in the constructor. 00789 // In this way the, say, SYSCAL subtable of a MeasurementSet can be 00790 // concatenated as well. 00791 // <srcblock> 00792 // // Create virtual concatenation of ms0 and ms1. 00793 // Block<String> names(2); 00794 // names[0] = "ms0"; 00795 // names[1] = "ms1"; 00796 // // Also concatenate their SYSCAL subtables. 00797 // Block<String> subNames(1, "SYSCAL"); 00798 // Table concTab (names, subNames); 00799 // </srcblock> 00800 00801 // <ANCHOR NAME="Tables:iterate"> 00802 // <h3>Table Iterators</h3></ANCHOR> 00803 // 00804 // You can iterate through a table in an arbitrary order by getting 00805 // a subset of the table consisting of the rows in which the iteration 00806 // columns have the same value. 00807 // An iterator object is created by constructing a 00808 // <linkto class="TableIterator:description">TableIterator</linkto> 00809 // object with the appropriate column names. 00810 // 00811 // In the next example we define an iteration on the columns Time and 00812 // Baseline. Each iteration step returns a table subset in which Time and 00813 // Baseline have the same value. 00814 // 00815 // <srcblock> 00816 // // Iterate over Time and Baseline (by default in ascending order). 00817 // // Time is the main iteration order, thus the first column specified.
00818 // Table t; 00819 // Table tab ("UV_Table.data"); 00820 // Block<String> iv0(2); 00821 // iv0[0] = "Time"; 00822 // iv0[1] = "Baseline"; 00823 // // 00824 // // Create the iterator. This will prepare the first subtable. 00825 // TableIterator iter(tab, iv0); 00826 // Int nr = 0; 00827 // while (!iter.pastEnd()) { 00828 // // Get the current subtable. 00829 // // This will contain rows with equal Time and Baseline. 00830 // t = iter.table(); 00831 // cout << t.nrow() << " "; 00832 // nr++; 00833 // // Prepare the next subtable with the next Time,Baseline value. 00834 // iter.next(); 00835 // } 00836 // cout << endl << nr << " iteration steps" << endl; 00837 // </srcblock> 00838 // 00839 // You can define more than one iterator on the same table; they operate 00840 // independently. 00841 // 00842 // Note that the result of each iteration step is a table in itself which 00843 // references the original table, just as in the case of a sort or select. 00844 // This means that the resulting table can be used again in a sort, select, 00845 // iteration, etc. 00846 00847 // <ANCHOR NAME="Tables:vectors"> 00848 // <h3>Table Vectors</h3></ANCHOR> 00849 // 00850 // A table vector makes it possible to treat a column in a table 00851 // as a vector. Almost all operators and functions defined for normal 00852 // vectors are also defined for table vectors. So it is, for instance, 00853 // possible to add a constant to a table vector. This has the effect 00854 // that the underlying column gets changed. 00855 // 00856 // You can use the templated classes 00857 // <linkto class="ROTableVector:description">ROTableVector</linkto> and 00858 // <linkto class="TableVector:description">TableVector</linkto> 00859 // to define a table vector (readonly and read/write, respectively) for 00860 // a scalar column. Columns containing arrays or tables are not supported. 00861 // The data type of the (RO)TableVector object must match the 00862 // data type of the column.
00863 // A table vector can also hold a normal vector so that (temporary) 00864 // results of table vector operations can be handled. 00865 // 00866 // In the following example we double the data in column COL1 and 00867 // store the result in a temporary table vector. 00868 // <srcblock> 00869 // // Create a table vector for column COL1. 00870 // // It has to be a ROTableVector, because the table is opened 00871 // // as readonly. 00872 // Table tab ("Table.data"); 00873 // ROTableVector<Int> tabvec(tab, "COL1"); 00874 // // Multiply it by a constant. 00875 // // The result has to be stored in a TableVector, 00876 // // since a ROTableVector cannot be written to. 00877 // TableVector<Int> temp = 2 * tabvec; 00878 // </srcblock> 00879 // 00880 // In the next example we double the data in COL1 and put the result back 00881 // in the column. 00882 // <srcblock> 00883 // // Create a table vector for column COL1. 00884 // // It has to be a TableVector to be able to change the column. 00885 // Table tab ("Table.data", Table::Update); 00886 // TableVector<Int> tabvec(tab, "COL1"); 00887 // // Multiply it by a constant. 00888 // tabvec *= 2; 00889 // </srcblock> 00890 00891 // <ANCHOR NAME="Tables:keywords"> 00892 // <h3>Table Keywords</h3></ANCHOR> 00893 // 00894 // Any number of keyword/value pairs may be attached to the table as a whole, 00895 // or to any individual column. They may be freely added, retrieved, 00896 // re-assigned, or deleted. They are, in essence, a self-resizing list of 00897 // values (any of the primitive types) indexed by Strings (the keyword). 
00898 // 00899 // A table keyword/value pair might be 00900 // <srcblock> 00901 // Observer = Grote Reber 00902 // Date = 10 october 1942 00903 // </srcblock> 00904 // Column keyword/value pairs might be 00905 // <srcblock> 00906 // Units = mJy 00907 // Reference Pixel = 320 00908 // </srcblock> 00909 // The class 00910 // <linkto class="TableRecord:description">TableRecord</linkto> 00911 // represents the keywords in a table. 00912 // It is (indirectly) derived from the standard record classes in the class 00913 // <linkto class="Record:description">Record</linkto> 00914 00915 // <ANCHOR NAME="Tables:Table Description"> 00916 // <h3>Table Description</h3></ANCHOR> 00917 // 00918 // A table contains a description of itself, which defines the layout of the 00919 // columns and the keyword sets for the table and for the individual columns. 00920 // It may also define initial keyword sets and default values for the columns. 00921 // Such a default value is automatically stored in a cell in the table column, 00922 // whenever a row is added to the table. 00923 // 00924 // The creation of the table descriptor is the first step in the creation of 00925 // a new table. The description is part of the table itself, but may also 00926 // exist in a separate file. This is useful if you need to create a number 00927 // of tables with the same structure; in other circumstances it probably 00928 // should be avoided. 00929 // 00930 // The public classes to set up a table description are: 00931 // <ul> 00932 // <li> <linkto class="TableDesc:description">TableDesc</linkto> 00933 // -- holds the table description. 00934 // <li> <linkto class="ColumnDesc:description">ColumnDesc</linkto> 00935 // -- holds a generic column description. 00936 // <li> <linkto class="ScalarColumnDesc:description">ScalarColumnDesc<T> 00937 // </linkto> 00938 // -- defines a column containing a scalar value. 
00939 // <li> <linkto class="ScalarRecordColumnDesc:description">ScalarRecordColumnDesc 00940 // </linkto> 00941 // -- defines a column containing a scalar record value. 00942 // <li> <linkto class="ArrayColumnDesc:description">ArrayColumnDesc<T> 00943 // </linkto> 00944 // -- defines a column containing an (in)direct array. 00945 // </ul> 00946 // 00947 // Here follows a typical example of the construction of a table 00948 // description. For more specialized things -- like the definition of a 00949 // default data manager -- we refer to the descriptions of the above 00950 // mentioned classes. 00951 // 00952 // <srcblock> 00953 // #include <tables/Tables/TableDesc.h> 00954 // #include <tables/Tables/ScaColDesc.h> 00955 // #include <tables/Tables/ArrColDesc.h> 00956 // #include <tables/Tables/ScaRecordColDesc.h> 00957 // #include <tables/Tables/TableRecord.h> 00958 // #include <casa/Arrays/IPosition.h> 00959 // #include <casa/Arrays/Vector.h> 00960 // 00961 // main() 00962 // { 00963 // // Create a new table description. 00964 // // Define a comment for the table description. 00965 // // Define some keywords. 00966 // ColumnDesc colDesc1, colDesc2; 00967 // TableDesc td("tTableDesc", "1", TableDesc::New); 00968 // td.comment() = "A test of class TableDesc"; 00969 // td.rwKeywordSet().define ("ra", float(3.14)); 00970 // td.rwKeywordSet().define ("equinox", double(1950)); 00971 // td.rwKeywordSet().define ("aa", Int(1)); 00972 // 00973 // // Define an integer column ab. 00974 // td.addColumn (ScalarColumnDesc<Int> ("ab", "Comment for column ab")); 00975 // 00976 // // Add a scalar integer column ac, define keywords for it 00977 // // and define a default value 0. 00978 // // Overwrite the value of keyword unit.
00979 // ScalarColumnDesc<Int> acColumn("ac"); 00980 // acColumn.rwKeywordSet().define ("scale", Complex(0,0)); 00981 // acColumn.rwKeywordSet().define ("unit", ""); 00982 // acColumn.setDefault (0); 00983 // td.addColumn (acColumn); 00984 // td.rwColumnDesc("ac").rwKeywordSet().define ("unit", "DEG"); 00985 // 00986 // // Add a scalar string column ad and define its comment string. 00987 // td.addColumn (ScalarColumnDesc<String> ("ad","comment for ad")); 00988 // 00989 // // Now define array columns. 00990 // // This one is indirect and has no dimensionality mentioned yet. 00991 // td.addColumn (ArrayColumnDesc<Complex> ("Arr1","comment for Arr1")); 00992 // // This one is indirect and has 3-dim arrays. 00993 // td.addColumn (ArrayColumnDesc<Int> ("Arr2","comment for Arr2",3)); 00994 // // This one is direct and has 2-dim arrays with axes length 4 and 7. 00995 // td.addColumn (ArrayColumnDesc<uInt> ("Arr3","comment for Arr3", 00996 // IPosition(2,4,7), 00997 // ColumnDesc::Direct)); 00998 // 00999 // // Add a column containing records. 01000 // td.addColumn (ScalarRecordColumnDesc ("Rec1")); 01001 // } 01002 // </srcblock> 01003 01004 // <ANCHOR NAME="Tables:Data Managers"> 01005 // <h3>Data Managers</h3></ANCHOR> 01006 // 01007 // Data managers take care of the actual access to the data in a column. 01008 // There are two kinds of data managers: 01009 // <ol> 01010 // <li> <A HREF="#Tables:storage managers">Storage managers</A> -- 01011 // which store the data as such. They can only handle the standard 01012 // data types (Bool,...,String) as discussed in the section about the 01013 // <A HREF="#Tables:properties">table properties</A>. 01014 // <li> <A HREF="#Tables:virtual column engines">Virtual column engines</A> 01015 // -- which manipulate the data. 01016 // An engine could be a simple thing like scaling the data (as done 01017 // in classic AIPS to reduce data storage), but it could also be an 01018 // elaborate thing like applying corrections on-the-fly.
01019 // <br>An engine must be used to store data objects with a non-standard type. 01020 // It has to break down the object into items with standard data types 01021 // which can be stored with a storage manager. 01022 // </ol> 01023 // In general the user of a table does not need to be aware of which 01024 // data managers are being used underneath. Only when the table is created 01025 // data managers have to be bound to the columns. Thereafter it is 01026 // completely transparent. 01027 // 01028 // Data managers need to be registered, so they can be found when a table is 01029 // opened. All data managers mentioned below are part of the system and 01030 // pre-registered. 01031 // It is, however, also possible to load data managers on demand. If a data 01032 // manager is not registered, an attempt is made to load a shared library named after the 01033 // part of the data manager name (in lowercase) before a dot or left angle bracket (<). 01034 // The dot makes it possible to have multiple data managers in a shared library, 01035 // while the left angle bracket is meant for templated data manager classes. 01036 // <br>E.g. if <src>BitFlagsEngine<uChar></src> was not registered, the shared 01037 // library <src>libbitflagsengine.so</src> (or .dylib) will be loaded. If 01038 // successful, its function <src>register_bitflagsengine()</src> will be 01039 // executed which should register the data manager(s). Thereafter it is known 01040 // and will be used. For example in a file Register.h and Register.cc: 01041 // <srcblock> 01042 // // Declare in .h file as C function, so no name mangling is done. 01043 // extern "C" { 01044 // void register_bitflagsengine(); 01045 // } 01046 // // Implement in .cc file.
01047 // void register_bitflagsengine() 01048 // { 01049 // BitFlagsEngine<uChar>::registerClass(); 01050 // BitFlagsEngine<Short>::registerClass(); 01051 // BitFlagsEngine<Int>::registerClass(); 01052 // } 01053 // </srcblock> 01054 // There are several functions that can give information which data managers 01055 // are used for which columns and to obtain the characteristics and properties 01056 // of them. Class RODataManAccessor and derived classes can be used for it 01057 // as well as the functions <src>dataManagerInfo</src> and 01058 // <src>showStructure</src> in class Table. 01059 01060 // <ANCHOR NAME="Tables:storage managers"> 01061 // <h3>Storage Managers</h3></ANCHOR> 01062 // 01063 // Several storage managers are currently supported. 01064 // The default and preferred storage manager is <src>StandardStMan</src>. 01065 // Other storage managers should only be used if they pay off in 01066 // file space (like <src>IncrementalStMan</src> for slowly varying data) 01067 // or access speed (like the tiled storage managers for large data arrays). 01068 // <br>The storage managers store the data in a big or little endian 01069 // canonical format. The format can be specified when the table is created. 01070 // By default it uses the endian format as specified in the aipsrc variable 01071 // <code>table.endianformat</code> which can have the value local, big, 01072 // or little. The default is local. 01073 // <ol> 01074 // <li> 01075 // <linkto class="StandardStMan:description">StandardStMan</linkto> 01076 // stores all the values in so-called buckets (equally sized chunks 01077 // in the file). It requires little memory. 01078 // <br>It replaces the old <src>StManAipsIO</src>. 01079 // 01080 // <li> 01081 // <linkto class="IncrementalStMan:description">IncrementalStMan</linkto> 01082 // uses a storage mechanism resembling "incremental backups". A value 01083 // is only stored if it is different from the previous row. 
It is 01084 // very well suited for slowly varying data. 01085 // <br>The class <linkto class="ROIncrementalStManAccessor:description"> 01086 // ROIncrementalStManAccessor</linkto> can be used to tune the 01087 // behaviour of the <src>IncrementalStMan</src>. It contains functions 01088 // to deal with the cache size and to show the behaviour of the cache. 01089 // 01090 // <li> 01091 // The <a href="#Tables:TiledStMan">Tiled Storage Managers</a> 01092 // store the data as a tiled hypercube allowing for more or less equally 01093 // efficient data access along all main axes. It can be used for 01094 // UV-data as well as for image data. 01095 // 01096 // <li> 01097 // <linkto class="StManAipsIO:description">StManAipsIO</linkto> 01098 // uses <src>AipsIO</src> to store the data in the columns. 01099 // It supports all table functionality, but its I/O is probably not 01100 // as efficient as other storage managers. It also requires that 01101 // a large part of the table fits in memory. 01102 // <br>It should not be used anymore, because it uses a lot of memory 01103 // for larger tables and because it is not very robust in case an 01104 // application or system crashes. 01105 // 01106 // <li> 01107 // <linkto class="MemoryStMan:description">MemoryStMan</linkto> 01108 // holds the data in memory. It means that data 'stored' with this 01109 // storage manager are NOT persistent. 01110 // <br>This storage manager is primarily meant for tables held in 01111 // memory, but it can also be useful for temporary columns in 01112 // normal tables. Note, however, that if a table is accessed 01113 // concurrently from multiple processes, MemoryStMan data cannot be 01114 // synchronized. 01115 // </ol> 01116 // 01117 // The storage manager framework makes it possible to support arbitrary files 01118 // as tables. This has been used in a case where a file is filled 01119 // by the data acquisition system of a telescope. 
The file is simultaneously 01120 // used as a table using a dedicated storage manager. The table 01121 // system and storage manager provide a sync function to synchronize 01122 // the processes, i.e. to make the table system aware of changes 01123 // in the file size (thus in the table size) by the filling process. 01124 // 01125 // <note role=tip> 01126 // Not all data managers support all the table functionality. So, the choice 01127 // of a data manager can greatly influence the type of operations you can do 01128 // on the table as a whole. 01129 // For example, if a column uses the tiled storage manager, 01130 // it is not possible to delete rows from the table, because that storage 01131 // manager does not support deletion of rows. 01132 // However, it is always possible to delete all columns of a data 01133 // manager in one single call. 01134 // </note> 01135 01136 // <ANCHOR NAME="Tables:TiledStMan"> 01137 // <h3>Tiled Storage Manager</h3></ANCHOR> 01138 // The Tiled Storage Managers allow one to store the data of 01139 // one or more columns in a tiled way. Tiling means 01140 // that the data are stored without a preferred order to make access 01141 // along the different main axes equally efficient. This is done by 01142 // storing the data in so-called tiles (i.e. equally shaped subsets of an 01143 // array) to increase data locality. The user can define the tile shape 01144 // to optimize for the most frequently used access. 01145 // <p> 01146 // The Tiled Storage Manager has the following properties: 01147 // <ul> 01148 // <li> There can be more than one Tiled Storage Manager in 01149 // a table; each with its own (unique) name. 01150 // <li> Each Tiled Storage Manager can store an 01151 // N-dimensional so-called hypercolumn. 01152 // Elaborate hypercolumns can be defined using 01153 // <linkto file="TableDesc.h#defineHypercolumn"> 01154 // TableDesc::defineHypercolumn</linkto>.
01155 // <br>Note that defining a hypercolumn is only necessary if it 01156 // contains multiple columns or if the TiledDataStMan is used. 01157 // It means that in practice it is hardly ever needed to define a 01158 // hypercolumn. 01159 // <br>A hypercolumn consists of up to three types of columns: 01160 // <dl> 01161 // <dt> Data columns 01162 // <dd> contain the data to be stored in a tiled way. This will 01163 // be done in tiled hypercubes. 01164 // There must be at least one data column. 01165 // <br> For example: a table contains UV-data with 01166 // data columns "Visibility" and "Weight". 01167 // <dt> Coordinate columns 01168 // <dd> define the world coordinates of the pixels in the data columns. 01169 // Coordinate columns are optional, but if given there must 01170 // be N coordinate columns for an N-dimensional hypercolumn. 01171 // <br> 01172 // For example: the data in the example above is 4-dimensional 01173 // and has coordinate columns "Time", "Baseline", "Frequency", 01174 // and "Polarization". 01175 // <dt> Id columns 01176 // <dd> are needed if TiledDataStMan is used. 01177 // Different rows in the data columns can be stored in different 01178 // hypercubes. The values in the id column(s) uniquely identify 01179 // the hypercube a row is stored in. 01180 // <br> 01181 // For example: the line and continuum data in a MeasurementSet 01182 // table need to be stored in 2 different hypercubes (because 01183 // their shapes are different (see below)). A column containing 01184 // the type (line or continuum) has to be used as an id column. 01185 // </dl> 01186 // <li> If multiple data columns are used, the shape of their data 01187 // must be conforming in each individual row. 01188 // If data in different rows have different shapes, they must be 01189 // stored in different hypercubes, because a hypercube can only hold 01190 // data with conforming shapes. 
01191 // <br> 01192 // Thus in the example above, rows with line data will have conforming 01193 // shapes and can be stored in one hypercube. The continuum data 01194 // will have another shape and can be stored in another hypercube. 01195 // <br> 01196 // The storage manager keeps track of the mapping of rows to/from 01197 // hypercubes. 01198 // <li> Each hypercube can be tiled in its own way. It is not required 01199 // that an integer number of tiles fits in the hypercube. The last 01200 // tiles will be padded as needed. 01201 // <li> The last axis of a hypercube can be extensible. This means that 01202 // the size of that axis does not need to be defined when the 01203 // hypercube is defined in the storage manager. Instead, the hypercube 01204 // can be extended when another chunk of data has to be stored. 01205 // This can be very useful in, for example, a (quasi-)realtime 01206 // environment where the size of the time axis is not known. 01207 // <li> If coordinate columns are defined, they describe the coordinates 01208 // of the axes of the hypercubes. Each hypercube has its own set of 01209 // coordinates. 01210 // <li> Data and id columns have to be stored with the Tiled 01211 // Storage Manager. However, coordinate columns do not need to be 01212 // stored with the Tiled Storage Manager. 01213 // Especially in the case where the coordinates for a hypercube axis 01214 // are varying (i.e. dependent on other axes), another storage manager 01215 // has to be used (because the Tiled Storage Manager can only 01216 // hold constant coordinates). 01217 // </ul> 01218 // <p> 01219 // The following Tiled Storage Managers are available: 01220 // <dl> 01221 // <dt> <linkto class=TiledShapeStMan:description>TiledShapeStMan</linkto> 01222 // <dd> can be seen as a specialization of <src>TiledDataStMan</src> 01223 // by using the array shape as the id value. 
01224 // Similarly to <src>TiledDataStMan</src> it can maintain multiple 01225 // hypercubes and store multiple rows in a hypercube, but it is 01226 // easier to use, because the special <src>addHypercube</src> and 01227 // <src>extendHypercube</src> functions are not needed. 01228 // A hypercube is automatically added when a new array shape is 01229 // encountered. 01230 // <br> 01231 // This storage manager could be used for a table with a column 01232 // containing line and continuum data, which will result 01233 // in 2 hypercubes. 01234 // <dt> <linkto class=TiledCellStMan:description>TiledCellStMan</linkto> 01235 // <dd> creates (automatically) a new hypercube for each row. 01236 // Thus each row of the hypercolumn is stored in a separate hypercube. 01237 // Note that the row number serves as the id value. So an id column 01238 // is not needed, although there are multiple hypercubes. 01239 // <br> 01240 // This storage manager is meant for tables where the data arrays 01241 // in the different rows are not accessed together. One can think 01242 // of a column containing images. Each row contains an image and 01243 // only one image is shown at a time. 01244 // <dt> <linkto class=TiledColumnStMan:description>TiledColumnStMan</linkto> 01245 // <dd> creates one hypercube for the entire hypercolumn. Thus all cells 01246 // in the hypercube have to have the same shape and therefore this 01247 // storage manager is only possible if all columns in the hypercolumn 01248 // have the attribute FixedShape. 01249 // <br> 01250 // This storage manager could be used for a table with a column 01251 // containing images for the Stokes parameters I, Q, U, and V. 01252 // By storing them in one hypercube, it is possible to retrieve 01253 // the 4 Stokes values for a subset of the image or for an individual 01254 // pixel in a very efficient way.
01255 // <dt> <linkto class=TiledDataStMan:description>TiledDataStMan</linkto> 01256 // <dd> allows one to control the creation and extension of hypercubes. 01257 // This is done by means of the class 01258 // <linkto class=TiledDataStManAccessor:description> 01259 // TiledDataStManAccessor</linkto>. 01260 // It makes it possible to store, say, row 0-9 in hypercube A, 01261 // row 10-34 in hypercube B, row 35-54 in hypercube A again, etc.. 01262 // <br> 01263 // The drawback of this storage manager is that its hypercubes are not 01264 // automatically extended when adding new rows. The special functions 01265 // <src>addHypercube</src> and <src>extendHypercube</src> have to be 01266 // used making it somewhat tedious to use. 01267 // Therefore this storage manager may become obsolete in the near future. 01268 // </dl> 01269 // The Tiled Storage Managers have 3 ways to access and cache the data. 01270 // Class <linkto class=TSMOption>TSMOption</linkto> can be used to setup an 01271 // access choice and use it in a Table constructor. 01272 // <ul> 01273 // <li> The old way (the only way until January 2010) uses a cache 01274 // of its own to keep tiles that might need to be reused. It will always 01275 // access entire tiles, even if only a small part is needed. 01276 // It is possible to define a maximum cache size. The description of class 01277 // <linkto class=ROTiledStManAccessor>ROTiledStManAccessor</linkto> 01278 // contains a discussion about the effect of defining a maximum cache 01279 // size. 01280 // <li> Memory-mapping the data files. In this way the operating system 01281 // takes care of the IO and caching. However, the limited address space 01282 // may preclude using it for large tables on 32-bit systems. 01283 // <li> Use buffered IO and let the kernel's file cache take care of caching. 
01284 // It will access the data in chunks of the given buffer size, so the 01285 // entire tile does not need to be accessed if only a small part is 01286 // needed. 01287 // </ul> 01288 // Apart from reading, all access ways described above can also handle writing 01289 // and extending tables. They create fully equal files. Both little and big 01290 // endian data can be read or written. 01291 01292 // <ANCHOR NAME="Tables:virtual column engines"> 01293 // <h3>Virtual Column Engines</h3></ANCHOR> 01294 // 01295 // Virtual column engines are used to implement the virtual (i.e. 01296 // calculated-on-the-fly) columns. The Table system provides 01297 // an abstract base class (or "interface class") 01298 // <linkto class="VirtualColumnEngine:description">VirtualColumnEngine</linkto> 01299 // that specifies the protocol for these engines. 01300 // The programmer must derive a concrete class to implement 01301 // the application-specific virtual column. 01302 // <p> 01303 // For example: the programmer 01304 // needs a column in a table which is the difference between two other 01305 // columns. (Perhaps these two other columns are updated periodically 01306 // during the execution of a program.) A good way to handle this would 01307 // be to have a virtual column in the table, and write a virtual column 01308 // engine which knows how to calculate the difference between corresponding 01309 // cells of the two other columns. So the result is that accessing a 01310 // particular cell of the virtual column invokes the virtual column engine, 01311 // which then gets the values from the other two columns, and returns their 01312 // difference. This particular example could be done using 01313 // <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>. 
01314 // <p> 01315 // Several virtual column engines exist: 01316 // <ol> 01317 // <li> The class 01318 // <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto> 01319 // makes it possible to define a column as an arbitrary expression of 01320 // other columns. It uses the <a href="../notes/199.html">TaQL</a> 01321 // CALC command. The virtual column can be a scalar or an array and 01322 // can have one of the standard data types supported by the Table System. 01323 // <li> The class 01324 // <linkto class="BitFlagsEngine:description">BitFlagsEngine</linkto> 01325 // maps an integer bit flags column to a Bool column. A read and write mask 01326 // can be defined telling which bits to take into account when mapping 01327 // to and from Bool (thus when reading or writing the Bool). 01328 // <li> The class 01329 // <linkto class="CompressFloat:description">CompressFloat</linkto> 01330 // compresses a single precision floating point array by scaling the 01331 // values to shorts (16-bit integer). 01332 // <li> The class 01333 // <linkto class="CompressComplex:description">CompressComplex</linkto> 01334 // compresses a single precision complex array by scaling the 01335 // values to shorts (16-bit integer). In fact, the 2 parts of the complex 01336 // number are combined into a 32-bit integer. 01337 // <li> The class 01338 // <linkto class="CompressComplexSD:description">CompressComplexSD</linkto> 01339 // does the same as CompressComplex, but optimizes for the case where the 01340 // imaginary part is zero (which is often the case for Single Dish data). 01341 // <li> The double templated class 01342 // <linkto class="ScaledArrayEngine:description">ScaledArrayEngine</linkto> 01343 // scales the data in an array from, for example, 01344 // float to short before putting it. 01345 // <li> The double templated class 01346 // <linkto class="MappedArrayEngine:description">MappedArrayEngine</linkto> 01347 // converts the data from one data type to another.
Sometimes it might be 01348 // needed to store the residual data in an MS in double precision. 01349 // Because the imaging task can only handle single precision, this engine 01350 // can be used to map the data from double to single precision. 01351 // <li> The double templated class 01352 // <linkto class="RetypedArrayEngine:description">RetypedArrayEngine</linkto> 01353 // converts the data from one data type to another with the possibility 01354 // to reduce the number of dimensions. For example, it can be used to 01355 // store a 2-d array of StokesVector objects as a 3-d array of floats 01356 // by treating the 4 data elements as an extra array axis. If the 01357 // StokesVector class is simple, it can be done very efficiently. 01358 // <li> The class 01359 // <linkto class="ForwardColumnEngine:description"> 01360 // ForwardColumnEngine</linkto> 01361 // forwards the gets and puts on a row in a column to the same row 01362 // in a column with the same name in another table. This provides 01363 // a virtual copy of the referenced column. 01364 // <li> The class 01365 // <linkto class="ForwardColumnIndexedRowEngine:description"> 01366 // ForwardColumnIndexedRowEngine</linkto> 01367 // is similar to <src>ForwardColumnEngine</src>. 01368 // However, instead of forwarding it to the same row it uses a 01369 // column to map its row number to a row number in the referenced 01370 // table. In this way multiple rows can share the same data. 01371 // This data manager only allows for get operations. 01372 // <li> The calibration module has implemented a virtual column engine 01373 // to do on-the-fly calibration in a transparent way. 01374 // </ol> 01375 // To handle arbitrary data types the templated abstract base class 01376 // <linkto class="VSCEngine:description">VSCEngine</linkto> 01377 // has been written. An example of how to use this class can be 01378 // found in the demo program <src>dVSCEngine.cc</src>.
01379 01380 // <ANCHOR NAME="Tables:LockSync"> 01381 // <h3>Table locking and synchronization</h3></ANCHOR> 01382 // 01383 // Multiple concurrent readers and writers (also via NFS) of a 01384 // table are supported by means of a locking/synchronization mechanism. 01385 // This mechanism is not very sophisticated in the sense that it is 01386 // very coarsely grained. When locking, the entire table gets locked. 01387 // A special lock file is used to lock the table. This lock file also 01388 // contains some synchronization data. 01389 // <p> 01390 // Five ways of locking are supported (see class 01391 // <linkto class=TableLock>TableLock</linkto>): 01392 // <dl> 01393 // <dt> TableLock::PermanentLocking(Wait) 01394 // <dd> locks the table permanently (from open till close). This means 01395 // that one writer OR multiple readers are possible. 01396 // <dt> TableLock::AutoLocking 01397 // <dd> does the locking automatically. This is the default mode. 01398 // This mode makes it possible that a table is shared amongst 01399 // processes without the user needing to write any special code. 01400 // It also means that a lock is only released when needed. 01401 // <dt> TableLock::AutoNoReadLocking 01402 // <dd> is similar to AutoLocking. However, no lock is acquired when 01403 // reading the table making it possible to read the table while 01404 // another process holds a write-lock. It also means that for read 01405 // purposes no automatic synchronization is done when the table is 01406 // updated in another process. 01407 // Explicit synchronization can be done by means of the function 01408 // <src>Table::resync</src>. 01409 // <dt> TableLock::UserLocking 01410 // <dd> requires that the programmer explicitly acquires and releases 01411 // a lock on the table. This makes some kind of transaction 01412 // processing possible. E.g. set a write lock, add a row, 01413 // write all data into the row and release the lock. 
01414 // The Table functions <src>lock</src> and <src>unlock</src> 01415 // have to be used to acquire and release a (read or write) lock. 01416 // <dt> TableLock::UserNoReadLocking 01417 // <dd> is similar to UserLocking. However, similarly to AutoNoReadLocking 01418 // no lock is needed to read the table. 01419 // </dl> 01420 // Synchronization of the processes accessing the same table is done 01421 // by means of the lock file. When a lock is released, the storage 01422 // managers flush their data into the table files. Some synchronization data 01423 // is written into the lock file telling the new number of table rows 01424 // and telling which storage managers have written data. 01425 // This information is read when another process acquires the lock 01426 // and is used to determine which storage managers have to refresh 01427 // their internal caches. 01428 // <br>Note that for the NoReadLocking modes (see above) explicit 01429 // synchronization might be needed using <src>Table::resync</src>. 01430 // <p> 01431 // The function <src>Table::hasDataChanged</src> can be used to check 01432 // if a table is (being) changed by another process. In this way 01433 // a program can react to it. E.g. the table browser can refresh its 01434 // screen when the underlying table is changed. 01435 // <p> 01436 // In general the default locking option will do. 01437 // From the above it should be clear that heavy concurrent access 01438 // results in a lot of flushing, thus will have a negative impact on 01439 // performance. If uninterrupted access to a table is needed, 01440 // the <src>PermanentLocking</src> option should be used. 01441 // If transaction-like processing is done (e.g. updating a table 01442 // containing an observation catalogue), the <src>UserLocking</src> 01443 // option is probably best. 01444 // <p> 01445 // Creation or deletion of a table is not possible if that table 01446 // is still open in another process.
The function 01447 // <src>Table::isMultiUsed()</src> can be used to check if a table 01448 // is open in other processes. 01449 // <br> 01450 // The function <src>deleteTable</src> should be used to delete 01451 // a table. Before deleting the table it ensures that it is writable 01452 // and that it is not open in the current or another process. 01453 // <p> 01454 // The following example wants to read the table uninterrupted, thus it uses 01455 // the <src>PermanentLocking</src> option. It also wants to wait 01456 // until the lock is actually acquired. 01457 // Note that the destructor closes the table and releases the lock. 01458 // <srcblock> 01459 // // Open the table (readonly). 01460 // // Acquire a permanent (read) lock. 01461 // // It waits until the lock is acquired. 01462 // Table tab ("some.name", 01463 // TableLock(TableLock::PermanentLockingWait)); 01464 // </srcblock> 01465 // 01466 // The following example uses the automatic locking. 01467 // It tells the system to check about every 20 seconds if another 01468 // process wants access to the table. 01469 // <srcblock> 01470 // // Open the table (readonly). 01471 // Table tab ("some.name", 01472 // TableLock(TableLock::AutoLocking, 20)); 01473 // </srcblock> 01474 // 01475 // The following example gets data (say from a GUI) and writes it 01476 // as a row into the table. To lock the table as little as possible 01477 // the lock is acquired just before writing and released immediately 01478 // thereafter. 01479 // <srcblock> 01480 // // Open the table (writable). 01481 // Table tab ("some.name", 01482 // TableLock(TableLock::UserLocking), 01483 // Table::Update); 01484 // while (True) { 01485 // get input data 01486 // tab.lock(); // Acquire a write lock and wait for it. 01487 // tab.addRow(); 01488 // write data into the row 01489 // tab.unlock(); // Release the lock.
01490 // } 01491 // </srcblock> 01492 // 01493 // The following example deletes a table if it is not used in 01494 // another process. 01495 // <srcblock> 01496 // Table tab ("some.name"); 01497 // if (! tab.isMultiUsed()) { 01498 // tab.markForDelete(); 01499 // } 01500 // </srcblock> 01501 01502 // <ANCHOR NAME="Tables:KeyLookup"> 01503 // <h3>Table lookup based on a key</h3></ANCHOR> 01504 // 01505 // Class <linkto class=ColumnsIndex>ColumnsIndex</linkto> offers the 01506 // user a means to find the rows matching a given key or key range. 01507 // It is a somewhat primitive replacement of a B-tree index and in the 01508 // future it may be replaced by a proper B+-tree implementation. 01509 // <p> 01510 // The <src>ColumnsIndex</src> class makes it possible to build an 01511 // in-core index on one or more columns. Looking up a key or key range 01512 // is done using a binary search on that index. It returns a vector 01513 // containing the row numbers of the rows matching the key (range). 01514 // <p> 01515 // The class is not capable of tracing changes in the underlying column(s). 01516 // It detects a change in the number of rows and updates the index 01517 // accordingly. However, it has to be told explicitly when a value 01518 // in the underlying column(s) changes. 01519 // <p> 01520 // The following example shows how the class can be used. 01521 // <example> 01522 // Suppose one has an antenna table with key ANTENNA. 01523 // <srcblock> 01524 // // Open the table and make an index for column ANTENNA. 01525 // Table tab("antenna.tab"); 01526 // ColumnsIndex colInx(tab, "ANTENNA"); 01527 // // Make a RecordFieldPtr for the ANTENNA field in the index key record. 01528 // // Its data type has to match the data type of the column. 01529 // RecordFieldPtr<Int> antFld(colInx.accessKey(), "ANTENNA"); 01530 // // Now loop in some way and find the row for the antenna 01531 // // involved in that loop. 01532 // Bool found; 01533 // while (...)
{ 01534 // // Fill the key field and get the row number. 01535 // // ANTENNA is a unique key, so only one row number matches. 01536 // // Otherwise function getRowNumbers had to be used. 01537 // *antFld = antenna; 01538 // uInt antRownr = colInx.getRowNumber (found); 01539 // if (!found) { 01540 // cout << "Antenna " << antenna << " is unknown" << endl; 01541 // } else { 01542 // // antRownr can now be used to get data from that row in 01543 // // the antenna table. 01544 // } 01545 // } 01546 // </srcblock> 01547 // </example> 01548 // <linkto class=ColumnsIndex>ColumnsIndex</linkto> itself contains a more 01549 // advanced example. It shows how to use a private compare function 01550 // to adjust the lookup if the index does not contain single 01551 // key values, but intervals instead. This is useful if a row in 01552 // a (sub)table is valid for, say, a time range instead of a single 01553 // timestamp. 01554 01555 // <ANCHOR NAME="Tables:performance"> 01556 // <h3>Performance and robustness considerations</h3></ANCHOR> 01557 // 01558 // The Table System resembles a database system, but it is not as robust. 01559 // It lacks the transaction and logging facilities common to data base systems. 01560 // It means that in case of a crash data might be lost. 01561 // To reduce the risk of data loss to 01562 // a minimum, it is advisable to regularly do a <tt>flush</tt>, optionally 01563 // with an <tt>fsync</tt> to ensure that all data are really written. 01564 // However, that can degrade the performance because it involves extra writes. 01565 // So one should find the right balance between robustness and performance. 01566 // 01567 // To get a good feeling for the performance issues, it is important to 01568 // understand some of the internals of the Table System. 01569 // <br>The storage managers drive the performance. All storage managers use 01570 // buckets (called tiles for the TiledStMan) which contain the data. 01571 // All IO is done by bucket. 
The bucket/tile size is defined when creating 01572 // the storage manager objects. Sometimes the default will do, but usually 01573 // it is better to set it explicitly. 01574 // 01575 // It is best to do a flush when a tile is full. 01576 // For example: <br> 01577 // When creating a MeasurementSet containing N antennae (thus N*(N-1)/2 baselines 01578 // or N*(N+1)/2 if auto-correlations are stored as well) it makes sense to 01579 // store, say, N/2 rows in a tile and do a flush each time all baselines 01580 // are written. In that way tiles are fully filled when doing the flush, so 01581 // no extra IO is involved. 01582 // <br>Here is some code showing this when creating a MeasurementSet. 01583 // The code should speak for itself. 01584 // <srcblock> 01585 // MS* createMS (const String& msName, int nrchan, int nrant) 01586 // { 01587 // // Get the MS main default table description. 01588 // TableDesc td = MS::requiredTableDesc(); 01589 // // Add the data column and its unit. 01590 // MS::addColumnToDesc(td, MS::DATA, 2); 01591 // td.rwColumnDesc(MS::columnName(MS::DATA)).rwKeywordSet(). 01592 // define("UNIT","Jy"); 01593 // // Store the DATA and FLAG column in two separate files. 01594 // // In this way accessing FLAG only is much cheaper than 01595 // // when combining DATA and FLAG. 01596 // // All data have the same shape, thus use TiledColumnStMan. 01597 // // Also store UVW with TiledColumnStMan. 01598 // Vector<String> tsmNames(1); 01599 // tsmNames[0] = MS::columnName(MS::DATA); 01600 // td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq)); 01601 // td.defineHypercolumn("TiledData", 3, tsmNames); 01602 // tsmNames[0] = MS::columnName(MS::FLAG); 01603 // td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq)); 01604 // td.defineHypercolumn("TiledFlag", 3, tsmNames); 01605 // tsmNames[0] = MS::columnName(MS::UVW); 01606 // td.defineHypercolumn("TiledUVW", 2, tsmNames); 01607 // // Setup the new table.
01608 // SetupNewTable newTab(msName, td, Table::New); 01609 // // Most columns vary slowly and use the IncrStMan. 01610 // IncrementalStMan incrStMan("ISMData"); 01611 // // A few columns use the StandardStMan (set an appropriate bucket size). 01612 // StandardStMan stanStMan("SSMData", 32768); 01613 // // Store all pol and freq and some rows in a single tile. 01614 // // Autocorrelations are written, thus in total there are 01615 // // nrant*(nrant+1)/2 baselines. Ensure a baseline takes up an 01616 // // integer number of tiles. 01617 // TiledColumnStMan tiledData("TiledData", 01618 // IPosition(3,4,nrchan,(nrant+1)/2)); 01619 // TiledColumnStMan tiledFlag("TiledFlag", 01620 // IPosition(3,4,nrchan,8*(nrant+1)/2)); 01621 // TiledColumnStMan tiledUVW("TiledUVW", 01622 // IPosition(2,3,nrant*(nrant+1)/2)); 01623 // newTab.bindAll (incrStMan); 01624 // newTab.bindColumn(MS::columnName(MS::ANTENNA1),stanStMan); 01625 // newTab.bindColumn(MS::columnName(MS::ANTENNA2),stanStMan); 01626 // newTab.bindColumn(MS::columnName(MS::DATA),tiledData); 01627 // newTab.bindColumn(MS::columnName(MS::FLAG),tiledFlag); 01628 // newTab.bindColumn(MS::columnName(MS::UVW),tiledUVW); 01629 // // Create the MS and its subtables. 01630 // // Get access to its columns. 01631 // MS* msp = new MeasurementSet(newTab); 01632 // // Create all subtables. 01633 // // Do this after the creation of optional subtables, 01634 // // so the MS will know about those optional subtables. 01635 // msp->createDefaultSubtables (Table::New); 01636 // return msp; 01637 // } 01638 // </srcblock> 01639 01640 // <h4>Some more performance considerations</h4> 01641 // Which storage managers to use and how to use them depends heavily on 01642 // the type of data and the access patterns to the data. Here follow some 01643 // guidelines: 01644 // <ol> 01645 // <li> Scalar data can be stored with the StandardStMan (SSM) or 01646 // IncrementalStMan (ISM). For slowly varying data (e.g.
the TIME column 01647 // in a MeasurementSet) it is best to use the ISM. Otherwise the SSM. 01648 // Note that very long strings (longer than the bucketsize) can only 01649 // be stored with the SSM. 01650 // <li> Any number of storage managers can be used. In fact, each column 01651 // can have a storage manager of its own resulting in column-wise 01652 // stored data which is more and more used in data base systems. 01653 // In that way a query or sort on that column is very fast, because 01654 // the buckets to read only contain data of that column. 01655 // In practice one can decide to combine a few frequently used columns 01656 // in a storage manager. 01657 // <li> Array data can be stored with any column manager. Small fixed size 01658 // arrays can be stored directly with the SSM 01659 // (or ISM if not changing much). 01660 // However, they can also be stored with a TiledStMan (TSM) as shown 01661 // for the UVW column in the example above. 01662 // <br> Large arrays should usually be stored with a TSM. However, 01663 // if it must be possible to change the shape of an array after it 01664 // was stored, the SSM (or ISM) must be used. Note that in that 01665 // case a lot of disk space can be wasted, because the SSM and ISM 01666 // store the array data at the end of the file if the array got 01667 // bigger and do not reuse the old space. The only way to 01668 // reclaim it is by making a deep copy of the entire table. 01669 // <li> If an array is stored with a TSM, it is important to decide 01670 // which TSM to use. 01671 // <ol> 01672 // <li> The TiledColumnStMan is the most efficient, but only suitable 01673 // for arrays having the same shape in the entire column. 01674 // <li> The TiledShapeStMan is suitable for columns where the arrays 01675 // can have a few shapes. 01676 // <li> The TiledCellStMan is suitable for columns where the arrays 01677 // can have many different shapes. 
01678 // </ol> 01679 // This is discussed in more detail 01680 // <a href="#Tables:TiledStMan">above</a>. 01681 // <li> If storing an array with a TSM, it can be very important to 01682 // choose the right tile shape. Not only does this define the size 01683 // of a tile, but it also defines if access in other directions 01684 // than the natural direction can be fast. It is also discussed in 01685 // more detail <a href="#Tables:TiledStMan">above</a>. 01686 // <li> Columns can be combined in a single TiledStMan. For instance, combining DATA 01687 // and FLAG is advantageous if FLAG is always used with DATA. However, if FLAG 01688 // is used on its own (e.g. in combination with CORRECTED_DATA), it is better 01689 // to separate them, otherwise tiles containing FLAG also contain DATA making the 01690 // tiles much bigger, thus more expensive to access. 01691 // </ol> 01692 01693 // </synopsis> 01694 // </module> 01695 01696 01697 01698 } //# NAMESPACE CASA - END 01699 01700 #endif