casa: casacore/tables/Tables.h Source File

Go to the documentation of this file.
00001 //# Tables.h: The Tables module - Casacore data storage
00002 //# Copyright (C) 1994-2010
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //# $Id: Tables.h 21014 2011-01-06 08:57:49Z gervandiepen $
00027 
00028 #ifndef TABLES_TABLES_H
00029 #define TABLES_TABLES_H
00030 
00031 //# Includes
00032 //#   table description
00033 #include <tables/Tables/TableDesc.h>
00034 #include <tables/Tables/ColumnDesc.h>
00035 #include <tables/Tables/ScaColDesc.h>
00036 #include <tables/Tables/ArrColDesc.h>
00037 #include <tables/Tables/ScaRecordColDesc.h>
00038 
00039 //#   storage managers
00040 #include <tables/Tables/StManAipsIO.h>
00041 #include <tables/Tables/StandardStMan.h>
00042 #include <tables/Tables/StandardStManAccessor.h>
00043 #include <tables/Tables/IncrementalStMan.h>
00044 #include <tables/Tables/IncrStManAccessor.h>
00045 #include <tables/Tables/TiledDataStMan.h>
00046 #include <tables/Tables/TiledDataStManAccessor.h>
00047 #include <tables/Tables/TiledCellStMan.h>
00048 #include <tables/Tables/TiledColumnStMan.h>
00049 #include <tables/Tables/TiledShapeStMan.h>
00050 #include <tables/Tables/MemoryStMan.h>
00051 
00052 //#   virtual column engines
00053 #include <tables/Tables/RetypedArrayEngine.h>
00054 #include <tables/Tables/RetypedArraySetGet.h>
00055 #include <tables/Tables/ScaledArrayEngine.h>
00056 #include <tables/Tables/MappedArrayEngine.h>
00057 #include <tables/Tables/ForwardCol.h>
00058 #include <tables/Tables/ForwardColRow.h>
00059 #include <tables/Tables/CompressComplex.h>
00060 #include <tables/Tables/CompressFloat.h>
00061 #include <tables/Tables/VirtualTaQLColumn.h>
00062 
00063 //#   table access
00064 #include <tables/Tables/Table.h>
00065 #include <tables/Tables/TableLock.h>
00066 #include <tables/Tables/SetupNewTab.h>
00067 #include <tables/Tables/ScalarColumn.h>
00068 #include <tables/Tables/ArrayColumn.h>
00069 #include <tables/Tables/TableRow.h>
00070 #include <tables/Tables/TableCopy.h>
00071 #include <casa/Arrays/Array.h>
00072 #include <casa/Arrays/Slicer.h>
00073 #include <casa/Arrays/Slice.h>
00074 
00075 //#   keywords
00076 #include <tables/Tables/TableRecord.h>
00077 #include <casa/Containers/RecordField.h>
00078 
00079 //#   table lookup
00080 #include <tables/Tables/ColumnsIndex.h>
00081 #include <tables/Tables/ColumnsIndexArray.h>
00082 
00083 //#   table expressions (for selection of rows)
00084 #include <tables/Tables/ExprNode.h>
00085 #include <tables/Tables/ExprNodeSet.h>
00086 #include <tables/Tables/TableParse.h>
00087 
00088 //#   table vectors
00089 #include <tables/Tables/TableVector.h>
00090 #include <tables/Tables/TabVecMath.h>
00091 #include <tables/Tables/TabVecLogic.h>
00092 
00093 
00094 namespace casa { //# NAMESPACE CASA - BEGIN
00095 
00096 // <module>
00097 
00098 // <summary>
00099 // Tables are the data storage mechanism for Casacore
00100 // </summary>
00101 
00102 // <use visibility=export>
00103 
00104 // <reviewed reviewer="jhorstko" date="1994/08/30" tests="" demos="">
00105 // </reviewed>
00106 
00107 // <prerequisite>
00108 //    <li> <linkto class="Record:description">Record</linkto> class
00109 // </prerequisite>
00110 
00111 // <etymology>
00112 // "Table" is a formal term from relational database theory: 
00113 //   <em> "The organizing principle in a relational database is the TABLE,
00114 //    a rectangular, row/column arrangement of data values."</em>
00115 // Casacore tables are extensions to traditional tables, but are similar
00116 // enough that we use the same name.  There is also a strong resemblance
00117 // between the uses of Casacore tables, and FITS binary tables, which
00118 // provides another reason to use "Tables" to describe the Casacore data
00119 // storage mechanism.
00120 // </etymology>
00121 
00122 // <synopsis> 
00123 // Tables are the fundamental storage mechanism for Casacore. This document
00124 // explains <A HREF="#Tables:motivation">why</A> they had to be made,
00125 // <A HREF="#Tables:properties">what</A> their properties are, and 
00126 // <A HREF="#Tables:open">how</A> to use them. The last subject is
00127 // discussed and illustrated in a sequence of sections:
00128 // <UL>
00129 //  <LI> <A HREF="#Tables:open">opening</A> an existing table,
00130 //  <LI> <A HREF="#Tables:read">reading</A> from a table,
00131 //  <LI> <A HREF="#Tables:creation">creating</A> a new table,
00132 //  <LI> <A HREF="#Tables:write">writing</A> into a table,
00133 //  <LI> <A HREF="#Tables:row-access">accessing rows</A> in a table,
00134 //  <LI> <A HREF="#Tables:select and sort">selection and sorting</A>
00135 //       (see also <A HREF="../notes/199.html">Table Query Language</A>),
00136 //  <LI> <A HREF="#Tables:concatenation">concatenating similar tables</A>,
00137 //  <LI> <A HREF="#Tables:iterate">iterating</A> through a table,
00138 //  <LI> <A HREF="#Tables:LockSync">locking/synchronization</A>
00139 //       for concurrent access,
00140 //  <LI> <A HREF="#Tables:KeyLookup">indexing</A> a table for faster lookup,
00141 //  <LI> <A HREF="#Tables:vectors">vector operations</A> on a column,
00142 //  <LI> <A HREF="#Tables:performance">performance and robustness</A> considerations.
00143 // </UL>
00144 
00145 
00146 // <ANCHOR NAME="Tables:motivation">
00147 // <motivation></ANCHOR>
00148 //
00149 // The Casacore tables are mainly based upon the ideas of Allen Farris,
00150 // as laid out in the
00151 // <A HREF="http://aips2.cv.nrao.edu/aips++/docs/reference/Database.ps.gz">
00152 // AIPS++ Database document</A>, from where the following paragraph is taken:
00153 // 
00154 // <p>
00155 // Traditional relational database tables have two features that
00156 // decisively limit their applicability to scientific data.  First, an item of
00157 // data in a column of a table must be atomic -- it must have no internal
00158 // structure.  A consequence of this restriction is that relational
00159 // databases are unable to deal with arrays of data items.  Second, an
00160 // item of data in a column of a table must not have any direct or
00161 // implied linkages to other items of data or data aggregates.  This
00162 // restriction makes it difficult to model complex relationships between
00163 // collections of data.  While these restrictions may make it easy to
00164 // define a mathematically complete set of data manipulation operations,
00165 // they are simply intolerable in a scientific data-handling context.
00166 // Multi-dimensional arrays are frequently the most natural modes in
00167 // which to discuss and think about scientific data.  In addition,
00168 // scientific data often requires complex calibration operations that
00169 // must draw on large bodies of data about equipment and its performance
00170 // in various states.  The restrictions imposed by the relational model
00171 // make it very difficult to deal with complex problems of this nature.
00172 // <p>
00173 // 
00174 // In response to these limitations, and other needs, the Casacore tables were
00175 // designed.
00176 // </motivation>
00177 
00178 // <ANCHOR NAME="Tables:properties">
00179 // <h3>Table Properties</h3></ANCHOR>
00180 //
00181 // Casacore tables have the following properties:
00182 // <ul>
00183 //  <li> A table consists of a number of rows and columns.
00184 //       <A HREF="#Tables:keywords">Keyword/value pairs</A> may be defined
00185 //       for the table as a whole and for individual columns. A keyword/value
00186 //       pair for a column could, for instance, define its unit.
00187 //  <li> Each table has a <A HREF="#Tables:Table Description">description</A>
00188 //       which specifies the number and type of columns, and maybe initial
00189 //       keyword sets and default values for the columns. 
00190 //  <li> A cell in a column may contain
00191 //       <UL>
00192 //        <LI> a scalar;
00193 //        <LI> a "direct" array -- which must have the same shape in all
00194 //             cells of a column, is usually small, and is stored in the
00195 //             table itself; or
00196 //        <LI> an "indirect" array -- which may have different shapes in
00197 //             different cells of the same column, can be arbitrarily large,
00198 //             and is stored in a separate file.
00199 //       </UL>
00200 //  <li> A column may be
00201 //       <UL>
00202 //        <LI> "filled" -- containing actual data, or
00203 //        <LI> "virtual" -- containing a recipe telling how the data will
00204 //             be generated dynamically
00205 //       </UL>
00206 //  <li> Only the standard Casacore data types can be used in filled
00207 //       columns, be they scalars or arrays:  Bool, uChar, Short, uShort,
00208 //       Int, uInt, float, double, Complex, DComplex and String.
00209 //       Furthermore, scalars containing
00210 //       <linkto class=TableRecord>record</linkto> values are possible.
00211 //  <li> A column can have a default value, which will automatically be stored
00212 //       in a cell of the column, when a row is added to the table.
00213 //  <li> <A HREF="#Tables:Data Managers">Data managers</A> handle the
00214 //       reading, writing and generation of data. Each column in a table can
00215 //       be assigned its own data manager, which allows for optimization of
00216 //       the data storage per column. The choice of data manager determines
00217 //       whether a column is filled or virtual.
00218 //  <li> Table data are stored in a canonical format, so they can be read
00219 //       on any machine. To avoid needless swapping of bytes, the data can
00220 //       be stored in big endian (as used on e.g. SUN) or little endian
00221 //       (as used on Intel PC-s) canonical format. 
00222 //       By default it uses the format specified in the aipsrc variable
00223 //       <code>table.endianformat</code> which defaults to
00224 //       <code>Table::LocalEndian</code> (thus the endian format of the
00225 //       machine being used).
00226 //  <li> The SQL-like
00227 //       <a href="../notes/199.html">Table Query Language</a> (TaQL)
00228 //       can be used to do operations on tables like
00229 //       select, sort, update, insert, delete, and create.
00230 // </ul>
00231 //
00232 // Tables can be in one of three forms:
00233 // <ul>
00234 // <li> A plain table is a table stored on disk.
00235 //      It can be shared by multiple processes.
00236 // <li> A memory table is a table held in memory.
00237 //      It is a process specific table, thus not sharable.
00238 //      The <linkto class=Table>Table::copy</linkto> function can be used
00239 //      to turn a memory table into a plain table.
00240 // <li> A reference table is a table referencing a plain or memory table.
00241 //      It is the result of a selection or sort on another table.
00242 //      A reference table references the data in the other table, thus
00243 //      changing data in a reference table means that the data in the
00244 //      original table are changed.
00245 //      The <linkto class=Table>Table::deepCopy</linkto> function can be
00246 //      used to turn a reference table into a plain table.
00247 // </ul>
00248 // Concurrent access from different processes to the same plain table is
00249 // fully supported by means of a <A HREF="#Tables:LockSync">
00250 // locking/synchronization</A> mechanism. Concurrent access over NFS is also
00251 // supported.
00252 // <p>
00253 // A (somewhat primitive) mechanism is available to do a
00254 // <A HREF="#Tables:KeyLookup">table lookup</A> based on the contents
00255 // of a key. In the future this might be replaced by a proper B+-tree index
00256 // mechanism.
00257 
00258 // <ANCHOR NAME="Tables:open">
00259 // <h3>Opening an Existing Table</h3></ANCHOR>
00260 //
00261 // To open an existing table you just create a
00262 // <linkto class="Table:description">Table</linkto> object giving
00263 // the name of the table, like:
00264 //
00265 // <srcblock>
00266 //     Table readonly_table ("tableName");
00267 //     // or
00268 //     Table read_and_write_table ("tableName", Table::Update);
00269 // </srcblock>
00270 //
00271 // The constructor option determines whether the table will be opened as
00272 // readonly or as read/write. A readonly table file must be opened 
00273 // as readonly, otherwise an exception is thrown. The functions
00274 // <linkto class="Table">Table::isWritable(...)</linkto>
00275 // can be used to determine if a table is writable.
00276 //
00277 // When the table is opened, the data managers are reinstantiated
00278 // according to their definition at table creation.
00279 
00280 // <ANCHOR NAME="Tables:read">
00281 // <h3>Reading from a Table</h3></ANCHOR>
00282 //
00283 // You can read data from a table column with the "get" functions
00284 // in the classes
00285 // <linkto class="ROScalarColumn:description">ROScalarColumn&lt;T&gt;</linkto>
00286 // and
00287 // <linkto class="ROArrayColumn:description">ROArrayColumn&lt;T&gt;</linkto>.
00288 // For scalars of a standard data type (i.e. Bool, uChar, Int, Short,
00289 // uShort, uInt, float, double, Complex, DComplex and String) you could
00290 // instead use 
00291 // <linkto class="ROTableColumn">ROTableColumn::getScalar(...)</linkto> or
00292 // <linkto class="ROTableColumn">ROTableColumn::asXXX(...)</linkto>.
00293 // These functions offer an extra: they do automatic data type promotion;
00294 // so that you can, for example, get a double value from a float column.
00295 //
00296 // These "get" functions are used in the same way as the simple "put"
00297 // functions described in <A HREF="#Tables:write">Writing into a Table</A>.
00298 // <p>
00299 // <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto>
00300 // is derived from ROScalarColumn&lt;T&gt;, and
00301 // therefore has the same "get" functions. However, if a
00302 // ScalarColumn&lt;T&gt; object is constructed for a non-writable column,
00303 // an exception is thrown. Only ROScalarColumn&lt;T&gt; objects can be
00304 // constructed for nonwritable columns.
00305 // The same is true for
00306 // <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto> and
00307 // <linkto class="TableColumn:description">TableColumn</linkto>.
00308 // <p>
00309 // A typical program could look like:
00310 // <srcblock>
00311 // #include <tables/Tables/Table.h>
00312 // #include <tables/Tables/ScalarColumn.h>
00313 // #include <tables/Tables/ArrayColumn.h>
00314 // #include <casa/Arrays/Vector.h>
00315 // #include <casa/Arrays/Slicer.h>
00316 // #include <casa/Arrays/ArrayMath.h>
00317 // #include <iostream>
00318 // 
00319 // int main()
00320 // {
00321 //     // Open the table (readonly).
00322 //     Table tab ("some.name");
00323 //
00324 //     // Construct the various column objects.
00325 //     // Their data type has to match the data type in the table description.
00326 //     ROScalarColumn<Int> acCol (tab, "ac");
00327 //     ROArrayColumn<Float> arr2Col (tab, "arr2");
00328 //
00329 //     // Loop through all rows in the table.
00330 //     uInt nrow = tab.nrow();
00331 //     for (uInt i=0; i<nrow; i++) {
00332 //         // Read the row for both columns.
00333 //         cout << "Column ac in row i = " << acCol(i) << endl;
00334 //         Array<Float> array = arr2Col.get (i);
00335 //     }
00336 //
00337 //     // Show the entire column ac,
00338 //     // and show the 10th element of arr2 in each row.
00339 //     cout << acCol.getColumn();
00340 //     cout << arr2Col.getColumn (Slicer(Slice(10)));
00341 // }
00342 // </srcblock>
00343 
00344 // <ANCHOR NAME="Tables:creation">
00345 // <h3>Creating a Table</h3></ANCHOR>
00346 //
00347 // The creation of a table is a multi-step process:
00348 // <ol>
00349 //  <li>
00350 //   Create a <A HREF="#Tables:Table Description">table description</A>.
00351 //  <li>
00352 //   Create a <linkto class="SetupNewTable:description">SetupNewTable</linkto>
00353 //   object with the name of the new table.
00354 //  <li>
00355 //   Create the necessary <A HREF="#Tables:Data Managers">data managers</A>.
00356 //  <li>
00357 //   Bind each column to the appropriate data manager.
00358 //   The system will bind unbound columns to data managers which
00359 //   are created internally using the default data manager name
00360 //   defined in the column description.
00361 //  <li>
00362 //   Define the shape of direct columns (if that was not already done in the
00363 //   column description).
00364 //  <li>
00365 //   Create the <linkto class="Table:description">Table</linkto>
00366 //   object from the SetupNewTable object. Here, a final check is performed
00367 //   and the necessary files are created.
00368 // </ol>
00369 // The recipe above is meant for the creation of a plain table, but the
00370 // creation of a memory table is exactly the same. The only difference
00371 // is that in the call to construct the Table object the Table::Memory
00372 // type has to be given. Note that in the SetupNewTable object the columns
00373 // can be bound to any data manager. <src>MemoryTable</src> will rebind
00374 // stored columns to the <linkto class=MemoryStMan>MemoryStMan</linkto>
00375 // storage manager, but virtual column bindings are not changed.
00376 
00377 //
00378 // The following example shows how you can create a table. An example
00379 // specifically illustrating the creation of the
00380 // <A HREF="#Tables:Table Description">table description</A> is given
00381 // in that section. Other sections discuss the access to the table.
00382 //
00383 // <srcblock>
00384 // #include <tables/Tables/TableDesc.h>
00385 // #include <tables/Tables/SetupNewTab.h>
00386 // #include <tables/Tables/Table.h>
00387 // #include <tables/Tables/ScaColDesc.h>
00388 // #include <tables/Tables/ScaRecordColDesc.h>
00389 // #include <tables/Tables/ArrColDesc.h>
00390 // #include <tables/Tables/StandardStMan.h>
00391 // #include <tables/Tables/IncrementalStMan.h>
00392 // 
00393 // int main()
00394 // {
00395 //     // Step 1 -- Build the table description.
00396 //     TableDesc td("tTableDesc", "1", TableDesc::Scratch);
00397 //     td.comment() = "A test of class SetupNewTable";
00398 //     td.addColumn (ScalarColumnDesc<Int> ("ab" ,"Comment for column ab"));
00399 //     td.addColumn (ScalarColumnDesc<Int> ("ac"));
00400 //     td.addColumn (ScalarColumnDesc<uInt> ("ad","comment for ad"));
00401 //     td.addColumn (ScalarColumnDesc<Float> ("ae"));
00402 //     td.addColumn (ScalarRecordColumnDesc ("arec"));
00403 //     td.addColumn (ArrayColumnDesc<Float> ("arr1",3,ColumnDesc::Direct));
00404 //     td.addColumn (ArrayColumnDesc<Float> ("arr2",0));
00405 //     td.addColumn (ArrayColumnDesc<Float> ("arr3",0,ColumnDesc::Direct));
00406 // 
00407 //     // Step 2 -- Setup a new table from the description.
00408 //     SetupNewTable newtab("newtab.data", td, Table::New);
00409 //
00410 //     // Step 3 -- Create storage managers for it.
00411 //     StandardStMan stmanStand_1;
00412 //     StandardStMan stmanStand_2;
00413 //     IncrementalStMan stmanIncr;
00414 // 
00415 //     // Step 4 -- First, bind all columns to the first storage
00416 //     // manager. Then, bind a few columns to another storage manager
00417 //     // (which will overwrite the previous bindings).
00418 //     newtab.bindAll (stmanStand_1);
00419 //     newtab.bindColumn ("ab", stmanStand_2);
00420 //     newtab.bindColumn ("ae", stmanIncr);
00421 //     newtab.bindColumn ("arr3", stmanIncr);
00422 // 
00423 //     // Step 5 -- Define the shape of the direct columns.
00424 //     // (this could have been done in the column description).
00425 //     newtab.setShapeColumn( "arr1", IPosition(3,2,3,4));
00426 //     newtab.setShapeColumn( "arr3", IPosition(3,3,4,5));
00427 // 
00428 //     // Step 6 -- Finally, create the table consisting of 10 rows.
00429 //     Table tab(newtab, 10);
00430 // 
00431 //     // Now we can fill the table, which is shown in a next section.
00432 //     // The Table destructor will flush the table to the files.
00433 // }
00434 // </srcblock>
00435 // To create a table in memory, only step 6 has to be modified slightly to:
00436 // <srcblock>
00437 //     Table tab(newtab, Table::Memory, 10);
00438 // </srcblock>
00439 
00440 // <ANCHOR NAME="Tables:write">
00441 // <h3>Writing into a Table</h3></ANCHOR>
00442 //
00443 // Once a table has been created or has been opened for read/write,
00444 // you want to write data into it. Before doing that you may have
00445 // to add one or more rows to the table.
00446 // <note role=tip> If a table was created with a given number of rows, you
00447 // do not need to add rows; you may not even be able to do so.
00448 // </note>
00449 //
00450 // When adding new rows to the table, either via the
00451 // <linkto class="Table">Table(...) constructor</linkto>
00452 // or via the
00453 // <linkto class="Table">Table::addRow(...)</linkto>
00454 // function, you can choose to have those rows initialized with the
00455 // default values given in the description.
00456 //
00457 // To actually write the data into the table you need the classes
00458 // <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto> and
00459 // <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto>.
00460 // For each column you can construct one or
00461 // more of these objects. Their put(...) functions
00462 // let you write a value at a time or the entire column in one go.
00463 // For arrays you can "put" subsections of the arrays.
00464 //
00465 // As an alternative for scalars of a standard data type (i.e. Bool,
00466 // uChar, Int, Short, uShort, uInt, float, double, Complex, DComplex
00467 // and String) you could use the functions
00468 // <linkto class="TableColumn">TableColumn::putScalar(...)</linkto>.
00469 // These functions offer an extra: automatic data type promotion; so that
00470 // you can, for example, put a float value in a double column.
00471 //
00472 // A typical program could look like:
00473 // <srcblock>
00474 // #include <tables/Tables/TableDesc.h>
00475 // #include <tables/Tables/SetupNewTab.h>
00476 // #include <tables/Tables/Table.h>
00477 // #include <tables/Tables/ScaColDesc.h>
00478 // #include <tables/Tables/ArrColDesc.h>
00479 // #include <tables/Tables/ScalarColumn.h>
00480 // #include <tables/Tables/ArrayColumn.h>
00481 // #include <casa/Arrays/Vector.h>
00482 // #include <casa/Arrays/Slicer.h>
00483 // #include <casa/Arrays/ArrayMath.h>
00484 // #include <iostream>
00485 // 
00486 // int main()
00487 // {
00488 //     // First build the table description.
00489 //     TableDesc td("tTableDesc", "1", TableDesc::Scratch);
00490 //     td.comment() = "A test of class SetupNewTable";
00491 //     td.addColumn (ScalarColumnDesc<Int> ("ac"));
00492 //     td.addColumn (ArrayColumnDesc<Float> ("arr2",0));
00493 // 
00494 //     // Setup a new table from the description,
00495 //     // and create the (still empty) table.
00496 //     // Note that since we do not explicitly bind columns to
00497 //     // data managers, all columns will be bound to the default
00498 //     // standard storage manager StandardStMan.
00499 //     SetupNewTable newtab("newtab.data", td, Table::New);
00500 //     Table tab(newtab);
00501 //
00502 //     // Construct the various column objects.
00503 //     // Their data type has to match the data type in the description.
00504 //     ScalarColumn<Int> ac (tab, "ac");
00505 //     ArrayColumn<Float> arr2 (tab, "arr2");
00506 //     Vector<Float> vec2(100);
00507 //
00508 //     // Write the data into the columns.
00509 //     // In each cell arr2 will be a vector of length 100.
00510 //     // Since its shape is not set explicitly, it is done implicitly.
00511 //     for (uInt i=0; i<10; i++) {
00512 //         tab.addRow();               // First add a row.
00513 //         ac.put (i, i+10);           // value is i+10 in row i
00514 //         indgen (vec2, float(i+20)); // vec2 gets i+20, i+21, ..., i+119
00515 //         arr2.put (i, vec2); 
00516 //     }
00517 //
00518 //     // Finally, show the entire column ac,
00519 //     // and show the 10th element of arr2.
00520 //     cout << ac.getColumn();
00521 //     cout << arr2.getColumn (Slicer(Slice(10)));
00522 //
00523 //     // The Table destructor writes the table.
00524 // }
00525 // </srcblock>
00526 //
00527 // In this example we added rows in the for loop, but we could also have
00528 // created 10 rows straightaway by constructing the Table object as:
00529 // <srcblock>
00530 //     Table tab(newtab, 10);
00531 // </srcblock>
00532 // in which case we would not include
00533 // <srcblock>
00534 //     tab.addRow()
00535 // </srcblock>
00536 //
00537 // The classes 
00538 // <linkto class="TableColumn:description">TableColumn</linkto>,
00539 // <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto>, and
00540 // <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto>
00541 // contain several functions to put values into a single cell or into the
00542 // whole column. This may look confusing, but is actually quite simple.
00543 // The functions can be divided in two groups:
00544 // <ol>
00545 //  <li>
00546 //   Put the given value into the column cell(s).
00547 //   <ul>
00548 //    <li>
00549 //     The simplest put functions,
00550 //     <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto> and
00551 //     <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>,
00552 //     put a value into the given column cell. For convenience, there is an
00553 //     <linkto class="ArrayColumn">ArrayColumn::putSlice(...)</linkto>
00554 //     to put only a part of the array.
00555 //    <li>
00556 //     <linkto class="ScalarColumn">ScalarColumn::fillColumn(...)</linkto> and
00557 //     <linkto class="ArrayColumn">ArrayColumn::fillColumn(...)</linkto>
00558 //     fill an entire column by putting the given value into all the cells
00559 //     of the column.
00560 //    <li>
00561 //     The simplest putColumn functions,
00562 //     <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto> and
00563 //     <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>,
00564 //     put an array of values into the column. There is a special
00565 //     <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>
00566 //     version which puts only a part of the arrays.
00567 //   </ul>
00568 //
00569 //  <li>
00570 //   Copy values from another column to this column.<BR>
00571 //   These functions have the advantage that the
00572 //   data type of the input and/or output column can be unknown.
00573 //   The generic (RO)TableColumn objects can be used for this purpose.
00574 //   The put(Column) function checks the data types and, if possible,
00575 //   converts them. If the conversion is not possible, it throws an
00576 //   exception.
00577 //   <ul>
00578 //    <li>
00579 //     The put functions copy the value in a cell of the input column
00580 //     to a cell in the output column. The row numbers of the cells
00581 //     in the columns can be different.
00582 //    <li>
00583 //     The putColumn functions copy the entire contents of the input column
00584 //     to the output column. The lengths of the columns must be equal.
00585 //   </ul>
00586 //   Each class has its own set of these functions.
00587 //   <ul>
00588 //    <li>
00589 //     <linkto class="TableColumn">TableColumn::put(...)</linkto> and
00590 //     <linkto class="TableColumn">TableColumn::putColumn(...)</linkto>
00591 //     are the most generic. They can be
00592 //     used if the data types of both input and output column are unknown.
00593 //     Note that these functions are virtual.
00594 //    <li>
00595 //     <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto>,
00596 //     <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>,
00597 //     <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto>, and
00598 //     <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>
00599 //     are less generic and therefore potentially more efficient.
00600 //     The most efficient variants are the ones taking a
00601 //     ROScalar/ArrayColumn&lt;T&gt;, because they require no data type
00602 //     conversion.
00603 //   </ul>
00604 // </ol>
00605 
00606 // <ANCHOR NAME="Tables:row-access">
00607 // <h3>Accessing rows in a Table</h3></ANCHOR>
00608 //
00609 // Apart from accessing a table column-wise as described in the
00610 // previous two sections, it is also possible to access a table row-wise.
00611 // The <linkto class=TableRow>TableRow</linkto> class makes it possible
00612 // to access multiple fields in a table row as a whole. Note that like the
00613 // XXColumn classes described above, there is also an ROTableRow class
00614 // for access to readonly tables.
00615 // <p>
00616 // On construction of a TableRow object it has to be specified which
00617 // fields (i.e. columns) are part of the row. For these fields a
00618 // fixed structured <linkto class=TableRecord>TableRecord</linkto>
00619 // object is constructed as part of the TableRow object. The TableRow::get
00620 // function will fill this record with the table data for the given row.
00621 // The user has access to the record and can use
00622 // <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> objects for
00623 // speedier access to the record.
00624 // <p>
00625 // The class could be used as shown in the following example.
00626 // <srcblock>
00627 // // Open the table as readonly and define a row object to contain
00628 // // the given columns.
00629 // // Note that the function stringToVector is a very convenient
00630 // // way to construct a Vector<String>.
00631 // // Show the description of the fields in the row.
00632 // Table table("Some.table");
00633 // ROTableRow row (table, stringToVector("col1,col2,col3"));
00634 // cout << row.record().description();
00635 // // Since the structure of the record is known, the RecordFieldPtr
00636 // // objects could be used to allow for easy and fast access to
00637 // // the record which is refilled for each get.
00638 // RORecordFieldPtr<String> col1(row.record(), "col1");
00639 // RORecordFieldPtr<Double> col2(row.record(), "col2");
00640 // RORecordFieldPtr<Array<Int> > col3(row.record(), "col3");
00641 // for (uInt i=0; i<table.nrow(); i++) {
00642 //     row.get (i);
00643 //     someString = *col1;
00644 //     somedouble = *col2;
00645 //     someArrayInt = *col3;
00646 // }
00647 // </srcblock>
00648 // The description of TableRow contains some more extensive examples.
00649 
00650 // <ANCHOR NAME="Tables:select and sort">
00651 // <h3>Table Selection and Sorting</h3></ANCHOR>
00652 //
00653 // The result of a select and sort of a table is another table,
00654 // which references the original table. This means that an update
00655 // of a sorted or selected table results in the update of the original
00656 // table. The result is, however, a table in itself, so all table
00657 // functions (including select and sort) can be used with it.
00658 // Note that a true copy of such a reference table can be made with
00659 // the <linkto class=Table>Table::deepCopy</linkto> function.
00660 // <p>
00661 // Rows or columns can be selected from a table. Columns can be selected
00662 // by the
00663 // <linkto class="Table">Table::project(...)</linkto>
00664 // function, while rows can be selected by the various
00665 // <linkto class="Table">Table operator()</linkto> functions.
00666 // Usually a row is selected by giving a select expression with
00667 // <linkto class="TableExprNode:description">TableExprNode</linkto>
00668 // objects. These objects represent the various nodes
00669 // in an expression, e.g. a constant, a column, or a subexpression.
00670 // The Table function
00671 // <linkto class="Table">Table::col(...)</linkto>
00672 // creates a TableExprNode object for a column. The function
00673 // <linkto class="Table">Table::key(...)</linkto>
00674 // does the same for a keyword by reading
00675 // the keyword value and storing it as a constant in an expression node.
00676 // All column nodes in an expression must belong to the same table,
00677 // otherwise an exception is thrown.
00678 // In the following example we select all rows with RA>10:
00679 // <srcblock>
00680 //    #include <tables/Tables/ExprNode.h>
00681 //    Table table ("Table.name");
00682 //    Table result = table (table.col("RA") > 10);
00683 // </srcblock>
00684 // while in the next one we select rows with RA and DEC in the given 
00685 // intervals:
00686 // <srcblock>
00687 //    Table result = table (table.col("RA") > 10
00688 //                       && table.col("RA") < 14
00689 //                       && table.col("DEC") >= -10
00690 //                       && table.col("DEC") <= 10);
00691 // </srcblock>
00692 // The following operators can be used to form arbitrarily
00693 // complex expressions:
00694 // <ul>
00695 //  <li> Relational operators ==, !=, >, >=, < and <=.
00696 //  <li> Logical operators &&, || and !.
00697 //  <li> Arithmetic operators +, -, *, /, %, and unary + and -.
00698 //  <li> Bit operators ^, &, |, and unary ~.
00699 //  <li> Operator() to take a subsection of an array.
00700 // </ul>
00701 // Many functions (like sin, max, conj) can be used in an expression.
00702 // Class <linkto class=TableExprNode>TableExprNode</linkto> shows
00703 // the available functions.
00704 // E.g.
00705 // <srcblock>
00706 //    Table result = table (sin (table.col("RA")) > 0.5);
00707 // </srcblock>
00708 // Function <src>in</src> can be used to select from a set of values.
00709 // A value set can be constructed using class
00710 // <linkto class=TableExprNodeSet>TableExprNodeSet</linkto>.
00711 // <srcblock>
00712 //    TableExprNodeSet set;
00713 //    set.add (TableExprNodeSetElem ("abc"));
00714 //    set.add (TableExprNodeSetElem ("defg"));
00715 //    set.add (TableExprNodeSetElem ("h"));
00716 //    Table result = table (table.col("NAME").in (set));
00717 // </srcblock>
00718 // This selects rows with a NAME equal to <src>abc</src>,
00719 // <src>defg</src>, or <src>h</src>.
00720 //
00721 // <p>
00722 // You can sort a table on one or more columns containing scalars.
00723 // In this example we simply sort on column RA (default is ascending):
00724 // <srcblock>
00725 //    Table table ("Table.name");
00726 //    Table result = table.sort ("RA");
00727 // </srcblock>
00728 // Multiple
00729 // <linkto class="Table">Table::sort(...)</linkto>
00730 // functions exist which allow for more flexible control over the sort order.
00731 // In the next example we sort first on RA in descending order
00732 // and then on DEC in ascending order:
00733 // <srcblock>
00734 //    Table table ("Table.name");
00735 //    Block<String> sortKeys(2);
00736 //    Block<int>    sortOrders(2);
00737 //    sortKeys(0)   = "RA";
00738 //    sortOrders(0) = Sort::Descending;
00739 //    sortKeys(1)   = "DEC";
00740 //    sortOrders(1) = Sort::Ascending;
00741 //    Table result = table.sort (sortKeys, sortOrders);
00742 // </srcblock>
00743 //
00744 // Tables stemming from the same root can be combined in several
00745 // ways with the help of the various logical
00746 // <linkto class="Table">Table operators</linkto> (operator|, etc.).
00747 
00748 // <h4>Table Query Language</h4>
00749 // The selection and sorting mechanism described above can only be used
00750 // in a hard-coded way in a C++ program.
00751 // There is, however, another way. Strings containing selection and
00752 // sorting commands can be used.
00753 // The syntax of these commands is based on SQL and is described in the
00754 // <a href="../notes/199.html">Table Query Language</a> (TaQL) note 199.
00755 // The language supports UDFs (User Defined Functions) in dynamically
00756 // loadable libraries as explained in the note.
00757 // <br>A TaQL command can be executed with the static function
00758 // <src>tableCommand</src> defined in class
00759 // <linkto class=TableParse>TableParse</linkto>.
00760 
00761 // <ANCHOR NAME="Tables:concatenation">
00762 // <h3>Table Concatenation</h3></ANCHOR>
00763 // Tables with identical descriptions can be concatenated in a virtual way
00764 // using the Table concatenation constructor. Such a Table object behaves
00765 // as any other Table object, thus any operation can be performed on it.
00766 // An identical description means that the number of columns, the column names,
00767 // and the data types of the columns must be the same. The columns do not
00768 // need to be ordered in the same way nor to be stored in the same way.
00769 // <br>Note that if tables have different column names, it is possible
00770 // to form a projection (as described in the previous section) first
00771 // to make them appear identical.
00772 //
00773 // Sometimes a MeasurementSet is partitioned, for instance in chunks of
00774 // one hour. All those chunks can be virtually concatenated this way.
00775 // Note that all tables in the concatenation will be opened, thus one might
00776 // run out of file descriptors if there are many chunks.
00777 //
00778 // Similar to reference tables, it is possible to make a concatenated Table
00779 // persistent by using the <src>rename</src> function. It will not copy the
00780 // data; only the names of the tables used are written.
00781 //
00782 // The keywords of a concatenated table are taken from the first table.
00783 // It is possible to change or add keywords, but that is not persistent,
00784 // not even if the concatenated table is made persistent.
00785 // <br>The keywords holding subtables can be handled in a special way.
00786 // Normally the subtables of the first table are used as the subtables of the
00787 // concatenation, but it is possible to concatenate subtables as well by
00788 // giving their names in the constructor.
00789 // In this way the, say, SYSCAL subtable of a MeasurementSet can be
00790 // concatenated as well.
00791 // <srcblock>
00792 //   // Create virtual concatenation of ms0 and ms1.
00793 //   Block<String> names(2);
00794 //   names[0] = "ms0";
00795 //   names[1] = "ms1";
00796 //   // Also concatenate their SYSCAL subtables.
00797 //   Block<String> subNames(1, "SYSCAL");
00798 //   Table concTab (names, subNames);
00799 // </srcblock>
00800 
00801 // <ANCHOR NAME="Tables:iterate">
00802 // <h3>Table Iterators</h3></ANCHOR>
00803 //
00804 // You can iterate through a table in an arbitrary order by getting
00805 // a subset of the table consisting of the rows in which the iteration
00806 // columns have the same value.
00807 // An iterator object is created by constructing a
00808 // <linkto class="TableIterator:description">TableIterator</linkto>
00809 // object with the appropriate column names.
00810 //
00811 // In the next example we define an iteration on the columns Time and
00812 // Baseline. Each iteration step returns a table subset in which Time and
00813 // Baseline have the same value.
00814 //
00815 // <srcblock>
00816 //    // Iterate over Time and Baseline (by default in ascending order).
00817 //    // Time is the main iteration order, thus the first column specified.
00818 //    Table t;
00819 //    Table tab ("UV_Table.data");
00820 //    Block<String> iv0(2);
00821 //    iv0[0] = "Time";
00822 //    iv0[1] = "Baseline";
00823 //    //
00824 //    // Create the iterator. This will prepare the first subtable.
00825 //    TableIterator iter(tab, iv0);
00826 //    Int nr = 0;
00827 //    while (!iter.pastEnd()) {
00828 //        // Get the first subtable.
00829 //        // This will contain rows with equal Time and Baseline.
00830 //        t = iter.table();
00831 //        cout << t.nrow() << " ";
00832 //        nr++;
00833 //        // Prepare the next subtable with the next Time,Baseline value.
00834 //        iter.next();
00835 //    }
00836 //    cout << endl << nr << " iteration steps" << endl;
00837 // </srcblock>
00838 //
00839 // You can define more than one iterator on the same table; they operate
00840 // independently.
00841 //
00842 // Note that the result of each iteration step is a table in itself which
00843 // references the original table, just as in the case of a sort or select.
00844 // This means that the resulting table can be used again in a sort, select,
00845 // iteration, etc.
00846 
00847 // <ANCHOR NAME="Tables:vectors">
00848 // <h3>Table Vectors</h3></ANCHOR>
00849 //
00850 // A table vector makes it possible to treat a column in a table
00851 // as a vector. Almost all operators and functions defined for normal
00852 // vectors, are also defined for table vectors. So it is, for instance,
00853 // possible to add a constant to a table vector. This has the effect
00854 // that the underlying column gets changed.
00855 //
00856 // You can use the templated classes
00857 // <linkto class="ROTableVector:description">ROTableVector</linkto> and
00858 // <linkto class="TableVector:description">TableVector</linkto>
00859 // to define a table vector (readonly and read/write, respectively) for
00860 // a scalar column. Columns containing arrays or tables are not supported.
00861 // The data type of the (RO)TableVector object must match the
00862 // data type of the column.
00863 // A table vector can also hold a normal vector so that (temporary)
00864 // results of table vector operations can be handled.
00865 //
00866 // In the following example we double the data in column COL1 and
00867 // store the result in a temporary table vector.
00868 // <srcblock>
00869 //    // Create a table vector for column COL1.
00870 //    // It has to be a ROTableVector, because the table is opened
00871 //    // as readonly.
00872 //    Table tab ("Table.data");
00873 //    ROTableVector<Int> tabvec(tab, "COL1");
00874 //    // Multiply it by a constant.
00875 //    // The result has to be stored in a TableVector,
00876 //    // since a ROTableVector cannot be written to.
00877 //    TableVector<Int> temp = 2 * tabvec;
00878 // </srcblock>
00879 //
00880 // In the next example we double the data in COL1 and put the result back
00881 // in the column.
00882 // <srcblock>
00883 //    // Create a table vector for column COL1.
00884 //    // It has to be a TableVector to be able to change the column.
00885 //    Table tab ("Table.data", Table::Update);
00886 //    TableVector<Int> tabvec(tab, "COL1");
00887 //    // Multiply it by a constant.
00888 //    tabvec *= 2;
00889 // </srcblock>
00890 
00891 // <ANCHOR NAME="Tables:keywords">
00892 // <h3>Table Keywords</h3></ANCHOR>
00893 //
00894 // Any number of keyword/value pairs may be attached to the table as a whole,
00895 // or to any individual column. They may be freely added, retrieved,
00896 // re-assigned, or deleted. They are, in essence, a self-resizing list of
00897 // values (any of the primitive types) indexed by Strings (the keyword).
00898 //
00899 // A table keyword/value pair might be
00900 // <srcblock>
00901 //      Observer = Grote Reber
00902 //      Date = 10 october 1942
00903 // </srcblock>
00904 // Column keyword/value pairs might be
00905 // <srcblock>
00906 //      Units = mJy
00907 //      Reference Pixel = 320
00908 // </srcblock>
00909 // The class 
00910 // <linkto class="TableRecord:description">TableRecord</linkto>
00911 // represents the keywords in a table.
00912 // It is (indirectly) derived from the standard record classes in the class
00913 // <linkto class="Record:description">Record</linkto>.
00914 
00915 // <ANCHOR NAME="Tables:Table Description">
00916 // <h3>Table Description</h3></ANCHOR>
00917 //
00918 // A table contains a description of itself, which defines the layout of the
00919 // columns and the keyword sets for the table and for the individual columns.
00920 // It may also define initial keyword sets and default values for the columns.
00921 // Such a default value is automatically stored in a cell in the table column,
00922 // whenever a row is added to the table.
00923 //
00924 // The creation of the table descriptor is the first step in the creation of
00925 // a new table. The description is part of the table itself, but may also
00926 // exist in a separate file. This is useful if you need to create a number
00927 // of tables with the same structure; in other circumstances it probably
00928 // should be avoided.
00929 //
00930 // The public classes to set up a table description are:
00931 // <ul>
00932 //  <li> <linkto class="TableDesc:description">TableDesc</linkto>
00933 //       -- holds the table description.
00934 //  <li> <linkto class="ColumnDesc:description">ColumnDesc</linkto>
00935 //       -- holds a generic column description.
00936 //  <li> <linkto class="ScalarColumnDesc:description">ScalarColumnDesc&lt;T&gt;
00937 //       </linkto>
00938 //       -- defines a column containing a scalar value.
00939 //  <li> <linkto class="ScalarRecordColumnDesc:description">ScalarRecordColumnDesc
00940 //       </linkto>
00941 //       -- defines a column containing a scalar record value.
00942 //  <li> <linkto class="ArrayColumnDesc:description">ArrayColumnDesc&lt;T&gt;
00943 //       </linkto>
00944 //       -- defines a column containing an (in)direct array.
00945 // </ul>
00946 //
00947 // Here follows a typical example of the construction of a table
00948 // description. For more specialized things -- like the definition of a
00949 // default data manager -- we refer to the descriptions of the above
00950 // mentioned classes.
00951 //
00952 // <srcblock>
00953 // #include <tables/Tables/TableDesc.h>
00954 // #include <tables/Tables/ScaColDesc.h>
00955 // #include <tables/Tables/ArrColDesc.h>
00956 // #include <tables/Tables/ScaRecordColDesc.h>
00957 // #include <tables/Tables/TableRecord.h>
00958 // #include <casa/Arrays/IPosition.h>
00959 // #include <casa/Arrays/Vector.h>
00960 //
00961 // main()
00962 // {
00963 //     // Create a new table description
00964 //     // Define a comment for the table description.
00965 //     // Define some keywords.
00966 //     ColumnDesc colDesc1, colDesc2;
00967 //     TableDesc td("tTableDesc", "1", TableDesc::New);
00968 //     td.comment() = "A test of class TableDesc";
00969 //     td.rwKeywordSet().define ("ra", float(3.14));
00970 //     td.rwKeywordSet().define ("equinox", double(1950));
00971 //     td.rwKeywordSet().define ("aa", Int(1));
00972 //
00973 //     // Define an integer column ab.
00974 //     td.addColumn (ScalarColumnDesc<Int> ("ab", "Comment for column ab"));
00975 //
00976 //     // Add a scalar integer column ac, define keywords for it
00977 //     // and define a default value 0.
00978 //     // Overwrite the value of keyword unit.
00979 //     ScalarColumnDesc<Int> acColumn("ac");
00980 //     acColumn.rwKeywordSet().define ("scale", Complex(0,0));
00981 //     acColumn.rwKeywordSet().define ("unit", "");
00982 //     acColumn.setDefault (0);
00983 //     td.addColumn (acColumn);
00984 //     td.rwColumnDesc("ac").rwKeywordSet().define ("unit", "DEG");
00985 //
00986 //     // Add a scalar string column ad and define its comment string.
00987 //     td.addColumn (ScalarColumnDesc<String> ("ad","comment for ad"));
00988 //
00989 //     // Now define array columns.
00990 //     // This one is indirect and has no dimensionality mentioned yet.
00991 //     td.addColumn (ArrayColumnDesc<Complex> ("Arr1","comment for Arr1"));
00992 //     // This one is indirect and has 3-dim arrays.
00993 //     td.addColumn (ArrayColumnDesc<Int> ("A2r1","comment for Arr1",3));
00994 //     // This one is direct and has 2-dim arrays with axes length 4 and 7.
00995 //     td.addColumn (ArrayColumnDesc<uInt> ("Arr3","comment for Arr1",
00996 //                                          IPosition(2,4,7),
00997 //                                          ColumnDesc::Direct));
00998 //
00999 //     // Add columns containing records.
01000 //     td.addColumn (ScalarRecordColumnDesc ("Rec1"));
01001 // }
01002 // </srcblock>
01003 
01004 // <ANCHOR NAME="Tables:Data Managers">
01005 // <h3>Data Managers</h3></ANCHOR>
01006 //
01007 // Data managers take care of the actual access to the data in a column.
01008 // There are two kinds of data managers:
01009 // <ol>
01010 //  <li> <A HREF="#Tables:storage managers">Storage managers</A> --
01011 //   which store the data as such. They can only handle the standard
01012 //   data types (Bool,...,String) as discussed in the section about the
01013 //   <A HREF="#Tables:properties">table properties</A>.
01014 //  <li> <A HREF="#Tables:virtual column engines">Virtual column engines</A>
01015 //   -- which manipulate the data.
01016 //   An engine could be a simple thing like scaling the data (as done
01017 //   in classic AIPS to reduce data storage), but it could also be an
01018 //   elaborate thing like applying corrections on-the-fly.
01019 //   <br>An engine must be used to store data objects with a non-standard type.
01020 //   It has to break down the object into items with standard data types
01021 //   which can be stored with a storage manager.
01022 // </ol>
01023 // In general the user of a table does not need to be aware which
01024 // data managers are being used underneath. Only when the table is created
01025 // data managers have to be bound to the columns. Thereafter it is
01026 // completely transparent.
01027 //
01028 // Data managers need to be registered, so they can be found when a table is
01029 // opened. All data managers mentioned below are part of the system and
01030 // pre-registered.
01031 // It is, however, also possible to load data managers on demand. If a data
01032 // manager is not registered, an attempt is made to load a shared library named
01033 // after the part of the data manager name (in lowercase) before a dot or left arrow.
01034 // The dot makes it possible to have multiple data managers in a shared library,
01035 // while the left arrow is meant for templated data manager classes.
01036 // <br>E.g. if <src>BitFlagsEngine&lt;uChar&gt;</src> was not registered, the shared
01037 // library <src>libbitflagsengine.so</src> (or .dylib) will be loaded. If
01038 // successful, its function <src>register_bitflagsengine()</src> will be
01039 // executed which should register the data manager(s). Thereafter it is known
01040 // and will be used. For example in a file Register.h and Register.cc:
01041 // <srcblock>
01042 //   // Declare in .h file as C function, so no name mangling is done.
01043 //   extern "C" {
01044 //     void register_bitflagsengine();
01045 //   }
01046 //   // Implement in .cc file.
01047 //   void register_bitflagsengine()
01048 //   {
01049 //     BitFlagsEngine<uChar>::registerClass();
01050 //     BitFlagsEngine<Short>::registerClass();
01051 //     BitFlagsEngine<Int>::registerClass();
01052 //   }
01053 // </srcblock>
01054 // There are several functions that can give information which data managers
01055 // are used for which columns and to obtain the characteristics and properties
01056 // of them. Class RODataManAccessor and derived classes can be used for it
01057 // as well as the functions <src>dataManagerInfo</src> and
01058 // <src>showStructure</src> in class Table.
01059 
01060 // <ANCHOR NAME="Tables:storage managers">
01061 // <h3>Storage Managers</h3></ANCHOR>
01062 //
01063 // Several storage managers are currently supported.
01064 // The default and preferred storage manager is <src>StandardStMan</src>.
01065 // Other storage managers should only be used if they pay off in
01066 // file space (like <src>IncrementalStMan</src> for slowly varying data)
01067 // or access speed (like the tiled storage managers for large data arrays).
01068 // <br>The storage managers store the data in a big or little endian
01069 // canonical format. The format can be specified when the table is created.
01070 // By default it uses the endian format as specified in the aipsrc variable
01071 // <code>table.endianformat</code> which can have the value local, big,
01072 // or little. The default is local.
01073 // <ol>
01074 //  <li>
01075 //   <linkto class="StandardStMan:description">StandardStMan</linkto>
01076 //   stores all the values in so-called buckets (equally sized chunks
01077 //   in the file). It requires little memory.
01078 //   <br>It replaces the old <src>StManAipsIO</src>.
01079 //
01080 //  <li>
01081 //   <linkto class="IncrementalStMan:description">IncrementalStMan</linkto>
01082 //   uses a storage mechanism resembling "incremental backups". A value
01083 //   is only stored if it is different from the previous row. It is
01084 //   very well suited for slowly varying data.
01085 //   <br>The class <linkto class="ROIncrementalStManAccessor:description">
01086 //   ROIncrementalStManAccessor</linkto> can be used to tune the
01087 //   behaviour of the <src>IncrementalStMan</src>. It contains functions
01088 //   to deal with the cache size and to show the behaviour of the cache.
01089 //
01090 //  <li>
01091 //   The <a href="#Tables:TiledStMan">Tiled Storage Managers</a>
01092 //   store the data as a tiled hypercube allowing for more or less equally
01093 //   efficient data access along all main axes. It can be used for
01094 //   UV-data as well as for image data.
01095 //
01096 //  <li>
01097 //   <linkto class="StManAipsIO:description">StManAipsIO</linkto>
01098 //   uses <src>AipsIO</src> to store the data in the columns.
01099 //   It supports all table functionality, but its I/O is probably not
01100 //   as efficient as other storage managers. It also requires that
01101 //   a large part of the table fits in memory.
01102 //   <br>It should not be used anymore, because it uses a lot of memory
01103 //   for larger tables and because it is not very robust in case an
01104 //   application or system crashes.
01105 //
01106 //  <li>
01107 //   <linkto class="MemoryStMan:description">MemoryStMan</linkto>
01108 //   holds the data in memory. It means that data 'stored' with this
01109 //   storage manager are NOT persistent.
01110 //   <br>This storage manager is primarily meant for tables held in
01111 //   memory, but it can also be useful for temporary columns in
01112 //   normal tables. Note, however, that if a table is accessed
01113 //   concurrently from multiple processes, MemoryStMan data cannot be
01114 //   synchronized.
01115 // </ol>
01116 //
01117 // The storage manager framework makes it possible to support arbitrary files
01118 // as tables. This has been used in a case where a file is filled
01119 // by the data acquisition system of a telescope. The file is simultaneously
01120 // used as a table using a dedicated storage manager. The table
01121 // system and storage manager provide a sync function to synchronize
01122 // the processes, i.e. to make the table system aware of changes
01123 // in the file size (thus in the table size) by the filling process.
01124 //
01125 // <note role=tip>
01126 // Not all data managers support all the table functionality. So, the choice
01127 // of a data manager can greatly influence the type of operations you can do
01128 // on the table as a whole.
01129 // For example, if a column uses the tiled storage manager,
01130 // it is not possible to delete rows from the table, because that storage
01131 // manager will not support deletion of rows.
01132 // However, it is always possible to delete all columns of a data
01133 // manager in one single call.
01134 // </note>
01135 
01136 // <ANCHOR NAME="Tables:TiledStMan">
01137 // <h3>Tiled Storage Manager</h3></ANCHOR>
01138 // The Tiled Storage Managers allow one to store the data of
01139 // one or more columns in a tiled way. Tiling means
01140 // that the data are stored without a preferred order to make access
01141 // along the different main axes equally efficient. This is done by
01142 // storing the data in so-called tiles (i.e. equally shaped subsets of an
01143 // array) to increase data locality. The user can define the tile shape
01144 // to optimize for the most frequently used access.
01145 // <p>
01146 // The Tiled Storage Manager has the following properties:
01147 // <ul>
01148 //  <li> There can be more than one Tiled Storage Manager in
01149 //       a table; each with its own (unique) name.
01150 //  <li> Each Tiled Storage Manager can store an
01151 //       N-dimensional so-called hypercolumn.
01152 //       Elaborate hypercolumns can be defined using
01153 //       <linkto file="TableDesc.h#defineHypercolumn">
01154 //       TableDesc::defineHypercolumn</linkto>.
01155 //       <br>Note that defining a hypercolumn is only necessary if it
01156 //       contains multiple columns or if the TiledDataStMan is used.
01157 //       It means that in practice it is hardly ever needed to define a
01158 //       hypercolumn.
01159 //       <br>A hypercolumn consists of up to three types of columns:
01160 //       <dl>
01161 //        <dt> Data columns
01162 //        <dd> contain the data to be stored in a tiled way. This will
01163 //             be done in tiled hypercubes.
01164 //             There must be at least one data column.
01165 //             <br> For example: a table contains UV-data with
01166 //                  data columns "Visibility" and "Weight".
01167 //        <dt> Coordinate columns
01168 //        <dd> define the world coordinates of the pixels in the data columns.
01169 //             Coordinate columns are optional, but if given there must
01170 //             be N coordinate columns for an N-dimensional hypercolumn.
01171 //             <br>
01172 //             For example: the data in the example above is 4-dimensional
01173 //             and has coordinate columns "Time", "Baseline", "Frequency",
01174 //             and "Polarization".
01175 //        <dt> Id columns
01176 //        <dd> are needed if TiledDataStMan is used.
01177 //             Different rows in the data columns can be stored in different
01178 //             hypercubes. The values in the id column(s) uniquely identify
01179 //             the hypercube a row is stored in.
01180 //             <br>
01181 //             For example: the line and continuum data in a MeasurementSet
01182 //             table need to be stored in 2 different hypercubes (because
01183 //             their shapes are different (see below)). A column containing
01184 //             the type (line or continuum) has to be used as an id column.
01185 //       </dl>
01186 //  <li> If multiple data columns are used, the shape of their data
01187 //       must be conforming in each individual row.
01188 //       If data in different rows have different shapes, they must be
01189 //       stored in different hypercubes, because a hypercube can only hold
01190 //       data with conforming shapes.
01191 //       <br>
01192 //       Thus in the example above, rows with line data will have conforming
01193 //       shapes and can be stored in one hypercube. The continuum data
01194 //       will have another shape and can be stored in another hypercube.
01195 //       <br>
01196 //       The storage manager keeps track of the mapping of rows to/from
01197 //       hypercubes.
01198 //  <li> Each hypercube can be tiled in its own way. It is not required
01199 //       that an integer number of tiles fits in the hypercube. The last
01200 //       tiles will be padded as needed.
01201 //  <li> The last axis of a hypercube can be extensible. This means that
01202 //       the size of that axis does not need to be defined when the
01203 //       hypercube is defined in the storage manager. Instead, the hypercube
01204 //       can be extended when another chunk of data has to be stored.
01205 //       This can be very useful in, for example, a (quasi-)realtime
01206 //       environment where the size of the time axis is not known.
01207 //  <li> If coordinate columns are defined, they describe the coordinates
01208 //       of the axes of the hypercubes. Each hypercube has its own set of
01209 //       coordinates.
01210 //  <li> Data and id columns have to be stored with the Tiled
01211 //       Storage Manager. However, coordinate columns do not need to be
01212 //       stored with the Tiled Storage Manager.
01213 //       Especially in the case where the coordinates for a hypercube axis
01214 //       are varying (i.e. dependent on other axes), another storage manager
01215 //       has to be used (because the Tiled Storage Manager can only
01216 //       hold constant coordinates).
01217 // </ul>
01218 // <p>
01219 // The following Tiled Storage Managers are available:
01220 // <dl>
01221 //  <dt> <linkto class=TiledShapeStMan:description>TiledShapeStMan</linkto>
01222 //  <dd> can be seen as a specialization of <src>TiledDataStMan</src>
01223 //       by using the array shape as the id value.
01224 //       Similarly to <src>TiledDataStMan</src> it can maintain multiple
01225 //       hypercubes and store multiple rows in a hypercube, but it is
01226 //       easier to use, because the special <src>addHypercube</src> and
01227 //       <src>extendHypercube</src> functions are not needed.
01228 //       A hypercube is automatically added when a new array shape is
01229 //       encountered.
01230 //       <br>
01231 //       This storage manager could be used for a table with a column
01232 //       containing line and continuum data, which will result
01233 //       in 2 hypercubes.
01234 //  <dt> <linkto class=TiledCellStMan:description>TiledCellStMan</linkto>
01235 //  <dd> creates (automatically) a new hypercube for each row.
01236 //       Thus each row of the hypercolumn is stored in a separate hypercube.
01237 //       Note that the row number serves as the id value. So an id column
01238 //       is not needed, although there are multiple hypercubes.
01239 //       <br>
01240 //       This storage manager is meant for tables where the data arrays
01241 //       in the different rows are not accessed together. One can think
01242 //       of a column containing images. Each row contains an image and
01243 //       only one image is shown at a time.
01244 //  <dt> <linkto class=TiledColumnStMan:description>TiledColumnStMan</linkto>
01245 //  <dd> creates one hypercube for the entire hypercolumn. Thus all cells
01246 //       in the hypercube have to have the same shape and therefore this
01247 //       storage manager is only possible if all columns in the hypercolumn
01248 //       have the attribute FixedShape.
01249 //       <br>
01250 //       This storage manager could be used for a table with a column
01251 //       containing images for the Stokes parameters I, Q, U, and V.
01252 //       By storing them in one hypercube, it is possible to retrieve
01253 //       the 4 Stokes values for a subset of the image or for an individual
01254 //       pixel in a very efficient way.
01255 //  <dt> <linkto class=TiledDataStMan:description>TiledDataStMan</linkto>
01256 //  <dd> allows one to control the creation and extension of hypercubes.
01257 //       This is done by means of the class
01258 //       <linkto class=TiledDataStManAccessor:description>
01259 //       TiledDataStManAccessor</linkto>.
01260 //       It makes it possible to store, say, row 0-9 in hypercube A,
01261 //       row 10-34 in hypercube B, row 35-54 in hypercube A again, etc.
01262 //       <br>
01263 //       The drawback of this storage manager is that its hypercubes are not
01264 //       automatically extended when adding new rows. The special functions
01265 //       <src>addHypercube</src> and <src>extendHypercube</src> have to be
01266 //       used making it somewhat tedious to use.
01267 //       Therefore this storage manager may become obsolete in the near future.
01268 // </dl>
01269 // The Tiled Storage Managers have 3 ways to access and cache the data.
01270 // Class <linkto class=TSMOption>TSMOption</linkto> can be used to setup an
01271 // access choice and use it in a Table constructor.
01272 // <ul>
01273 //  <li> The old way (the only way until January 2010) uses a cache
01274 //       of its own to keep tiles that might need to be reused. It will always
01275 //       access entire tiles, even if only a small part is needed.
01276 //       It is possible to define a maximum cache size. The description of class
01277 //       <linkto class=ROTiledStManAccessor>ROTiledStManAccessor</linkto>
01278 //       contains a discussion about the effect of defining a maximum cache
01279 //       size.
01280 //  <li> Memory-mapping the data files. In this way the operating system
01281 //       takes care of the IO and caching. However, the limited address space
01282 //       may preclude using it for large tables on 32-bit systems.
01283 //  <li> Use buffered IO and let the kernel's file cache take care of caching.
01284 //       It will access the data in chunks of the given buffer size, so the
01285 //       entire tile does not need to be accessed if only a small part is
01286 //       needed.
01287 // </ul>
01288 // Apart from reading, all access ways described above can also handle writing
01289 // and extending tables. They create fully equal files. Both little and big
01290 // endian data can be read or written.
01291 
01292 // <ANCHOR NAME="Tables:virtual column engines">
01293 // <h3>Virtual Column Engines</h3></ANCHOR>
01294 //
01295 // Virtual column engines are used to implement the virtual (i.e.
01296 // calculated-on-the-fly) columns. The Table system provides
01297 // an abstract base class (or "interface class")
01298 // <linkto class="VirtualColumnEngine:description">VirtualColumnEngine</linkto>
01299 // that specifies the protocol for these engines.
01300 // The programmer must derive a concrete class to implement
01301 // the application-specific virtual column.
01302 // <p>
01303 // For example: the programmer
01304 // needs a column in a table which is the difference between two other
01305 // columns.  (Perhaps these two other columns are updated periodically
01306 // during the execution of a program.)  A good way to handle this would
01307 // be to have a virtual column in the table, and write a virtual column
01308 // engine which knows how to calculate the difference between corresponding
01309 // cells of the two other columns. So the result is that accessing a
01310 // particular cell of the virtual column invokes the virtual column engine,
01311 // which then gets the values from the other two columns, and returns their
01312 // difference. This particular example could be done using 
01313 // <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>.
01314 // <p>
01315 // Several virtual column engines exist:
01316 // <ol>
01317 //  <li> The class
01318 //   <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>
01319 //   makes it possible to define a column as an arbitrary expression of
01320 //   other columns. It uses the <a href="../notes/199.html">TaQL</a>
01321 //   CALC command. The virtual column can be a scalar or an array and
01322 //   can have one of the standard data types supported by the Table System.
01323 //  <li> The class
01324 //   <linkto class="BitFlagsEngine:description">BitFlagsEngine</linkto>
01325 //   maps an integer bit flags column to a Bool column. A read and write mask
01326 //   can be defined telling which bits to take into account when mapping
01327 //   to and from Bool (thus when reading or writing the Bool).
01328 //  <li> The class
01329 //   <linkto class="CompressFloat:description">CompressFloat</linkto>
01330 //   compresses a single precision floating point array by scaling the
01331 //   values to shorts (16-bit integer).
01332 //  <li> The class
01333 //   <linkto class="CompressComplex:description">CompressComplex</linkto>
01334 //   compresses a single precision complex array by scaling the
01335 //   values to shorts (16-bit integer). In fact, the 2 parts of the complex
01336 //   number are combined into a 32-bit integer.
01337 //  <li> The class
01338 //   <linkto class="CompressComplexSD:description">CompressComplexSD</linkto>
01339 //   does the same as CompressComplex, but optimizes for the case where the
01340 //   imaginary part is zero (which is often the case for Single Dish data).
01341 //  <li> The double templated class
01342 //   <linkto class="ScaledArrayEngine:description">ScaledArrayEngine</linkto>
01343 //   scales the data in an array from, for example,
01344 //   float to short before putting it.
01345 //  <li> The double templated class
01346 //   <linkto class="MappedArrayEngine:description">MappedArrayEngine</linkto>
01347 //   converts the data from one data type to another. Sometimes it might be
01348 //   needed to store the residual data in an MS in double precision.
01349 //   Because the imaging task can only handle single precision, this engine
01350 //   can be used to map the data from double to single precision.
01351 //  <li> The double templated class
01352 //   <linkto class="RetypedArrayEngine:description">RetypedArrayEngine</linkto>
01353 //   converts the data from one data type to another with the possibility
01354 //   to reduce the number of dimensions. For example, it can be used to
01355 //   store a 2-d array of StokesVector objects as a 3-d array of floats
01356 //   by treating the 4 data elements as an extra array axis. If the
01357 //   StokesVector class is simple, it can be done very efficiently.
01358 //  <li> The class
01359 //   <linkto class="ForwardColumnEngine:description">
01360 //   ForwardColumnEngine</linkto>
01361 //   forwards the gets and puts on a row in a column to the same row
01362 //   in a column with the same name in another table. This provides
01363 //   a virtual copy of the referenced column.
01364 //  <li> The class
01365 //   <linkto class="ForwardColumnIndexedRowEngine:description">
01366 //   ForwardColumnIndexedRowEngine</linkto>
01367 //   is similar to <src>ForwardColumnEngine</src>.
01368 //   However, instead of forwarding it to the same row it uses
01369 //   a column to map its row number to a row number in the referenced
01370 //   table. In this way multiple rows can share the same data.
01371 //   This data manager only allows for get operations.
01372 //  <li> The calibration module has implemented a virtual column engine
01373 //   to do on-the-fly calibration in a transparent way.
01374 // </ol>
01375 // To handle arbitrary data types the templated abstract base class
01376 // <linkto class="VSCEngine:description">VSCEngine</linkto>
01377 // has been written. An example of how to use this class can be
01378 // found in the demo program <src>dVSCEngine.cc</src>.
01379 
01380 // <ANCHOR NAME="Tables:LockSync">
01381 // <h3>Table locking and synchronization</h3></ANCHOR>
01382 //
01383 // Multiple concurrent readers and writers (also via NFS) of a
01384 // table are supported by means of a locking/synchronization mechanism.
01385 // This mechanism is not very sophisticated in the sense that it is
01386 // very coarsely grained. When locking, the entire table gets locked.
01387 // A special lock file is used to lock the table. This lock file also
01388 // contains some synchronization data.
01389 // <p>
01390 // Five ways of locking are supported (see class
01391 // <linkto class=TableLock>TableLock</linkto>):
01392 // <dl>
01393 //  <dt> TableLock::PermanentLocking(Wait)
01394 //  <dd> locks the table permanently (from open till close). This means
01395 //       that one writer OR multiple readers are possible.
01396 //  <dt> TableLock::AutoLocking
01397 //  <dd> does the locking automatically. This is the default mode.
01398 //       This mode makes it possible that a table is shared amongst
01399 //       processes without the user needing to write any special code.
01400 //       It also means that a lock is only released when needed.
01401 //  <dt> TableLock::AutoNoReadLocking
01402 //  <dd> is similar to AutoLocking. However, no lock is acquired when
01403 //       reading the table making it possible to read the table while
01404 //       another process holds a write-lock. It also means that for read
01405 //       purposes no automatic synchronization is done when the table is
01406 //       updated in another process.
01407 //       Explicit synchronization can be done by means of the function
01408 //       <src>Table::resync</src>.
01409 //  <dt> TableLock::UserLocking
01410 //  <dd> requires that the programmer explicitly acquires and releases
01411 //       a lock on the table. This makes some kind of transaction
01412 //       processing possible. E.g. set a write lock, add a row,
01413 //       write all data into the row and release the lock.
01414 //       The Table functions <src>lock</src> and <src>unlock</src>
01415 //       have to be used to acquire and release a (read or write) lock.
01416 //  <dt> TableLock::UserNoReadLocking
01417 //  <dd> is similar to UserLocking. However, similarly to AutoNoReadLocking
01418 //       no lock is needed to read the table.
01419 // </dl>
01420 // Synchronization of the processes accessing the same table is done
01421 // by means of the lock file. When a lock is released, the storage
01422 // managers flush their data into the table files. Some synchronization data
01423 // is written into the lock file telling the new number of table rows
01424 // and telling which storage managers have written data.
01425 // This information is read when another process acquires the lock
01426 // and is used to determine which storage managers have to refresh
01427 // their internal caches.
01428 // <br>Note that for the NoReadLocking modes (see above) explicit
01429 // synchronization might be needed using <src>Table::resync</src>.
01430 // <p>
01431 // The function <src>Table::hasDataChanged</src> can be used to check
01432 // if a table is (being) changed by another process. In this way
01433 // a program can react to it. E.g. the table browser can refresh its
01434 // screen when the underlying table is changed.
01435 // <p>
01436 // In general the default locking option will do.
01437 // From the above it should be clear that heavy concurrent access
01438 // results in a lot of flushing, thus will have a negative impact on
01439 // performance. If uninterrupted access to a table is needed,
01440 // the <src>PermanentLocking</src> option should be used.
01441 // If transaction-like processing is done (e.g. updating a table
01442 // containing an observation catalogue), the <src>UserLocking</src>
01443 // option is probably best.
01444 // <p>
01445 // Creation or deletion of a table is not possible if that table
01446 // is still open in another process. The function
01447 // <src>Table::isMultiUsed()</src> can be used to check if a table
01448 // is open in other processes.
01449 // <br>
01450 // The function <src>deleteTable</src> should be used to delete
01451 // a table. Before deleting the table it ensures that it is writable
01452 // and that it is not open in the current or another process.
01453 // <p>
01454 // The following example wants to read the table uninterrupted, thus it uses
01455 // the <src>PermanentLocking</src> option. It also wants to wait
01456 // until the lock is actually acquired.
01457 // Note that the destructor closes the table and releases the lock.
01458 // <srcblock>
01459 // // Open the table (readonly).
01460 // // Acquire a permanent (read) lock.
01461 // // It waits until the lock is acquired.
01462 // Table tab ("some.name",
01463 //            TableLock(TableLock::PermanentLockingWait));
01464 // </srcblock>
01465 //
01466 // The following example uses the automatic locking.
01467 // It tells the system to check about every 20 seconds if another
01468 // process wants access to the table.
01469 // <srcblock>
01470 // // Open the table (readonly).
01471 // Table tab ("some.name",
01472 //            TableLock(TableLock::AutoLocking, 20));
01473 // </srcblock>
01474 //
01475 // The following example gets data (say from a GUI) and writes it
01476 // as a row into the table. To lock the table as little as possible
01477 // the lock is acquired just before writing and released immediately
01478 // thereafter.
01479 // <srcblock>
01480 // // Open the table (writable).
01481 // Table tab ("some.name",
01482 //            TableLock(TableLock::UserLocking),
01483 //            Table::Update);
01484 // while (True) {
01485 //     get input data
01486 //     tab.lock();     // Acquire a write lock and wait for it.
01487 //     tab.addRow();
01488 //     write data into the row
01489 //     tab.unlock();   // Release the lock.
01490 // }
01491 // </srcblock>
01492 //
01493 // The following example deletes a table if it is not used in
01494 // another process.
01495 // <srcblock>
01496 // Table tab ("some.name");
01497 // if (! tab.isMultiUsed()) {
01498 //     tab.markForDelete();
01499 // }
01500 // </srcblock>
01501 
01502 // <ANCHOR NAME="Tables:KeyLookup">
01503 // <h3>Table lookup based on a key</h3></ANCHOR>
01504 //
01505 // Class <linkto class=ColumnsIndex>ColumnsIndex</linkto> offers the
01506 // user a means to find the rows matching a given key or key range.
01507 // It is a somewhat primitive replacement of a B-tree index and in the
01508 // future it may be replaced by a proper B+-tree implementation.
01509 // <p>
01510 // The <src>ColumnsIndex</src> class makes it possible to build an
01511 // in-core index on one or more columns. Looking up a key or key range
01512 // is done using a binary search on that index. It returns a vector
01513 // containing the row numbers of the rows matching the key (range).
01514 // <p>
01515 // The class is not capable of tracing changes in the underlying column(s).
01516 // It detects a change in the number of rows and updates the index
01517 // accordingly. However, it has to be told explicitly when a value
01518 // in the underlying column(s) changes.
01519 // <p>
01520 // The following example shows how the class can be used.
01521 // <example>
01522 // Suppose one has an antenna table with key ANTENNA.
01523 // <srcblock>
01524 // // Open the table and make an index for column ANTENNA.
01525 // Table tab("antenna.tab");
01526 // ColumnsIndex colInx(tab, "ANTENNA");
01527 // // Make a RecordFieldPtr for the ANTENNA field in the index key record.
01528 // // Its data type has to match the data type of the column.
01529 // RecordFieldPtr<Int> antFld(colInx.accessKey(), "ANTENNA");
01530 // // Now loop in some way and find the row for the antenna
01531 // // involved in that loop.
01532 // Bool found;
01533 // while (...) {
01534 //     // Fill the key field and get the row number.
01535 //     // ANTENNA is a unique key, so only one row number matches.
01536 //     // Otherwise function getRowNumbers had to be used.
01537 //     *antFld = antenna;
01538 //     uInt antRownr = colInx.getRowNumber (found);
01539 //     if (!found) {
01540 //         cout << "Antenna " << antenna << " is unknown" << endl;
01541 //     } else {
01542 //         // antRownr can now be used to get data from that row in
01543 //         // the antenna table.
01544 //     }
01545 // }
01546 // </srcblock>
01547 // </example>
01548 // <linkto class=ColumnsIndex>ColumnsIndex</linkto> itself contains a more
01549 // advanced example. It shows how to use a private compare function
01550 // to adjust the lookup if the index does not contain single
01551 // key values, but intervals instead. This is useful if a row in
01552 // a (sub)table is valid for, say, a time range instead of a single
01553 // timestamp.
01554 
01555 // <ANCHOR NAME="Tables:performance">
01556 // <h3>Performance and robustness considerations</h3></ANCHOR>
01557 //
01558 // The Table System resembles a database system, but it is not as robust.
01559 // It lacks the transaction and logging facilities common to data base systems.
01560 // It means that in case of a crash data might be lost.
01561 // To reduce the risk of data loss to
01562 // a minimum, it is advisable to regularly do a <tt>flush</tt>, optionally
01563 // with an <tt>fsync</tt> to ensure that all data are really written.
01564 // However, that can degrade the performance because it involves extra writes.
01565 // So one should find the right balance between robustness and performance.
01566 //
01567 // To get a good feeling for the performance issues, it is important to
01568 // understand some of the internals of the Table System.
01569 // <br>The storage managers drive the performance. All storage managers use
01570 // buckets (called tiles for the TiledStMan) which contain the data.
01571 // All IO is done by bucket. The bucket/tile size is defined when creating
01572 // the storage manager objects. Sometimes the default will do, but usually
01573 // it is better to set it explicitly.
01574 //
01575 // It is best to do a flush when a tile is full.
01576 // For example: <br>
01577 // When creating a MeasurementSet containing N antennae (thus N*(N-1)/2 baselines
01578 // or N*(N+1)/2 if auto-correlations are stored as well) it makes sense to
01579 // store, say, N/2 rows in a tile and do a flush each time all baselines
01580 // are written. In that way tiles are fully filled when doing the flush, so
01581 // no extra IO is involved.
01582 // <br>Here is some code showing this when creating a MeasurementSet.
01583 // The code should speak for itself.
01584 // <srcblock>
01585 // MS* createMS (const String& msName, int nrchan, int nrant)
01586 // {
01587 //   // Get the MS main default table description.
01588 //   TableDesc td = MS::requiredTableDesc();
01589 //   // Add the data column and its unit.
01590 //   MS::addColumnToDesc(td, MS::DATA, 2);
01591 //   td.rwColumnDesc(MS::columnName(MS::DATA)).rwKeywordSet().
01592 //                                                 define("UNIT","Jy");
01593 //   // Store the DATA and FLAG column in two separate files.
01594 //   // In this way accessing FLAG only is much cheaper than
01595 //   // when combining DATA and FLAG.
01596 //   // All data have the same shape, thus use TiledColumnStMan.
01597 //   // Also store UVW with TiledColumnStMan.
01598 //   Vector<String> tsmNames(1);
01599 //   tsmNames[0] = MS::columnName(MS::DATA);
01600 //   td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq));
01601 //   td.defineHypercolumn("TiledData", 3, tsmNames);
01602 //   tsmNames[0] = MS::columnName(MS::FLAG);
01603 //   td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq));
01604 //   td.defineHypercolumn("TiledFlag", 3, tsmNames);
01605 //   tsmNames[0] = MS::columnName(MS::UVW);
01606 //   td.defineHypercolumn("TiledUVW", 2, tsmNames);
01607 //   // Setup the new table.
01608 //   SetupNewTable newTab(msName, td, Table::New);
01609 //   // Most columns vary slowly and use the IncrStMan.
01610 //   IncrementalStMan incrStMan("ISMData");
01611 //   // A few columns use the StandardStMan (set an appropriate bucket size).
01612 //   StandardStMan    stanStMan("SSMData", 32768);
01613 //   // Store all pol and freq and some rows in a single tile.
01614 //   // Autocorrelations are written, thus in total there are
01615 //   // nrant*(nrant+1)/2 baselines. Ensure a baseline takes up an
01616 //   // integer number of tiles.
01617 //   TiledColumnStMan tiledData("TiledData",
01618 //                              IPosition(3,4,nrchan,(nrant+1)/2));
01619 //   TiledColumnStMan tiledFlag("TiledFlag",
01620 //                              IPosition(3,4,nrchan,8*(nrant+1)/2));
01621 //   TiledColumnStMan tiledUVW("TiledUVW",
01622 //                             IPosition(2,3,nrant*(nrant+1)/2));
01623 //   newTab.bindAll (incrStMan);
01624 //   newTab.bindColumn(MS::columnName(MS::ANTENNA1),stanStMan);
01625 //   newTab.bindColumn(MS::columnName(MS::ANTENNA2),stanStMan);
01626 //   newTab.bindColumn(MS::columnName(MS::DATA),tiledData);
01627 //   newTab.bindColumn(MS::columnName(MS::FLAG),tiledFlag);
01628 //   newTab.bindColumn(MS::columnName(MS::UVW),tiledUVW);
01629 //   // Create the MS and its subtables.
01630 //   // Get access to its columns.
01631 //   MS* msp = new MeasurementSet(newTab);
01632 //   // Create all subtables.
01633 //   // Do this after the creation of optional subtables,
01634 //   // so the MS will know about those optional subtables.
01635 //   msp->createDefaultSubtables (Table::New);
01636 //   return msp;
01637 // }
01638 // </srcblock>
01639 
01640 // <h4>Some more performance considerations</h4>
01641 // Which storage managers to use and how to use them depends heavily on
01642 // the type of data and the access patterns to the data. Here follow some
01643 // guidelines:
01644 // <ol>
01645 //  <li> Scalar data can be stored with the StandardStMan (SSM) or
01646 //       IncrementalStMan (ISM). For slowly varying data (e.g. the TIME column
01647 //       in a MeasurementSet) it is best to use the ISM. Otherwise the SSM.
01648 //       Note that very long strings (longer than the bucketsize) can only
01649 //       be stored with the SSM.
01650 //  <li> Any number of storage managers can be used. In fact, each column
01651 //       can have a storage manager of its own resulting in column-wise
01652 //       stored data which is more and more used in data base systems.
01653 //       In that way a query or sort on that column is very fast, because
01654 //       the buckets to read only contain data of that column.
01655 //       In practice one can decide to combine a few frequently used columns
01656 //       in a storage manager.
01657 //  <li> Array data can be stored with any column manager. Small fixed size
01658 //       arrays can be stored directly with the SSM
01659 //       (or ISM if not changing much).
01660 //       However, they can also be stored with a TiledStMan (TSM) as shown
01661 //       for the UVW column in the example above.
01662 //       <br> Large arrays should usually be stored with a TSM. However,
01663 //       if it must be possible to change the shape of an array after it
01664 //       was stored, the SSM (or ISM) must be used. Note that in that
01665 //       case a lot of disk space can be wasted, because the SSM and ISM
01666 //       store the array data at the end of the file if the array got
01667 //       bigger and do not reuse the old space. The only way to
01668 //       reclaim it is by making a deep copy of the entire table.
01669 //  <li> If an array is stored with a TSM, it is important to decide
01670 //       which TSM to use.
01671 //       <ol>
01672 //        <li> The TiledColumnStMan is the most efficient, but only suitable
01673 //         for arrays having the same shape in the entire column.
01674 //        <li> The TiledShapeStMan is suitable for columns where the arrays
01675 //         can have a few shapes.
01676 //        <li> The TiledCellStMan is suitable for columns where the arrays
01677 //         can have many different shapes.
01678 //       </ol>
01679 //       This is discussed in more detail
01680 //       <a href="#Tables:TiledStMan">above</a>.
01681 //  <li> If storing an array with a TSM, it can be very important to
01682 //       choose the right tile shape. Not only does this define the size
01683 //       of a tile, but it also defines if access in other directions
01684 //       than the natural direction can be fast. It is also discussed in
01685 //       more detail <a href="#Tables:TiledStMan">above</a>.
01686 //  <li> Columns can be combined in a single TiledStMan. For instance, combining DATA
01687 //       and FLAG is advantageous if FLAG is always used with DATA. However, if FLAG
01688 //       is used on its own (e.g. in combination with CORRECTED_DATA), it is better
01689 //       to separate them, otherwise tiles containing FLAG also contain DATA making the
01690 //       tiles much bigger, thus more expensive to access.
01691 // </ol>
01692 
01693 // </synopsis>
01694 // </module>
01695 
01696 
01697 
01698 } //# NAMESPACE CASA - END
01699 
01700 #endif