casa  $Rev:20696$
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
ColumnsIndex.h
Go to the documentation of this file.
00001 //# ColumnsIndex.h: Index to one or more columns in a table
00002 //# Copyright (C) 1998,1999,2001,2002
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //# $Id: ColumnsIndex.h 21298 2012-12-07 14:53:03Z gervandiepen $
00027 
00028 #ifndef TABLES_COLUMNSINDEX_H
00029 #define TABLES_COLUMNSINDEX_H
00030 
00031 
00032 //# Includes
00033 #include <tables/Tables/Table.h>
00034 #include <casa/Arrays/Vector.h>
00035 #include <casa/Containers/Block.h>
00036 #include <casa/Containers/Record.h>
00037 
00038 namespace casa { //# NAMESPACE CASA - BEGIN
00039 
00040 //# Forward Declarations
00041 class String;
00042 class TableColumn;
00043 template<typename T> class RecordFieldPtr;
00044 
00045 // <summary>
00046 // Index to one or more columns in a table.
00047 // </summary>
00048 
00049 // <use visibility=export>
00050 
00051 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="tColumnsIndex.cc" demos="">
00052 // </reviewed>
00053 
00054 // <prerequisite>
00055 //   <li> <linkto class=Table>Table</linkto>
00056 //   <li> <linkto class=Record>Record</linkto>
00057 //   <li> <linkto class=RecordFieldPtr>RecordFieldPtr</linkto>
00058 // </prerequisite>
00059 
00060 // <synopsis>
00061 // This class makes it possible to use transient indices on top
00062 // of tables in order to speed up the process of finding rows
00063 // based on a given key or key range.
00064 // When constructing a <src>ColumnsIndex</src> object, one has to define
00065 // which columns form the key for this index on the given
00066 // <src>table</src> object.
00067 // Only scalar columns are supported. The data in the given columns
00068 // will be read, sorted (if needed), and stored in memory.
00069 // When looking up a key or key range, the class will use a fast binary
00070 // search on the data held in memory.
00071 // <p>
00072 // The <src>ColumnsIndex</src> object contains a
00073 // <linkto class=Record>Record</linkto> object which can be used
00074 // to define the key to be looked up. The record contains a field for
00075 // each column in the index (with the same name and data type).
00076 // The fastest way to fill the key is by creating a
00077 // <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> object for
00078 // each field in the record (see the example) and fill it as needed.
00079 // However, one can also use the <src>Record::define</src> function,
00080 // but that is slower.
00081 // <br>
00082 // A second record is available to define the upper key
00083 // when a key range has to be looked up. The keys can be accessed
00084 // using the various <src>accessKey</src> functions.
00085 // <p>
00086 // When a key is defined, the <src>getRowNumbers</src> function can be
00087 // used to find the table rows containing the given key (range).
00088 // Function <src>getRowNumber</src> can be used if all keys in the index
00089 // are unique (which can be tested with the <src>isUnique</src> function).
00090 // <p>
00091 // Instead of using the internal records holding the keys, one can also
00092 // pass its own Record object to <src>getRowNumbers</src>.
00093 // However, it will be slower.
00094 // <p>
00095 // When constructing the object, the user can supply his own compare
00096 // function. The default compare function compares each field of the
00097 // key in the normal way. A user's compare function makes it possible
00098 // to compare in a special way. E.g. one could use near instead of ==
00099 // on floating point fields. Another example (which is shown in one
00100 // of the examples below) makes it possible to find a key in an
00101 // index consisting of a time and width.
00102 // <p>
00103 // After an index is created, it is possible to change the data
00104 // in the underlying columns. However, the <src>ColumnsIndex</src> can
00105 // not detect if the column data have changed. It can only detect if
00106 // the number of rows has changed. If the column data have changed,
00107 // the user has to use the <src>setChanged</src> function to indicate
00108 // that all columns or a particular column has changed.
00109 // <br>If data have changed, the entire index will be recreated by
00110 // rereading and optionally resorting the data. This will be deferred
00111 // until the next key lookup.
00112 // </synopsis>
00113 
00114 // <example>
00115 // Suppose one has an antenna table with key ANTENNA.
00116 // <srcblock>
00117 // // Open the table and make an index for column ANTENNA.
00118 // Table tab("antenna.tab")
00119 // ColumnsIndex colInx(tab, "ANTENNA");
00120 // // Make a RecordFieldPtr for the ANTENNA field in the index key record.
00121 // // Its data type has to match the data type of the column.
00122 // RecordFieldPtr<Int> antFld(colInx.accessKey(), "ANTENNA");
00123 // // Now loop in some way and find the row for the antenna
00124 // // involved in that loop.
00125 // Bool found;
00126 // while (...) {
00127 //     // Fill the key field and get the row number.
00128 //     // ANTENNA is a unique key, so only one row number matches.
00129 //     // Otherwise function getRowNumbers had to be used.
00130 //     *antFld = antenna;
00131 //     uInt antRownr = colInx.getRowNumber (found);
00132 //     if (!found) {
00133 //         cout << "Antenna " << antenna << " is unknown" << endl;
00134 //     } else {
00135 //         // antRownr can now be used to get data from that row in
00136 //         // the antenna table.
00137 //     }
00138 // }
00139 // </srcblock>
00140 //
00141 // The following example shows how multiple keys can be used and how
00142 // a search on a range can be done.
00143 // <srcblock>
00144 // Table tab("sometable")
00145 // // Note that TIME is the main key.
00146 // // Also note that stringToVector (in ArrayUtil.h) is a handy
00147 // // way to convert a String to a Vector<String>.
00148 // ColumnsIndex colInx(tab, stringToVector("TIME,ANTENNA"));
00149 // // Make a RecordFieldPtr for the fields in lower and upper key records.
00150 // RecordFieldPtr<Double> timeLow(colInx.accessLowerKey(), "TIME");
00151 // RecordFieldPtr<Int> antLow(colInx.accessLowerKey(), "ANTENNA");
00152 // RecordFieldPtr<Double> timeUpp(colInx.accessUpperKey(), "TIME");
00153 // RecordFieldPtr<Int> antUpp(colInx.accessUpperKey(), "ANTENNA");
00154 // while (...) {
00155 //     // Fill the key fields.
00156 //     *timeLow = ...;
00157 //     *antLow = ...;
00158 //     *timeUpp = ...;
00159 //     *antUpp = ...;
00160 //     // Find the row numbers for keys between low and upp (inclusive).
00161 //     Vector<uInt> rows = colInx.getRowNumbers (True, True);
00162 // }
00163 // </srcblock>
00164 //
00165 // The following example shows how a specific compare function
00166 // could look like. A function like this will actually be used in the
00167 // calibration software.
00168 // <br>
00169 // The table for which the index is built, has rows with the TIME as its key.
00170 // However, each row is valid for a given interval, where TIME gives
00171 // the middle of the interval and WIDTH the length of the interval.
00172 // This means that the compare function has to test whether the key
00173 // is part of the interval.
00174 // <srcblock>
00175 // Int myCompare (const Block<void*>& fieldPtrs,
00176 //                const Block<void*>& dataPtrs,
00177 //                const Block<Int>& dataTypes,
00178 //                Int index)
00179 // {
00180 //   // Assert (for performance only in debug mode) that the correct
00181 //   // fields are used.
00182 //   DebugAssert (dataTypes.nelements() == 2, AipsError);
00183 //   DebugAssert (dataTypes[0] == TpDouble  &&  dataTypes[1] == TpDouble,
00184 //                AipsError);
00185 //   // Now get the key to be looked up
00186 //   // (an awfully looking cast has to be used).
00187 //   const Double key = *(*(const RecordFieldPtr<Double>*)(fieldPtrs[0]));
00188 //   // Get the time and width of the entry to be compared.
00189 //   const Double time = ((const Double*)(dataPtrs[0]))[index];
00190 //   const Double width = ((const Double*)(dataPtrs[1]))[index];
00191 //   const Double start = time - width/2;
00192 //   const Double end = time + width/2;
00193 //   // Test if the key is before, after, or in the interval
00194 //   // (representing less, greater, equal).
00195 //   if (key < start) {
00196 //     return -1;
00197 //   } else if (key > end) {
00198 //     return 1;
00199 //   }
00200 //   return 0;
00201 // }
00202 //
00203 // // Now use this compare function in an actual index.
00204 // Table tab("sometable")
00205 // ColumnsIndex colInx(tab, stringToVector("TIME,WIDTH"), myCompare);
00206 // // Make a RecordFieldPtr for the TIME field in the key record.
00207 // // Note that although the WIDTH is part of the index, it is
00208 // // not an actual key. So it does not need to be filled in.
00209 // RecordFieldPtr<Double> time(colInx.accessLowerKey(), "TIME");
00210 // Bool found;
00211 // while (...) {
00212 //     // Fill the key field.
00213 //     *time = ...;
00214 //     // Find the row number for this time.
00215 //     uInt rownr = colInx.getRowNumber (found);
00216 // }
00217 // </srcblock>
00218 // </example>
00219 
00220 // <motivation>
00221 // The calibration software needs to lookup keys in calibration tables
00222 // very frequently. This class makes that process much easier and faster.
00223 // </motivation>
00224 
00225 
00226 class ColumnsIndex
00227 {
00228 public:
00229     // Define the signature of a comparison function.
00230     // The first block contains pointers to <src>RecordFieldPtr<T></src>
00231     // objects holding the key to be looked up.
00232     // The second block contains pointers to the column data.
00233     // The <src>index</src> argument gives the index in the column data.
00234     // The third block contains data types of those blocks (TpBool, etc.).
00235     // The function should return -1 if key is less than data,
00236     // 0 if equal, 1 if greater.
00237     // <br>An example above shows how a compare function can be used.
00238     typedef Int Compare (const Block<void*>& fieldPtrs,
00239                          const Block<void*>& dataPtrs,
00240                          const Block<Int>& dataTypes,
00241                          Int index);
00242 
00243     // Create an index on the given table for the given column.
00244     // The column has to be a scalar column.
00245     // If <src>noSort==True</src>, the table is already in order of that
00246     // column and the sort step will not be done.
00247     // The default compare function is provided by this class. It simply
00248     // compares each field in the key.
00249     ColumnsIndex (const Table&, const String& columnName,
00250                   Compare* compareFunction = 0, Bool noSort = False);
00251 
00252     // Create an index on the given table for the given columns, thus
00253     // the key is formed by multiple columns.
00254     // The columns have to be scalar columns.
00255     // If <src>noSort==True</src>, the table is already in order of those
00256     // columns and the sort step will not be done.
00257     // The default compare function is provided by this class. It simply
00258     // compares each field in the key.
00259     ColumnsIndex (const Table&, const Vector<String>& columnNames,
00260                   Compare* compareFunction = 0, Bool noSort = False);
00261 
00262     // Copy constructor (copy semantics).
00263     ColumnsIndex (const ColumnsIndex& that);
00264 
00265     ~ColumnsIndex();
00266 
00267     // Assignment (copy semantics).
00268     ColumnsIndex& operator= (const ColumnsIndex& that);
00269 
00270     // Are all keys in the index unique?
00271     Bool isUnique() const;
00272 
00273     // Return the names of the columns forming the index.
00274     Vector<String> columnNames() const;
00275 
00276     // Get the table for which this index is created.
00277     const Table& table() const;
00278 
00279     // Something has changed in the table, so the index has to be recreated.
00280     // The 2nd version indicates that a specific column has changed,
00281     // so only that column is reread. If that column is not part of the
00282     // index, nothing will be done.
00283     // <br>Note that the class itself is keeping track if the number of
00284     // rows in the table changes.
00285     // <group>
00286     void setChanged();
00287     void setChanged (const String& columnName);
00288     // </group>
00289 
00290     // Access the key values.
00291     // These functions allow you to create RecordFieldPtr<T> objects
00292     // for each field in the key. In this way you can quickly fill in
00293     // the key.
00294     // <br>The records have a fixed type, so you cannot add or delete fields.
00295     // <group>
00296     Record& accessKey();
00297     Record& accessLowerKey();
00298     Record& accessUpperKey();
00299     // </group>
00300 
00301     // Find the row number matching the key. All keys have to be unique,
00302     // otherwise an exception is thrown.
00303     // If no match is found, <src>found</src> is set to False.
00304     // The 2nd version makes it possible to pass in your own Record
00305     // instead of using the internal record via the <src>accessKey</src>
00306     // functions. Note that the given Record will be copied to the internal
00307     // record, thus overwrites it.
00308     // <group>
00309     uInt getRowNumber (Bool& found);
00310     uInt getRowNumber (Bool& found, const Record& key);
00311     // </group>
00312 
00313     // Find the row numbers matching the key. It should be used instead
00314     // of <src>getRowNumber</src> if the same key can exist multiple times.
00315     // The 2nd version makes it possible to pass in your own Record
00316     // instead of using the internal record via the <src>accessKey</src>
00317     // functions. Note that the given Record will be copied to the internal
00318     // record, thus overwrites it.
00319     // <group>
00320     Vector<uInt> getRowNumbers();
00321     Vector<uInt> getRowNumbers (const Record& key);
00322     // </group>
00323 
00324     // Find the row numbers matching the key range. The boolean arguments
00325     // tell if the lower and upper key are part of the range.
00326     // The 2nd version makes it possible to pass in your own Records
00327     // instead of using the internal records via the
00328     // <src>accessLower/UpperKey</src> functions.
00329     // Note that the given Records will be copied to the internal
00330     // records, thus overwrite them.
00331     // <group>
00332     Vector<uInt> getRowNumbers (Bool lowerInclusive, Bool upperInclusive);
00333     Vector<uInt> getRowNumbers (const Record& lower, const Record& upper,
00334                                 Bool lowerInclusive, Bool upperInclusive);
00335     // </group>
00336 
00337     // Fill the internal key field from the corresponding external key.
00338     // The data type may differ.
00339     static void copyKeyField (void* field, int dtype, const Record& key);
00340 
00341 protected:
00342     // Copy that object to this.
00343     void copy (const ColumnsIndex& that);
00344 
00345     // Delete all data in the object.
00346     void deleteObjects();
00347 
00348     // Add a column to the record description for the keys.
00349     void addColumnToDesc (RecordDesc& description,
00350                           const TableColumn& column);
00351 
00352     // Create the various members in the object.
00353     void create (const Table& table, const Vector<String>& columnNames,
00354                  Compare* compareFunction, Bool noSort);
00355 
00356     // Make the various internal <src>RecordFieldPtr</src> objects.
00357     void makeObjects (const RecordDesc& description);
00358 
00359     // Read the data of the columns forming the index, sort them and
00360     // form the index.
00361     void readData();
00362 
00363     // Do a binary search on <src>itsUniqueIndex</src> for the key in
00364     // <src>fieldPtrs</src>.
00365     // If the key is found, <src>found</src> is set to True and the index
00366     // in <src>itsUniqueIndex</src> is returned.
00367     // If not found, <src>found</src> is set to False and the index
00368     // of the next higher key is returned.
00369     uInt bsearch (Bool& found, const Block<void*>& fieldPtrs) const;
00370 
00371     // Compare the key in <src>fieldPtrs</src> with the given index entry.
00372     // -1 is returned when less, 0 when equal, 1 when greater.
00373     static Int compare (const Block<void*>& fieldPtrs,
00374                         const Block<void*>& dataPtrs,
00375                         const Block<Int>& dataTypes,
00376                         Int index);
00377 
00378     // Fill the row numbers vector for the given start till end in the
00379     // <src>itsUniqueIndex</src> vector (end is not inclusive).
00380     void fillRowNumbers (Vector<uInt>& rows, uInt start, uInt end) const;
00381 
00382 private:
00383     // Fill the internal key fields from the corresponding external key.
00384     void copyKey (Block<void*> fields, const Record& key);
00385 
00386     // Fill the internal key field from the corresponding external key.
00387     // The data type may differ.
00388     template <typename T>
00389     static void copyKeyField (RecordFieldPtr<T>& field, const Record& key)
00390     {
00391       key.get (field.name(), *field);
00392     }
00393 
00394     Table  itsTable;
00395     uInt   itsNrrow;
00396     Record* itsLowerKeyPtr;
00397     Record* itsUpperKeyPtr;
00398     Block<Int>   itsDataTypes;
00399     Block<void*> itsDataVectors;
00400     Block<void*> itsData;              //# pointer to data in itsDataVectors
00401     //# The following 2 blocks are actually blocks of RecordFieldPtr<T>*.
00402     //# They are used for fast access to the records.
00403     Block<void*> itsLowerFields;
00404     Block<void*> itsUpperFields;
00405     Block<Bool>  itsColumnChanged;
00406     Bool         itsChanged;
00407     Bool         itsNoSort;            //# True = sort is not needed
00408     Compare*     itsCompare;           //# Compare function
00409     Vector<uInt> itsDataIndex;         //# Row numbers of all keys
00410     //# Indices in itsDataIndex for each unique key
00411     Vector<uInt> itsUniqueIndex;
00412     uInt*        itsDataInx;           //# pointer to data in itsDataIndex
00413     uInt*        itsUniqueInx;         //# pointer to data in itsUniqueIndex
00414 };
00415 
00416 
00417 inline Bool ColumnsIndex::isUnique() const
00418 {
00419     return (itsDataIndex.nelements() == itsUniqueIndex.nelements());
00420 }
00421 inline const Table& ColumnsIndex::table() const
00422 {
00423     return itsTable;
00424 }
00425 inline Record& ColumnsIndex::accessKey()
00426 {
00427     return *itsLowerKeyPtr;
00428 }
00429 inline Record& ColumnsIndex::accessLowerKey()
00430 {
00431     return *itsLowerKeyPtr;
00432 }
00433 inline Record& ColumnsIndex::accessUpperKey()
00434 {
00435     return *itsUpperKeyPtr;
00436 }
00437 
00438 
00439 } //# NAMESPACE CASA - END
00440 
00441 #endif