casa
$Rev:20696$
|
00001 //# ColumnsIndex.h: Index to one or more columns in a table 00002 //# Copyright (C) 1998,1999,2001,2002 00003 //# Associated Universities, Inc. Washington DC, USA. 00004 //# 00005 //# This library is free software; you can redistribute it and/or modify it 00006 //# under the terms of the GNU Library General Public License as published by 00007 //# the Free Software Foundation; either version 2 of the License, or (at your 00008 //# option) any later version. 00009 //# 00010 //# This library is distributed in the hope that it will be useful, but WITHOUT 00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00012 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00013 //# License for more details. 00014 //# 00015 //# You should have received a copy of the GNU Library General Public License 00016 //# along with this library; if not, write to the Free Software Foundation, 00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00018 //# 00019 //# Correspondence concerning AIPS++ should be addressed as follows: 00020 //# Internet email: aips2-request@nrao.edu. 00021 //# Postal address: AIPS++ Project Office 00022 //# National Radio Astronomy Observatory 00023 //# 520 Edgemont Road 00024 //# Charlottesville, VA 22903-2475 USA 00025 //# 00026 //# $Id: ColumnsIndex.h 21298 2012-12-07 14:53:03Z gervandiepen $ 00027 00028 #ifndef TABLES_COLUMNSINDEX_H 00029 #define TABLES_COLUMNSINDEX_H 00030 00031 00032 //# Includes 00033 #include <tables/Tables/Table.h> 00034 #include <casa/Arrays/Vector.h> 00035 #include <casa/Containers/Block.h> 00036 #include <casa/Containers/Record.h> 00037 00038 namespace casa { //# NAMESPACE CASA - BEGIN 00039 00040 //# Forward Declarations 00041 class String; 00042 class TableColumn; 00043 template<typename T> class RecordFieldPtr; 00044 00045 // <summary> 00046 // Index to one or more columns in a table. 00047 // </summary> 00048 00049 // <use visibility=export> 00050 00051 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="tColumnsIndex.cc" demos=""> 00052 // </reviewed> 00053 00054 // <prerequisite> 00055 // <li> <linkto class=Table>Table</linkto> 00056 // <li> <linkto class=Record>Record</linkto> 00057 // <li> <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> 00058 // </prerequisite> 00059 00060 // <synopsis> 00061 // This class makes it possible to use transient indices on top 00062 // of tables in order to speed up the process of finding rows 00063 // based on a given key or key range. 00064 // When constructing a <src>ColumnsIndex</src> object, one has to define 00065 // which columns form the key for this index on the given 00066 // <src>table</src> object. 00067 // Only scalar columns are supported. The data in the given columns 00068 // will be read, sorted (if needed), and stored in memory. 00069 // When looking up a key or key range, the class will use a fast binary 00070 // search on the data held in memory. 00071 // <p> 00072 // The <src>ColumnsIndex</src> object contains a 00073 // <linkto class=Record>Record</linkto> object which can be used 00074 // to define the key to be looked up. The record contains a field for 00075 // each column in the index (with the same name and data type). 00076 // The fastest way to fill the key is by creating a 00077 // <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> object for 00078 // each field in the record (see the example) and fill it as needed. 00079 // However, one can also use the <src>Record::define</src> function, 00080 // but that is slower. 00081 // <br> 00082 // A second record is available to define the upper key 00083 // when a key range has to be looked up. The keys can be accessed 00084 // using the various <src>accessKey</src> functions. 00085 // <p> 00086 // When a key is defined, the <src>getRowNumbers</src> function can be 00087 // used to find the table rows containing the given key (range). 00088 // Function <src>getRowNumber</src> can be used if all keys in the index 00089 // are unique (which can be tested with the <src>isUnique</src> function). 00090 // <p> 00091 // Instead of using the internal records holding the keys, one can also 00092 // pass its own Record object to <src>getRowNumbers</src>. 00093 // However, it will be slower. 00094 // <p> 00095 // When constructing the object, the user can supply his own compare 00096 // function. The default compare function compares each field of the 00097 // key in the normal way. A user's compare function makes it possible 00098 // to compare in a special way. E.g. one could use near instead of == 00099 // on floating point fields. Another example (which is shown in one 00100 // of the examples below) makes it possible to find a key in an 00101 // index consisting of a time and width. 00102 // <p> 00103 // After an index is created, it is possible to change the data 00104 // in the underlying columns. However, the <src>ColumnsIndex</src> can 00105 // not detect if the column data have changed. It can only detect if 00106 // the number of rows has changed. If the column data have changed, 00107 // the user has to use the <src>setChanged</src> function to indicate 00108 // that all columns or a particular column has changed. 00109 // <br>If data have changed, the entire index will be recreated by 00110 // rereading and optionally resorting the data. This will be deferred 00111 // until the next key lookup. 00112 // </synopsis> 00113 00114 // <example> 00115 // Suppose one has an antenna table with key ANTENNA. 00116 // <srcblock> 00117 // // Open the table and make an index for column ANTENNA. 00118 // Table tab("antenna.tab") 00119 // ColumnsIndex colInx(tab, "ANTENNA"); 00120 // // Make a RecordFieldPtr for the ANTENNA field in the index key record. 00121 // // Its data type has to match the data type of the column. 00122 // RecordFieldPtr<Int> antFld(colInx.accessKey(), "ANTENNA"); 00123 // // Now loop in some way and find the row for the antenna 00124 // // involved in that loop. 00125 // Bool found; 00126 // while (...) { 00127 // // Fill the key field and get the row number. 00128 // // ANTENNA is a unique key, so only one row number matches. 00129 // // Otherwise function getRowNumbers had to be used. 00130 // *antFld = antenna; 00131 // uInt antRownr = colInx.getRowNumber (found); 00132 // if (!found) { 00133 // cout << "Antenna " << antenna << " is unknown" << endl; 00134 // } else { 00135 // // antRownr can now be used to get data from that row in 00136 // // the antenna table. 00137 // } 00138 // } 00139 // </srcblock> 00140 // 00141 // The following example shows how multiple keys can be used and how 00142 // a search on a range can be done. 00143 // <srcblock> 00144 // Table tab("sometable") 00145 // // Note that TIME is the main key. 00146 // // Also note that stringToVector (in ArrayUtil.h) is a handy 00147 // // way to convert a String to a Vector<String>. 00148 // ColumnsIndex colInx(tab, stringToVector("TIME,ANTENNA")); 00149 // // Make a RecordFieldPtr for the fields in lower and upper key records. 00150 // RecordFieldPtr<Double> timeLow(colInx.accessLowerKey(), "TIME"); 00151 // RecordFieldPtr<Int> antLow(colInx.accessLowerKey(), "ANTENNA"); 00152 // RecordFieldPtr<Double> timeUpp(colInx.accessUpperKey(), "TIME"); 00153 // RecordFieldPtr<Int> antUpp(colInx.accessUpperKey(), "ANTENNA"); 00154 // while (...) { 00155 // // Fill the key fields. 00156 // *timeLow = ...; 00157 // *antLow = ...; 00158 // *timeUpp = ...; 00159 // *antUpp = ...; 00160 // // Find the row numbers for keys between low and upp (inclusive). 00161 // Vector<uInt> rows = colInx.getRowNumbers (True, True); 00162 // } 00163 // </srcblock> 00164 // 00165 // The following example shows how a specific compare function 00166 // could look like. A function like this will actually be used in the 00167 // calibration software. 00168 // <br> 00169 // The table for which the index is built, has rows with the TIME as its key. 00170 // However, each row is valid for a given interval, where TIME gives 00171 // the middle of the interval and WIDTH the length of the interval. 00172 // This means that the compare function has to test whether the key 00173 // is part of the interval. 00174 // <srcblock> 00175 // Int myCompare (const Block<void*>& fieldPtrs, 00176 // const Block<void*>& dataPtrs, 00177 // const Block<Int>& dataTypes, 00178 // Int index) 00179 // { 00180 // // Assert (for performance only in debug mode) that the correct 00181 // // fields are used. 00182 // DebugAssert (dataTypes.nelements() == 2, AipsError); 00183 // DebugAssert (dataTypes[0] == TpDouble && dataTypes[1] == TpDouble, 00184 // AipsError); 00185 // // Now get the key to be looked up 00186 // // (an awfully looking cast has to be used). 00187 // const Double key = *(*(const RecordFieldPtr<Double>*)(fieldPtrs[0])); 00188 // // Get the time and width of the entry to be compared. 00189 // const Double time = ((const Double*)(dataPtrs[0]))[index]; 00190 // const Double width = ((const Double*)(dataPtrs[1]))[index]; 00191 // const Double start = time - width/2; 00192 // const Double end = time + width/2; 00193 // // Test if the key is before, after, or in the interval 00194 // // (representing less, greater, equal). 00195 // if (key < start) { 00196 // return -1; 00197 // } else if (key > end) { 00198 // return 1; 00199 // } 00200 // return 0; 00201 // } 00202 // 00203 // // Now use this compare function in an actual index. 00204 // Table tab("sometable") 00205 // ColumnsIndex colInx(tab, stringToVector("TIME,WIDTH"), myCompare); 00206 // // Make a RecordFieldPtr for the TIME field in the key record. 00207 // // Note that although the WIDTH is part of the index, it is 00208 // // not an actual key. So it does not need to be filled in. 00209 // RecordFieldPtr<Double> time(colInx.accessLowerKey(), "TIME"); 00210 // Bool found; 00211 // while (...) { 00212 // // Fill the key field. 00213 // *time = ...; 00214 // // Find the row number for this time. 00215 // uInt rownr = colInx.getRowNumber (found); 00216 // } 00217 // </srcblock> 00218 // </example> 00219 00220 // <motivation> 00221 // The calibration software needs to lookup keys in calibration tables 00222 // very frequently. This class makes that process much easier and faster. 00223 // </motivation> 00224 00225 00226 class ColumnsIndex 00227 { 00228 public: 00229 // Define the signature of a comparison function. 00230 // The first block contains pointers to <src>RecordFieldPtr<T></src> 00231 // objects holding the key to be looked up. 00232 // The second block contains pointers to the column data. 00233 // The <src>index</src> argument gives the index in the column data. 00234 // The third block contains data types of those blocks (TpBool, etc.). 00235 // The function should return -1 if key is less than data, 00236 // 0 if equal, 1 if greater. 00237 // <br>An example above shows how a compare function can be used. 00238 typedef Int Compare (const Block<void*>& fieldPtrs, 00239 const Block<void*>& dataPtrs, 00240 const Block<Int>& dataTypes, 00241 Int index); 00242 00243 // Create an index on the given table for the given column. 00244 // The column has to be a scalar column. 00245 // If <src>noSort==True</src>, the table is already in order of that 00246 // column and the sort step will not be done. 00247 // The default compare function is provided by this class. It simply 00248 // compares each field in the key. 00249 ColumnsIndex (const Table&, const String& columnName, 00250 Compare* compareFunction = 0, Bool noSort = False); 00251 00252 // Create an index on the given table for the given columns, thus 00253 // the key is formed by multiple columns. 00254 // The columns have to be scalar columns. 00255 // If <src>noSort==True</src>, the table is already in order of those 00256 // columns and the sort step will not be done. 00257 // The default compare function is provided by this class. It simply 00258 // compares each field in the key. 00259 ColumnsIndex (const Table&, const Vector<String>& columnNames, 00260 Compare* compareFunction = 0, Bool noSort = False); 00261 00262 // Copy constructor (copy semantics). 00263 ColumnsIndex (const ColumnsIndex& that); 00264 00265 ~ColumnsIndex(); 00266 00267 // Assignment (copy semantics). 00268 ColumnsIndex& operator= (const ColumnsIndex& that); 00269 00270 // Are all keys in the index unique? 00271 Bool isUnique() const; 00272 00273 // Return the names of the columns forming the index. 00274 Vector<String> columnNames() const; 00275 00276 // Get the table for which this index is created. 00277 const Table& table() const; 00278 00279 // Something has changed in the table, so the index has to be recreated. 00280 // The 2nd version indicates that a specific column has changed, 00281 // so only that column is reread. If that column is not part of the 00282 // index, nothing will be done. 00283 // <br>Note that the class itself is keeping track if the number of 00284 // rows in the table changes. 00285 // <group> 00286 void setChanged(); 00287 void setChanged (const String& columnName); 00288 // </group> 00289 00290 // Access the key values. 00291 // These functions allow you to create RecordFieldPtr<T> objects 00292 // for each field in the key. In this way you can quickly fill in 00293 // the key. 00294 // <br>The records have a fixed type, so you cannot add or delete fields. 00295 // <group> 00296 Record& accessKey(); 00297 Record& accessLowerKey(); 00298 Record& accessUpperKey(); 00299 // </group> 00300 00301 // Find the row number matching the key. All keys have to be unique, 00302 // otherwise an exception is thrown. 00303 // If no match is found, <src>found</src> is set to False. 00304 // The 2nd version makes it possible to pass in your own Record 00305 // instead of using the internal record via the <src>accessKey</src> 00306 // functions. Note that the given Record will be copied to the internal 00307 // record, thus overwrites it. 00308 // <group> 00309 uInt getRowNumber (Bool& found); 00310 uInt getRowNumber (Bool& found, const Record& key); 00311 // </group> 00312 00313 // Find the row numbers matching the key. It should be used instead 00314 // of <src>getRowNumber</src> if the same key can exist multiple times. 00315 // The 2nd version makes it possible to pass in your own Record 00316 // instead of using the internal record via the <src>accessKey</src> 00317 // functions. Note that the given Record will be copied to the internal 00318 // record, thus overwrites it. 00319 // <group> 00320 Vector<uInt> getRowNumbers(); 00321 Vector<uInt> getRowNumbers (const Record& key); 00322 // </group> 00323 00324 // Find the row numbers matching the key range. The boolean arguments 00325 // tell if the lower and upper key are part of the range. 00326 // The 2nd version makes it possible to pass in your own Records 00327 // instead of using the internal records via the 00328 // <src>accessLower/UpperKey</src> functions. 00329 // Note that the given Records will be copied to the internal 00330 // records, thus overwrite them. 00331 // <group> 00332 Vector<uInt> getRowNumbers (Bool lowerInclusive, Bool upperInclusive); 00333 Vector<uInt> getRowNumbers (const Record& lower, const Record& upper, 00334 Bool lowerInclusive, Bool upperInclusive); 00335 // </group> 00336 00337 // Fill the internal key field from the corresponding external key. 00338 // The data type may differ. 00339 static void copyKeyField (void* field, int dtype, const Record& key); 00340 00341 protected: 00342 // Copy that object to this. 00343 void copy (const ColumnsIndex& that); 00344 00345 // Delete all data in the object. 00346 void deleteObjects(); 00347 00348 // Add a column to the record description for the keys. 00349 void addColumnToDesc (RecordDesc& description, 00350 const TableColumn& column); 00351 00352 // Create the various members in the object. 00353 void create (const Table& table, const Vector<String>& columnNames, 00354 Compare* compareFunction, Bool noSort); 00355 00356 // Make the various internal <src>RecordFieldPtr</src> objects. 00357 void makeObjects (const RecordDesc& description); 00358 00359 // Read the data of the columns forming the index, sort them and 00360 // form the index. 00361 void readData(); 00362 00363 // Do a binary search on <src>itsUniqueIndex</src> for the key in 00364 // <src>fieldPtrs</src>. 00365 // If the key is found, <src>found</src> is set to True and the index 00366 // in <src>itsUniqueIndex</src> is returned. 00367 // If not found, <src>found</src> is set to False and the index 00368 // of the next higher key is returned. 00369 uInt bsearch (Bool& found, const Block<void*>& fieldPtrs) const; 00370 00371 // Compare the key in <src>fieldPtrs</src> with the given index entry. 00372 // -1 is returned when less, 0 when equal, 1 when greater. 00373 static Int compare (const Block<void*>& fieldPtrs, 00374 const Block<void*>& dataPtrs, 00375 const Block<Int>& dataTypes, 00376 Int index); 00377 00378 // Fill the row numbers vector for the given start till end in the 00379 // <src>itsUniqueIndex</src> vector (end is not inclusive). 00380 void fillRowNumbers (Vector<uInt>& rows, uInt start, uInt end) const; 00381 00382 private: 00383 // Fill the internal key fields from the corresponding external key. 00384 void copyKey (Block<void*> fields, const Record& key); 00385 00386 // Fill the internal key field from the corresponding external key. 00387 // The data type may differ. 00388 template <typename T> 00389 static void copyKeyField (RecordFieldPtr<T>& field, const Record& key) 00390 { 00391 key.get (field.name(), *field); 00392 } 00393 00394 Table itsTable; 00395 uInt itsNrrow; 00396 Record* itsLowerKeyPtr; 00397 Record* itsUpperKeyPtr; 00398 Block<Int> itsDataTypes; 00399 Block<void*> itsDataVectors; 00400 Block<void*> itsData; //# pointer to data in itsDataVectors 00401 //# The following 2 blocks are actually blocks of RecordFieldPtr<T>*. 00402 //# They are used for fast access to the records. 00403 Block<void*> itsLowerFields; 00404 Block<void*> itsUpperFields; 00405 Block<Bool> itsColumnChanged; 00406 Bool itsChanged; 00407 Bool itsNoSort; //# True = sort is not needed 00408 Compare* itsCompare; //# Compare function 00409 Vector<uInt> itsDataIndex; //# Row numbers of all keys 00410 //# Indices in itsDataIndex for each unique key 00411 Vector<uInt> itsUniqueIndex; 00412 uInt* itsDataInx; //# pointer to data in itsDataIndex 00413 uInt* itsUniqueInx; //# pointer to data in itsUniqueIndex 00414 }; 00415 00416 00417 inline Bool ColumnsIndex::isUnique() const 00418 { 00419 return (itsDataIndex.nelements() == itsUniqueIndex.nelements()); 00420 } 00421 inline const Table& ColumnsIndex::table() const 00422 { 00423 return itsTable; 00424 } 00425 inline Record& ColumnsIndex::accessKey() 00426 { 00427 return *itsLowerKeyPtr; 00428 } 00429 inline Record& ColumnsIndex::accessLowerKey() 00430 { 00431 return *itsLowerKeyPtr; 00432 } 00433 inline Record& ColumnsIndex::accessUpperKey() 00434 { 00435 return *itsUpperKeyPtr; 00436 } 00437 00438 00439 } //# NAMESPACE CASA - END 00440 00441 #endif