casa
$Rev:20696$
|
00001 //# ReadAsciiTable.h: Filling a table from an Ascii file 00002 //# Copyright (C) 1993,1994,1995,1999,2001,2002 00003 //# Associated Universities, Inc. Washington DC, USA. 00004 //# 00005 //# This library is free software; you can redistribute it and/or modify it 00006 //# under the terms of the GNU Library General Public License as published by 00007 //# the Free Software Foundation; either version 2 of the License, or (at your 00008 //# option) any later version. 00009 //# 00010 //# This library is distributed in the hope that it will be useful, but WITHOUT 00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00012 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00013 //# License for more details. 00014 //# 00015 //# You should have received a copy of the GNU Library General Public License 00016 //# along with this library; if not, write to the Free Software Foundation, 00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00018 //# 00019 //# Correspondence concerning AIPS++ should be addressed as follows: 00020 //# Internet email: aips2-request@nrao.edu. 00021 //# Postal address: AIPS++ Project Office 00022 //# National Radio Astronomy Observatory 00023 //# 520 Edgemont Road 00024 //# Charlottesville, VA 22903-2475 USA 00025 //# 00026 //# $Id: ReadAsciiTable.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $ 00027 00028 #ifndef TABLES_READASCIITABLE_H 00029 #define TABLES_READASCIITABLE_H 00030 00031 //# Includes 00032 #include <casa/aips.h> 00033 #include <casa/BasicSL/String.h> 00034 #include <casa/Arrays/IPosition.h> 00035 #include <tables/Tables/Table.h> 00036 00037 //# Forward Declarations 00038 #include <casa/iosfwd.h> 00039 00040 namespace casa { //# NAMESPACE CASA - BEGIN 00041 00042 class Regex; 00043 class IPosition; 00044 class LogIO; 00045 class TableRecord; 00046 class TableColumn; 00047 00048 00049 // <summary> 00050 // Filling a table from an Ascii file. 
00051 // </summary> 00052 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="" demos=""> 00053 // </reviewed> 00054 00055 // <use visibility=export> 00056 00057 // <prerequisite> 00058 // <li> <linkto class="Table:description">Table</linkto> 00059 // </prerequisite> 00060 00061 // <synopsis> 00062 // Global functions to fill a table from an Ascii file. 00063 // 00064 // The table columns are filled from a file containing the data values 00065 // separated by a separator (optionally followed by whitespace). The 00066 // default separator is a blank. Non-given values default to 0, False, or 00067 // blank string (depending on data type). A value is not given between 2 00068 // consecutive separators or if fewer values are given than needed. 00069 // One line per table row should be given. 00070 // The following two header lines define the columns in the table: 00071 // <ol> 00072 // <li> The first line contains the names of the variables in each column. 00073 // These names may be enclosed in double quotes. 00074 // <li> The second line contains the data types of each column. 00075 // Valid types are: 00076 // <ul> 00077 // <li> S for Short Integer data 00078 // <li> I for Integer data 00079 // <li> R for Real data 00080 // <li> D for Double Precision data 00081 // <li> X for Complex data (Real, Imaginary) 00082 // <li> DX for Double Precision Complex data (R,I) 00083 // <li> Z for Complex data (Amplitude, Phase) 00084 // <li> DZ for Double Precision Complex data (A,P) 00085 // <li> A for ASCII data (must be enclosed in double 00086 // quotes if it contains one or more blanks) 00087 // <li> DMS for MVAngle-format position in DMS (converted to radians) 00088 // In this case a colon separated position is seen as 00089 // degrees and not as hours. 00090 // Blanks instead of : can be used as separator. 00091 // <li> HMS for MVAngle-format position in HMS (converted to radians) 00092 // Blanks instead of : can be used as separator. 
00093 // </ul> 00094 // The type can optionally be followed by one or more positive numbers 00095 // (separated by commas without whitespace) indicating that the column 00096 // contains an array. The numbers give the shape of the array. 00097 // E.g. <src>D2,4</src> defines a column containing arrays with 00098 // shape [2,4]. It "consumes" 8 numbers in each input data line. 00099 // The last column can contain a 0 in one of the shape numbers. 00100 // It indicates that the arrays are variable shaped; it "consumes" 00101 // all remaining numbers in each input data line. If needed, 00102 // the arrays are filled with default values (0, False, or blank). 00103 // E.g. <src>I0</src> indicates a variable shaped vector. 00104 // <src>I0,4</src> with a line with remaining input 00105 // <src>1 2 3 4 5 6 7 8 9</src> results in an array with shape [3,4] 00106 // (filled with 3 zeroes). 00107 // </ol> 00108 // If the <src>autoHeader</src> argument is True, the column definition 00109 // lines should not be given. It recognizes the types from the first data 00110 // line. It gives the names 'column0', etc. to the columns. 00111 // It can recognize integer, double, and string types. 00112 // It is possible to give a shape argument which has the same function 00113 // as the shape values discussed above. 00114 // <p> 00115 // There are two forms of the readAsciiTable function: 00116 // <ol> 00117 // <li> The simplest form has two input files. 00118 // The second input file contains the column data. 00119 // The first input file contains the keywords (if any) 00120 // and the column definitions. 00121 // The keywords in the first file, if there are any, must be enclosed 00122 // between a line that starts with ".keywords" and a line that starts 00123 // with ".endkeywords". To define column keywords, .keywords should be 00124 // followed by whitespace and the column name. 
00125 // Between these two lines each line should contain the following: 00126 // <ul> 00127 // <li> The keyword name, e.g., ANYKEY 00128 // <li> The datatype of the keyword (cf. list of valid types above) 00129 // <li> The value or values for the keyword (the keyword may contain a 00130 // scalar or a vector of values). e.g., 3.14159 21.78945 00131 // </ul> 00132 // After the keyword definitions, the two column definition lines 00133 // should follow (unless <src>autoHeader=True</src> is given). 00134 // <br>For example: 00135 // <srcblock> 00136 // .keywords 00137 // KEYI I 10 00138 // KEYIV I 11 12 13 14 00139 // KEYF R 1.2 00140 // KEYFV R -3.2 0 5.6 00141 // KEYD D 1.23456789 00142 // KEYDV D 1 2 3 4 5 6 7 8 9 00143 // KEYX X -1.5 -3 00144 // KEYXC X 0 1 2 3 4 5 6 7 8 9 00145 // KEYZ Z -3 -1.5 00146 // KEYZV Z 0 0.1 0.2 0.3 0.4 0.5 00147 // KEYS A "1 2 3 4 5" 00148 // KEYSV A " 1 2 " "AAA" BBB bbb CCc C "@#$%^&*()" 00149 // .endkeywords 00150 // .keywords COLDX 00151 // IKEYS A "coldx ikey" 00152 // DKEYS A "coldx dkey" 00153 // .endkeywords 00154 // COLI COLF COLD COLX COLZ COLS 00155 // I R D X Z A 00156 // </srcblock> 00157 // defines a table with 12 table keywords (of which 6 contain vector 00158 // values), 2 keywords for column COLDX, and 6 columns. 00159 // The number of rows is determined by the number of 00160 // lines in the second input file. 00161 // <li> The other form is to combine the two files in one file. 00162 // In that case the data lines must be preceded by the optional 00163 // keyword and column definitions (without an intermediate blank line). 00164 // </ol> 00165 // </synopsis> 00166 00167 // <example> 00168 // <srcblock> 00169 // readAsciiTable ("file.in", "", "table.test"); 00170 // </srcblock> 00171 // creates a table with name <src>table.test</src> from the text file 00172 // <src>file.in</src>. 
The text file could look like: 00173 // <srcblock> 00174 // COLI COLF COLD COLX COLZ COLS 00175 // I R D X Z A 00176 // 1 1.1 1.11 1.12 1.13 1.14 1.15 Str1 00177 // 10 11 12 13 14 15 16 String17 00178 // </srcblock> 00179 // resulting in a table with 6 columns and 2 rows. 00180 // </example> 00181 00182 // <group name=readAsciiTable> 00183 00184 00185 // Create a table with name as given by tableName. 00186 // If autoHeader==True, the format is automatically derived from the 00187 // first data line. It can recognize integer, double, and String types. 00188 // The columns will be named column1, column2, etc. 00189 // If the autoShape argument is given with 1 or more axes, all values are 00190 // treated as a single column with the given shape. Note that one of the 00191 // axes can have length 0 indicating a variable shaped array. 00192 // If autoHeader==False, the layout of the table has to be defined in 00193 // the first 2 lines of the input file. The remaining lines in the 00194 // input file contain the data. 00195 // 00196 // When the tableDescName is not blank, the table description will 00197 // be stored in a table description file with the given name. 00198 // <br>It returns a string containing the format of the columns in 00199 // the form COL1=R, COL2=D, ... 00200 // 00201 // The separator gives the character separating the values. The default 00202 // is a blank. Note that irrespective of the separator, blanks between 00203 // values are always ignored. A string value has to be enclosed in 00204 // double quotes if it has to contain blanks or the separator value. 00205 // 00206 // Header and data lines starting with the regular expression given in the 00207 // commentMarker are ignored. By default no comment marker is present. 00208 // E.g. "#" ignores all lines starting with the #-sign. 00209 // " *#" does the same, but the lines to ignore can start with whitespace. 
00210 // 00211 // The first and last line argument give the 1-relative number of the 00212 // first and last line to read from the file. firstLine <= 0 is the 00213 // same as 1. lastLine <= 0 means until end-of-file. 00214 // Note that lines matching the comment marker are also counted. 00215 String readAsciiTable (const String& filein, const String& tableDescName, 00216 const String& tableName, Bool autoHeader = False, 00217 Char separator = ' ', 00218 const String& commentMarkerRegex = "", 00219 Int firstLine = 1, Int lastLine = -1, 00220 const IPosition& autoShape = IPosition()); 00221 00222 // This form gets the header info in the given vectors. 00223 // Each element in the dataTypes vector has to be of the form as would 00224 // be given in a header line. 00225 String readAsciiTable (const String& filein, const String& tableproto, 00226 const String& tablename, 00227 const Vector<String>& columnNames, 00228 const Vector<String>& dataTypes, 00229 Char separator, const String& commentMarkerRegex, 00230 Int firstLine, Int lastLine); 00231 00232 // This form reads TWO Ascii files. The first file may contain 00233 // keywords and their values as well as the two lines described above for 00234 // the names and type of variables. The second file is intended for data only. 00235 // 00236 // When the tableDescName is not blank, the table description will 00237 // be stored in a table description file with the given name. 00238 // <br>It returns a string containing the format of the columns in 00239 // the form COL1=R, COL2=D, ... 00240 // 00241 // The separator gives the character separating the values. The default 00242 // is a blank. Note that irrespective of the separator, blanks between 00243 // values are always ignored. A string value has to be enclosed in 00244 // double quotes if it has to contain blanks or the separator value. 00245 // 00246 // Header and data lines starting with the regular expression given in the 00247 // commentMarker are ignored. 
By default no comment marker is present. 00248 // E.g. "#" ignores all lines starting with the #-sign. 00249 // " *#" does the same, but the lines to ignore can start with whitespace. 00250 // 00251 // The first and last line argument give the 1-relative number of the 00252 // first and last line to read from the data file. firstLine <= 0 is the 00253 // same as 1. lastLine <= 0 means until end-of-file. 00254 // Note that lines matching the comment marker are also counted. 00255 // <group> 00256 String readAsciiTable (const String& headerFile, const String& dataFile, 00257 const String& tableDescName, const String& tablename, 00258 Char separator = ' ', 00259 const String& commentMarkerRegex = "", 00260 Int firstLine = 1, Int lastLine = -1); 00261 //# Note that this char* version is needed, because of the first version 00262 //# Taking a Bool as the 4th argument. 00263 String readAsciiTable (const String& headerFile, const String& dataFile, 00264 const String& tableDescName, const char* tablename, 00265 Char separator = ' ', 00266 const String& commentMarkerRegex = "", 00267 Int firstLine = 1, Int lastLine = -1); 00268 // </group> 00269 00270 // Similar versions as above, but returning a Table object. 00271 // The format string is returned in the first argument. 00272 // The type of Table can be given (Plain or Memory). 
00273 // <group> 00274 Table readAsciiTable (String& formatString, Table::TableType tableType, 00275 const String& filein, const String& tableDescName, 00276 const String& tableName, Bool autoHeader = False, 00277 Char separator = ' ', 00278 const String& commentMarkerRegex = "", 00279 Int firstLine = 1, Int lastLine = -1, 00280 const IPosition& autoShape = IPosition()); 00281 Table readAsciiTable (String& formatString, Table::TableType tableType, 00282 const String& filein, const String& tableproto, 00283 const String& tablename, 00284 const Vector<String>& columnNames, 00285 const Vector<String>& dataTypes, 00286 Char separator, const String& commentMarkerRegex, 00287 Int firstLine, Int lastLine); 00288 Table readAsciiTable (String& formatString, Table::TableType tableType, 00289 const String& headerFile, const String& dataFile, 00290 const String& tableDescName, const String& tablename, 00291 Char separator = ' ', 00292 const String& commentMarkerRegex = "", 00293 Int firstLine = 1, Int lastLine = -1); 00294 Table readAsciiTable (String& formatString, Table::TableType tableType, 00295 const String& headerFile, const String& dataFile, 00296 const String& tableDescName, const char* tablename, 00297 Char separator = ' ', 00298 const String& commentMarkerRegex = "", 00299 Int firstLine = 1, Int lastLine = -1); 00300 // </group> 00301 00302 // </group> 00303 00304 00305 00306 00307 // <summary> 00308 // Helper class for readAsciiTable 00309 // </summary> 00310 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="" demos=""> 00311 // </reviewed> 00312 00313 // <use visibility=local> 00314 00315 // <synopsis> 00316 // This class contains static functions as helpers for readAsciiTable. 00317 // </synopsis> 00318 00319 class ReadAsciiTable 00320 { 00321 public: 00322 // Run the readAsciiTable. 
00323 static String run (const String& headerfile, const String& filein, 00324 const String& tableproto, const String& tablename, 00325 Bool autoHeader, const IPosition& autoShape, 00326 const Vector<String>& columnNames, 00327 const Vector<String>& dataTypes, 00328 Char separator, 00329 const String& commentMarkerRegex, 00330 Int firstLine, Int lastLine); 00331 static Table runt (String& formatString, Table::TableType tableType, 00332 const String& headerfile, const String& filein, 00333 const String& tableproto, const String& tablename, 00334 Bool autoHeader, const IPosition& autoShape, 00335 const Vector<String>& columnNames, 00336 const Vector<String>& dataTypes, 00337 Char separator, 00338 const String& commentMarkerRegex, 00339 Int firstLine, Int lastLine); 00340 00341 // Read a position using MVAngle. 00342 // If isDMS is True, a position with : is treated as DMS instead of HMS. 00343 // This function is a bit more relaxed than MVAngle::read. 00344 // It allows whitespace. Furthermore it allows whitespace as separator :. 00345 static double stringToPos (const String& pos, Bool isDMS); 00346 00347 private: 00348 // Define types. 00349 enum RATType {RATBool, RATShort, RATInt, RATFloat, RATDouble, RATString, 00350 RATComX, RATComZ, RATDComX, RATDComZ, RATDMS, RATHMS}; 00351 00352 00353 // Do the actual run. 00354 static String doRun (const String& headerfile, const String& filein, 00355 const String& tableproto, const String& tablename, 00356 Bool autoHeader, const IPosition& autoShape, 00357 const Vector<String>& columnNames, 00358 const Vector<String>& dataTypes, 00359 Char separator, 00360 Bool testComment, const Regex& commentMarker, 00361 Int firstLine, Int lastLine); 00362 00363 // Do the actual work of making and filling the table. 
00364 static Table makeTab (String& formatString, Table::TableType tableType, 00365 const String& headerfile, const String& filein, 00366 const String& tableproto, 00367 const String& tablename, 00368 Bool autoHeader, const IPosition& autoShape, 00369 const Vector<String>& columnNames, 00370 const Vector<String>& dataTypes, 00371 Char separator, 00372 Bool testComment, const Regex& commentMarker, 00373 Int firstLine, Int lastLine); 00374 00375 // Get the next line. Skip lines to be ignored. 00376 // It returns False when no more lines are available. 00377 static Bool getLine (ifstream& file, Int& lineNumber, 00378 char* line, Int lineSize, 00379 Bool testComment, const Regex& commentMarker, 00380 Int firstLine, Int lastLine); 00381 00382 // Get the next part of the line using the separator as delimiter. 00383 // Leading blanks are ignored. 00384 static Int getNext (const Char* string, Int strlen, Char* result, 00385 Int& at, Char separator); 00386 00387 // Derive the types from the values in the first data line. 00388 static void getTypes (const IPosition& shape, 00389 const Char* in, Int leng, 00390 Char* string1, Char* string2, Char separator); 00391 00392 // Turn the string into a Bool value. 00393 // Empty string, value 0 and any value starting with f, F, n or N are False. 00394 static Bool makeBool (const String& str); 00395 00396 // Handle a keyword set. 00397 static void handleKeyset (Int lineSize, char* string1, 00398 char* first, char* second, 00399 TableRecord& keysets, 00400 LogIO& logger, 00401 const String& fileName, 00402 ifstream& jFile, 00403 Int& lineNumber, 00404 Char separator, 00405 Bool testComment, 00406 const Regex& commentMarker, 00407 Int firstLine, Int lastLine); 00408 00409 // Get the shape and type from the type string. 00410 static Int getTypeShape (const String& typestr, 00411 IPosition& shape, Int& type); 00412 00413 // Get the next scalar value with the given type from string1. 
00414 static Bool getValue (char* string1, Int lineSize, char* first, 00415 Int& at1, Char separator, 00416 Int type, void* value); 00417 00418 // Handle the next scalar with the given type from the data line and 00419 // put it into the table column. 00420 static void handleScalar (char* string1, Int lineSize, char* first, 00421 Int& at1, Char separator, 00422 Int type, 00423 TableColumn& tabcol, uInt rownr); 00424 00425 // Get the next array with the given type from string1. 00426 // It returns the shape (for variable shaped arrays). 00427 static IPosition getArray (char* string1, Int lineSize, char* first, 00428 Int& at1, Char separator, 00429 const IPosition& shape, Int varAxis, 00430 Int type, void* valueBlock); 00431 00432 // Get the next array with the given type from the data line and 00433 // put it into the table column. 00434 static void handleArray (char* string1, Int lineSize, char* first, 00435 Int& at1, Char separator, 00436 const IPosition& shape, Int varAxis, 00437 Int type, 00438 TableColumn& tabcol, uInt rownr); 00439 }; 00440 00441 00442 00443 } //# NAMESPACE CASA - END 00444 00445 #endif