casa  $Rev:20696$
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
ReadAsciiTable.h
Go to the documentation of this file.
00001 //# ReadAsciiTable.h: Filling a table from an Ascii file
00002 //# Copyright (C) 1993,1994,1995,1999,2001,2002
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //# 
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //# 
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //# 
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //# 
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //# $Id: ReadAsciiTable.h 20551 2009-03-25 00:11:33Z Malte.Marquarding $
00027 
00028 #ifndef TABLES_READASCIITABLE_H
00029 #define TABLES_READASCIITABLE_H
00030 
00031 //# Includes
00032 #include <casa/aips.h>
00033 #include <casa/BasicSL/String.h>
00034 #include <casa/Arrays/IPosition.h>
00035 #include <tables/Tables/Table.h>
00036 
00037 //# Forward Declarations
00038 #include <casa/iosfwd.h>
00039 
00040 namespace casa { //# NAMESPACE CASA - BEGIN
00041 
00042 class Regex;
00043 class IPosition;
00044 class LogIO;
00045 class TableRecord;
00046 class TableColumn;
00047 
00048 
00049 // <summary>
00050 // Filling a table from an Ascii file.
00051 // </summary>
00052 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="" demos="">
00053 // </reviewed>
00054 
00055 // <use visibility=export>
00056 
00057 // <prerequisite>
00058 //  <li> <linkto class="Table:description">Table</linkto>
00059 // </prerequisite>
00060 
00061 // <synopsis>
00062 // Global functions to fill a table from an Ascii file.
00063 //
00064 // The table columns are filled from a file containing the data values
00065 // separated by a separator (optionally followed by whitespace). The
00066 // default separator is a blank. Non-given values default to 0, False, or
00067 // blank string (depending on data type). A value is not given between 2
00068 // consecutive separators or if fewer values are given than needed.
00069 // One line per table row should be given.
00070 // The following two header lines define the columns in the table:
00071 // <ol>
00072 //   <li> The first line contains the names of the variables in each column.
00073 //        These names may be enclosed in double quotes.
00074 //   <li> The second line contains the data types of each column.
00075 //        Valid types are:
00076 //      <ul>
00077 //        <li>  S     for Short Integer data
00078 //        <li>  I     for Integer data
00079 //        <li>  R     for Real data
00080 //        <li>  D     for Double Precision data
00081 //        <li>  X     for Complex data (Real, Imaginary)
00082 //        <li>  DX    for Double Precision Complex data (R,I)
00083 //        <li>  Z     for Complex data (Amplitude, Phase)
00084 //        <li>  DZ    for Double Precision Complex data (A,P)
00085 //        <li>  A     for ASCII data (must be enclosed in double
00086 //                    quotes if it contains one or more blanks)
00087 //        <li>  DMS   for MVAngle-format position in DMS (converted to radians)
00088 //                    In this case a colon separated position is seen as
00089 //                    degrees and not as hours.
00090 //                    Blanks instead of : can be used as separator.
00091 //        <li>  HMS   for MVAngle-format position in HMS (converted to radians)
00092 //                    Blanks instead of : can be used as separator.
00093 //      </ul>
00094 //        The type can optionally be followed by one or more positive numbers
00095 //        (separated by commas without whitespace) indicating that the column
00096 //        contains an array. The numbers give the shape of the array.
00097 //        E.g. <src>D2,4</src> defines a column containing arrays with
00098 //        shape [2,4]. It "consumes" 8 numbers in each input data line.
00099 //        The last column can contain a 0 in one of the shape numbers.
00100 //        It indicates that the arrays are variable shaped; it "consumes"
00101 //        all remaining numbers in each input data line. If needed,
00102 //        the arrays are filled with default values (0, False, or blank).
00103 //        E.g. <src>I0</src> indicates a variable shaped vector.
00104 //        <src>I0,4</src> with a line with remaining input
00105 //        <src>1 2 3 4 5 6 7 8 9</src> results in an array with shape [3,4]
00106 //        (filled with 3 zeroes).
00107 // </ol>
00108 // If the <src>autoHeader</src> argument is True, the column definition
00109 // lines should not be given. It recognizes the types from the first data
00110 // line. It gives the names 'column0', etc. to the columns.
00111 // It can recognize integer, double, and string types.
00112 // It is possible to give a shape argument which has the same function
00113 // as the shape values discussed above.
00114 // <p>
00115 // There are two forms of the readAsciiTable function:
00116 // <ol>
00117 //  <li> The simplest form has two input files.
00118 //       The second input file contains the column data.
00119 //       The first input file contains the keywords (if any)
00120 //       and the column definitions.
00121 //       The keywords in the first file, if there are any, must be enclosed
00122 //       between a line that starts with ".keywords" and a line that starts
00123 //       with ".endkeywords". To define column keywords, .keywords should be
00124 //       followed by whitespace and the column name. 
00125 //       Between these two lines each line should contain the following:
00126 //       <ul>
00127 //        <li> The keyword name, e.g., ANYKEY
00128 //        <li> The datatype of the keyword (cf. list of valid types above)
00129 //        <li> The value or values for the keyword (the keyword may contain a
00130 //             scalar or a vector of values).  e.g., 3.14159  21.78945
00131 //      </ul>
00132 //      After the keywords definitions, the two column definition lines
00133 //      should follow (unless <src>autoHeader=True</src> is given).
00134 //      <br>For example:
00135 //      <srcblock>
00136 //       .keywords
00137 //       KEYI  I  10
00138 //       KEYIV I  11 12 13 14
00139 //       KEYF  R  1.2
00140 //       KEYFV R  -3.2 0 5.6
00141 //       KEYD  D  1.23456789
00142 //       KEYDV D  1 2 3 4 5 6 7 8 9
00143 //       KEYX  X  -1.5 -3
00144 //       KEYXC X  0 1 2 3 4 5 6 7 8 9
00145 //       KEYZ  Z  -3  -1.5
00146 //       KEYZV Z  0 0.1 0.2 0.3 0.4 0.5
00147 //       KEYS  A  "1 2 3 4 5"
00148 //       KEYSV A  " 1 2 " "AAA" BBB bbb CCc C "@#$%^&*()"
00149 //       .endkeywords
00150 //       .keywords  COLDX
00151 //       IKEYS   A "coldx ikey"
00152 //       DKEYS   A "coldx dkey"
00153 //       .endkeywords
00154 //       COLI   COLF   COLD       COLX        COLZ       COLS
00155 //        I      R      D          X           Z          A
00156 //      </srcblock>
00157 //      defines a table with 12 table keywords (of which 6 contain vector
00158 //      values), 2 keywords for column COLDX, and 6 columns.
00159 //      The number of rows is determined by the number of
00160 //      lines in the second input file.
00161 //  <li> The other form is to combine the two files in one file.
00162 //       In that case the data lines must be preceded by the optional
00163 //       keyword and column definitions (without an intermediate blank line).
00164 // </ol>
00165 // </synopsis>
00166 
00167 // <example>
00168 // <srcblock>
00169 //   readAsciiTable ("file.in", "", "table.test");
00170 // </srcblock>
00171 // creates a table with name <src>table.test</src> from the text file
00172 // <src>file.in</src>. The text file could look like:
00173 // <srcblock>
00174 //  COLI   COLF   COLD       COLX        COLZ       COLS
00175 //   I      R      D          X           Z          A
00176 //  1      1.1    1.11       1.12 1.13   1.14 1.15  Str1
00177 //  10     11     12         13   14     15   16    String17
00178 // </srcblock>
00179 // resulting in a table with 6 columns and 2 rows.
00180 // </example>
00181 
00182 // <group name=readAsciiTable>
00183 
00184 
00185 // Create a table with the name given by tableName from the file filein.
00186 // If autoHeader==True, the format is automatically derived from the
00187 // first data line. It can recognize integer, double, and String types.
00188 // The columns will be named column1, column2, etc.
00189 // If the autoShape argument is given with 1 or more axes, all values are
00190 // treated as a single column with the given shape. Note that one of the
00191 // axes can have length 0 indicating a variable shaped array.
00192 // If autoHeader==False, the layout of the table has to be defined in
00193 // the first 2 lines of the input file. The remaining lines in the
00194 // input file contain the data.
00195 //
00196 // When the tableDescName is not blank, the table description will
00197 // be stored in a table description file with the given name.
00198 // <br>It returns a string containing the format of the columns in
00199 // the form COL1=R, COL2=D, ...
00200 //
00201 // The separator gives the character separating the values. The default
00202 // is a blank. Note that irrespective of the separator, blanks between
00203 // values are always ignored. A string value has to be enclosed in
00204 // double quotes if it has to contain blanks or the separator value.
00205 //
00206 // Header and data lines starting with the regular expression given in
00207 // commentMarkerRegex are ignored. By default no comment marker is present.
00208 // E.g. "#" ignores all lines starting with the #-sign.
00209 // " *#" does the same, but the lines to ignore can start with whitespace.
00210 //
00211 // The firstLine and lastLine arguments give the 1-relative number of the
00212 // first and last line to read from the file. firstLine <= 0 is the
00213 // same as 1. lastLine <= 0 means until end-of-file.
00214 // Note that lines matching the comment marker are also counted.
00215 String readAsciiTable (const String& filein, const String& tableDescName,
00216                        const String& tableName, Bool autoHeader = False,
00217                        Char separator = ' ',
00218                        const String& commentMarkerRegex = "",
00219                        Int firstLine = 1, Int lastLine = -1,
00220                        const IPosition& autoShape = IPosition());
00221 
00222 // This form takes the column names and data types from the given vectors.
00223 // Each element in the dataTypes vector has to be of the form as would
00224 // be given in a header line.
00225 String readAsciiTable (const String& filein, const String& tableproto,
00226                        const String& tablename,
00227                        const Vector<String>& columnNames,
00228                        const Vector<String>& dataTypes,
00229                        Char separator, const String& commentMarkerRegex,
00230                        Int firstLine, Int lastLine);
00231 
00232 // This form reads TWO Ascii files. The first file (headerFile) may contain
00233 // keywords and their values as well as the two lines described above for
00234 // the names and types of the columns. The second file (dataFile) is
00235 // intended for data only.
00236 // When the tableDescName is not blank, the table description will
00237 // be stored in a table description file with the given name.
00238 // <br>It returns a string containing the format of the columns in
00239 // the form COL1=R, COL2=D, ...
00240 //
00241 // The separator gives the character separating the values. The default
00242 // is a blank. Note that irrespective of the separator, blanks between
00243 // values are always ignored. A string value has to be enclosed in
00244 // double quotes if it has to contain blanks or the separator value.
00245 //
00246 // Header and data lines starting with the regular expression given in
00247 // commentMarkerRegex are ignored. By default no comment marker is present.
00248 // E.g. "#" ignores all lines starting with the #-sign.
00249 // " *#" does the same, but the lines to ignore can start with whitespace.
00250 //
00251 // The firstLine and lastLine arguments give the 1-relative number of the
00252 // first and last line to read from the data file. firstLine <= 0 is the
00253 // same as 1. lastLine <= 0 means until end-of-file.
00254 // Note that lines matching the comment marker are also counted.
00255 // <group>
00256 String readAsciiTable (const String& headerFile, const String& dataFile, 
00257                        const String& tableDescName, const String& tablename,
00258                        Char separator = ' ',
00259                        const String& commentMarkerRegex = "",
00260                        Int firstLine = 1, Int lastLine = -1);
00261 //# Note that this char* version is needed, because a string literal given as
00262 //# 4th argument would otherwise match the Bool argument of the first version.
00263 String readAsciiTable (const String& headerFile, const String& dataFile, 
00264                        const String& tableDescName, const char* tablename,
00265                        Char separator = ' ',
00266                        const String& commentMarkerRegex = "",
00267                        Int firstLine = 1, Int lastLine = -1);
00268 // </group>
00269 
00270 // Versions similar to the ones above, but returning a Table object.
00271 // The format string is returned in the first argument (formatString).
00272 // The type of Table can be given in tableType (Plain or Memory).
00273 // <group>
00274 Table readAsciiTable (String& formatString, Table::TableType tableType,
00275                       const String& filein, const String& tableDescName,
00276                       const String& tableName, Bool autoHeader = False,
00277                       Char separator = ' ',
00278                       const String& commentMarkerRegex = "",
00279                       Int firstLine = 1, Int lastLine = -1,
00280                       const IPosition& autoShape = IPosition());
00281 Table readAsciiTable (String& formatString, Table::TableType tableType,
00282                       const String& filein, const String& tableproto,
00283                       const String& tablename,
00284                       const Vector<String>& columnNames,
00285                       const Vector<String>& dataTypes,
00286                       Char separator, const String& commentMarkerRegex,
00287                       Int firstLine, Int lastLine);
00288 Table readAsciiTable (String& formatString, Table::TableType tableType,
00289                       const String& headerFile, const String& dataFile, 
00290                       const String& tableDescName, const String& tablename,
00291                       Char separator = ' ',
00292                       const String& commentMarkerRegex = "",
00293                       Int firstLine = 1, Int lastLine = -1);
00294 Table readAsciiTable (String& formatString, Table::TableType tableType,
00295                       const String& headerFile, const String& dataFile, 
00296                       const String& tableDescName, const char* tablename,
00297                       Char separator = ' ',
00298                       const String& commentMarkerRegex = "",
00299                       Int firstLine = 1, Int lastLine = -1);
00300 // </group>
00301 
00302 // </group>
00303 
00304 
00305 
00306 
00307 // <summary>
00308 // Helper class for readAsciiTable
00309 // </summary>
00310 // <reviewed reviewer="UNKNOWN" date="before2004/08/25" tests="" demos="">
00311 // </reviewed>
00312 
00313 // <use visibility=local>
00314 
00315 // <synopsis>
00316 // This class contains static functions as helpers for readAsciiTable.
00317 // </synopsis>
00318 
00319 class ReadAsciiTable
00320 {
00321 public:
00322   // Run the readAsciiTable (run returns a String, runt a Table object).
00323   static String run (const String& headerfile, const String& filein, 
00324                      const String& tableproto, const String& tablename,
00325                      Bool autoHeader, const IPosition& autoShape,
00326                      const Vector<String>& columnNames,
00327                      const Vector<String>& dataTypes,
00328                      Char separator,
00329                      const String& commentMarkerRegex,
00330                      Int firstLine, Int lastLine);
00331   static Table runt (String& formatString, Table::TableType tableType,
00332                      const String& headerfile, const String& filein, 
00333                      const String& tableproto, const String& tablename,
00334                      Bool autoHeader, const IPosition& autoShape,
00335                      const Vector<String>& columnNames,
00336                      const Vector<String>& dataTypes,
00337                      Char separator,
00338                      const String& commentMarkerRegex,
00339                      Int firstLine, Int lastLine);
00340 
00341   // Read a position using MVAngle.
00342   // If isDMS is True, a position with : is treated as DMS instead of HMS.
00343   // This function is a bit more relaxed than MVAngle::read.
00344   // It allows whitespace. Furthermore it allows whitespace instead of : as separator.
00345   static double stringToPos (const String& pos, Bool isDMS);
00346 
00347 private:
00348   // Define the value types.
00349   enum RATType {RATBool, RATShort, RATInt, RATFloat, RATDouble, RATString,
00350                 RATComX, RATComZ, RATDComX, RATDComZ, RATDMS, RATHMS};
00351 
00352 
00353   // Do the actual run.
00354   static String doRun (const String& headerfile, const String& filein, 
00355                        const String& tableproto, const String& tablename,
00356                        Bool autoHeader, const IPosition& autoShape,
00357                        const Vector<String>& columnNames,
00358                        const Vector<String>& dataTypes,
00359                        Char separator,
00360                        Bool testComment, const Regex& commentMarker,
00361                        Int firstLine, Int lastLine);
00362 
00363   // Do the actual work of making and filling the table.
00364   static Table makeTab (String& formatString, Table::TableType tableType,
00365                         const String& headerfile, const String& filein, 
00366                         const String& tableproto,
00367                         const String& tablename,
00368                         Bool autoHeader, const IPosition& autoShape,
00369                         const Vector<String>& columnNames,
00370                         const Vector<String>& dataTypes,
00371                         Char separator,
00372                         Bool testComment, const Regex& commentMarker,
00373                         Int firstLine, Int lastLine);
00374 
00375   // Get the next line from the file. Lines to be ignored are skipped.
00376   // It returns False when no more lines are available.
00377   static Bool getLine (ifstream& file, Int& lineNumber,
00378                        char* line, Int lineSize,
00379                        Bool testComment, const Regex& commentMarker,
00380                        Int firstLine, Int lastLine);
00381   
00382   // Get the next part of the line using the separator as delimiter.
00383   // Leading blanks are ignored.
00384   static Int getNext (const Char* string, Int strlen, Char* result,
00385                       Int& at, Char separator);
00386   
00387   // Derive the types from the values in the first data line.
00388   static void getTypes (const IPosition& shape,
00389                         const Char* in, Int leng,
00390                         Char* string1, Char* string2, Char separator);
00391 
00392   // Turn the string into a Bool value.
00393   // An empty string, the value 0, and any value starting with f, F, n or N
00394   // are False.
00394   static Bool makeBool (const String& str);
00395 
00396   // Handle a keyword set.
00397   static void handleKeyset (Int lineSize, char* string1,
00398                             char* first, char* second,
00399                             TableRecord& keysets,
00400                             LogIO& logger,
00401                             const String& fileName,
00402                             ifstream& jFile,
00403                             Int& lineNumber,
00404                             Char separator,
00405                             Bool testComment,
00406                             const Regex& commentMarker,
00407                             Int firstLine, Int lastLine);
00408 
00409   // Get the shape and type from the type string.
00410   static Int getTypeShape (const String& typestr,
00411                            IPosition& shape, Int& type);
00412   
00413   // Get the next scalar value with the given type from string1.
00414   static Bool getValue (char* string1, Int lineSize, char* first,
00415                         Int& at1, Char separator,
00416                         Int type, void* value);
00417 
00418   // Handle the next scalar with the given type from the data line and
00419   // put it into the given row of the table column.
00420   static void handleScalar (char* string1, Int lineSize, char* first,
00421                             Int& at1, Char separator,
00422                             Int type,
00423                             TableColumn& tabcol, uInt rownr);
00424 
00425   // Get the next array with the given type from string1.
00426   // It returns the shape (for variable shaped arrays).
00427   static IPosition getArray (char* string1, Int lineSize, char* first,
00428                              Int& at1, Char separator,
00429                              const IPosition& shape, Int varAxis,
00430                              Int type, void* valueBlock);
00431 
00432   // Get the next array with the given type from the data line and
00433   // put it into the given row of the table column.
00434   static void handleArray (char* string1, Int lineSize, char* first,
00435                            Int& at1, Char separator,
00436                            const IPosition& shape, Int varAxis,
00437                            Int type,
00438                            TableColumn& tabcol, uInt rownr);
00439 };
00440 
00441 
00442 
00443 } //# NAMESPACE CASA - END
00444 
00445 #endif