casa  5.7.0-16
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TableStreamReader.h
Go to the documentation of this file.
1 #ifndef TABLE_STREAM_READER_H
2 #define TABLE_STREAM_READER_H
3 #include <alma/ASDM/Misc.h>
4 #include <libxml/parser.h>
5 #include <libxml/tree.h>
6 #include <alma/ASDM/ASDM.h>
7 #include <alma/ASDM/Entity.h>
10 #include <sstream>
11 #include <sys/types.h>
12 #include <sys/stat.h>
13 #include <unistd.h>
14 
15 #define READBUFFERSIZE ( 50 * 1024 * 1024 )
16 namespace asdm {
39  template<class T, class R> class TableStreamReader {
40  public:
45 
49  virtual ~TableStreamReader(){;}
50 
51 
52 
58  void open(const std::string& directory){
59  checkState(T_OPEN, "TableStreamReader::open");
60  // Open the file.
61  tablePath = directory + "/"+ T::name() + ".bin";
62  tableFile.open(tablePath.c_str(), std::ios::in|std::ios::binary);
63  if (!tableFile.is_open())
64  throw asdm::ConversionException("Could not open file " + tablePath, T::name());
65 
66  // Determine the size of the file.
67  struct stat filestatus;
68  stat( tablePath.c_str(), &filestatus);
69  fileSizeInBytes = filestatus.st_size;
70 
71  // And start parsing the content.
72 
74  // cout << "boundary_1 = " << boundary_1 << std::endl;
75 
77 
79  std::string xmlHeader = accumulateUntilBoundary(boundary_1, 100);
80 
81  //
82  // We have the xmlHeader , let's parse it.
83  //
84  xmlDoc *doc;
85  doc = xmlReadMemory(xmlHeader.data(), xmlHeader.size(), "BinaryTableHeader.xml", NULL, XML_PARSE_NOBLANKS);
86  if ( doc == NULL )
87  throw asdm::ConversionException("Failed to parse the xmlHeader into a DOM structure.", T::name());
88 
89  xmlNode* root_element = xmlDocGetRootElement(doc);
90  if ( root_element == NULL || root_element->type != XML_ELEMENT_NODE )
91  throw asdm::ConversionException("Failed to parse the xmlHeader into a DOM structure.", T::name());
92 
93  const ByteOrder* byteOrder = NULL;
94  if ( std::string("ASDMBinaryTable").compare((const char*) root_element->name) == 0) {
95  // Then it's an "old fashioned" MIME file for tables.
96  // Just try to deserialize it with Big_Endian for the bytes ordering.
97  byteOrder = asdm::ByteOrder::Big_Endian;
98  attributesSeq = T::defaultAttributesNamesInBin();
99  } else if (std::string(T::name()+"Table").compare((const char*) root_element->name) == 0) {
100  // It's a new (and correct) MIME file for tables.
101  //
102  // 1st ) Look for a BulkStoreRef element with an attribute byteOrder.
103  //
104  xmlNode* bulkStoreRef = 0;
105  xmlNode* child = root_element->children;
106 
107  // Skip the two first children (Entity and ContainerEntity).
108  bulkStoreRef = (child == 0) ? 0 : ( (child->next) == 0 ? 0 : child->next->next );
109 
110  if ( bulkStoreRef == 0 || (bulkStoreRef->type != XML_ELEMENT_NODE) || (std::string("BulkStoreRef").compare((const char*) bulkStoreRef->name) != 0))
111  throw asdm::ConversionException ("Could not find the element '/"+T::name()+"Table/BulkStoreRef'. Invalid XML header '"+ xmlHeader + "'.", T::name());
112 
113  // We found BulkStoreRef, now look for its attribute byteOrder.
114  _xmlAttr* byteOrderAttr = 0;
115  for (struct _xmlAttr* attr = bulkStoreRef->properties; attr; attr = attr->next) {
116  if (std::string("byteOrder").compare((const char*) attr->name) == 0) {
117  byteOrderAttr = attr;
118  break;
119  }
120  }
121 
122  if (byteOrderAttr == 0)
123  throw asdm::ConversionException("Could not find the element '/"+T::name()+"Table/BulkStoreRef/@byteOrder'. Invalid XML header '" + xmlHeader +"'.", T::name());
124 
125  std::string byteOrderValue = std::string((const char*) byteOrderAttr->children->content);
126  if (!(byteOrder = asdm::ByteOrder::fromString(byteOrderValue)))
127  throw asdm::ConversionException("No valid value retrieved for the element '/"+T::name()+"Table/BulkStoreRef/@byteOrder'. Invalid XML header '" + xmlHeader + "'.", T::name());
128 
129  //
130  // 2nd) Look for the Attributes element and grab the names of the elements it contains.
131  //
132  xmlNode* attributes = bulkStoreRef->next;
133  if ( attributes == 0 || (attributes->type != XML_ELEMENT_NODE) || (std::string("Attributes").compare((const char*) attributes->name) != 0))
134  throw asdm::ConversionException ("Could not find the element '/"+T::name()+"Table/Attributes'. Invalid XML header '"+ xmlHeader + "'.", T::name());
135 
136  xmlNode* childOfAttributes = attributes->children;
137 
138  while ( childOfAttributes != 0 && (childOfAttributes->type == XML_ELEMENT_NODE) ) {
139  attributesSeq.push_back(std::string((const char*) childOfAttributes->name));
140  childOfAttributes = childOfAttributes->next;
141  }
142  }
143 
144  skipUntilEmptyLine(10);
145 
146  // Create an EndianIFStream from the substring containing the binary part.
147  eifs = asdm::EndianIFStream (&tableFile, byteOrder);
148 
150 
151  // We do nothing with that but we have to read it.
152  asdm::Entity containerEntity = Entity::fromBin((EndianIStream &)eifs);
153 
154  // Let's read numRows but ignore it and rely on the value specified in the ASDM.xml file.
155  ((EndianIStream &)eifs).readInt();
156 
157  // Memorize the starting point of rows.
158  whereRowsStart = tableFile.tellg();
159 
160  // find where the rows end, seek to near the end
161  tableFile.seekg(fileSizeInBytes-100);
162  // the accumulateUntilBoundary looks at "lines", but it may be starting
163  // from inside the binary part where null values might be found
164  // At most, there might be 100 bytes of null, or 100 lines. So limit
165  // the search to 100 lines before giving up.
166  std::string lastPart = accumulateUntilBoundary(boundary_1, 100);
167 
168  // the full size of the boundary and anything after it
169  endBoundarySizeInBytes = 100 - lastPart.size();
170 
171  // reset back to start of rows
172  tableFile.seekg(whereRowsStart);
173 
174  // Update the state
176  }
177 
182  void reset() {
183  checkState(T_RESET, "TableStreamReader::reset");
184  clear();
185  tableFile.seekg(whereRowsStart);
186  }
187 
194  const std::vector<R*>& nextNRows(unsigned int nRows) {
195  checkState(T_READ, "TableStreamReader::nextNRows");
196  clear();
197  unsigned int nread = 0;
198  T& tableRef = (T&) asdm.getTable(T::name());
199  while ( hasRows() && nread < nRows ) {
200  rows.push_back(R::fromBin((EndianIStream&) eifs, tableRef, attributesSeq));
201  nread++;
202  }
203  return rows;
204  }
205 
214  const std::vector<R*>& untilNBytes(unsigned int nBytes) {
215  checkState(T_READ, "TableStreamReader::untilNBytes");
216  clear();
217  off_t whereAmI = tableFile.tellg();
218  if (!hasRows()) return rows;
219 
220  T& tableRef = (T&) asdm.getTable(T::name());
221  do {
222  rows.push_back(R::fromBin((EndianIStream&) eifs, tableRef , attributesSeq));
223  } while (((tableFile.tellg() - whereAmI) < nBytes) && hasRows());
224 
225  return rows;
226  }
227 
231  bool hasRows() {
232  checkState(T_CHECK, "TableStreamReader::hasRows");
233  return tableFile.tellg() < (fileSizeInBytes - endBoundarySizeInBytes);
234  }
235 
239  void close() {
240  checkState(T_CLOSE, "TableStreamReader::close");
241  clear();
242  if (tableFile.is_open()) tableFile.close();
243  free(readBuffer);
244  // Update the state.
246  }
247 
248  private:
249  std::string tablePath;
250  std::ifstream tableFile;
251  std::string currentLine;
252  std::string boundary_1;
253 
257  std::vector<std::string> attributesSeq;
259  std::vector<R*> rows;
260 
261  char* readBuffer;
262 
263  std::streampos whereRowsStart;
264 
268 
269  void checkState(Transition t, const std::string& methodName) const {
270  switch (currentState) {
271  case S_CLOSED:
272  if (t == T_OPEN) return;
273 
274  case S_OPENED:
275  if (t == T_CHECK || t == T_RESET || t == T_READ || t == T_CLOSE) return;
276  }
277  throw asdm::ConversionException("Invalid call of method '" + methodName + "' in the current context.", T::name());
278  }
282  void clear() {
283  for (unsigned int i = 0; i < rows.size(); i++)
284  if (rows[i]) delete rows[i];
285  rows.clear();
286  }
287 
288  void skipUntilEmptyLine(int maxSkips) {
289  // cout << "Entering skipUntilEmptyLine" << std::endl;
290  int numSkips = 0;
291  std::string line;
292  do {
293 #ifndef WITHOUT_BOOST
294  // not sure where this trim is coming from
295  line = trim(nextLine());
296 #else
297  // using function in Misc.h
298  line = nextLine();
299  trim(line);
300 #endif
301  numSkips++;
302  } while (line.size() != 0 && numSkips <= maxSkips);
303 
304  if (numSkips > maxSkips) {
305  std::ostringstream oss;
306  oss << "could not find an empty line is less than " << maxSkips + 1 << " lines." << std::endl;
307  throw asdm::ConversionException(oss.str(), T::name());
308  }
309  // cout << "Exiting skipUntilEmptyLine" << std::endl;
310  }
311 
312  std::string nextLine() {
313  unsigned long long whereAmI = tableFile.tellg();
314  getline(tableFile, currentLine);
315  if (tableFile.fail()) {
316  std::ostringstream oss ;
317  oss << "TableStreamReader::nextLine() : I could not read a line in '" << tablePath << "' at position " << whereAmI << ".";
318  throw asdm::ConversionException(oss.str(), T::name());
319  }
320  // cout << "nextLine has read '" << currentLine << "'" << std::endl;
321  return currentLine;
322  }
323 
324  std::pair<std::string, std::string> headerField2Pair(const std::string& hf){
325  std::string name, value;
326  size_t colonIndex = hf.find(":");
327  if (colonIndex == std::string::npos)
328  throw asdm::ConversionException(" could not detect a well formed MIME header field in '"+hf+"'", T::name());
329 
330  if (colonIndex > 0) {
331  name = hf.substr(0, colonIndex);
332  trim(name);
333  }
334 
335  if (colonIndex < hf.size()) {
336  value = hf.substr(colonIndex+1);
337  trim(value);
338  }
339 
340  return make_pair(name, value);
341 }
342 
343 std::string requireMIMEHeader() {
344  // MIME-Version
345  std::pair<std::string, std::string>name_value(headerField2Pair(nextLine()));
346  // cout << name_value.first << "=" << name_value.second << std::endl;
347  // if (currentLine != "MIME-Version: 1.0") // a work around for the case when the very first character is not the expected "M" (happened with some corrupted data).
348 #ifndef WITHOUT_BOOST
349  if (! boost::algorithm::iends_with(currentLine, "IME-Version: 1.0"))
350 #else
351  std::string versionEnd = "IME-Version: 1.0";
352  if ((currentLine.size()<=versionEnd.size()) || (currentLine.compare((currentLine.size()-versionEnd.size()),versionEnd.size(),versionEnd)!=0))
353 #endif
354  throw asdm::ConversionException("'MIME-Version: 1.0' missing at the very beginning of the file '"+ tablePath +"'.", T::name());
355 
356  // Content-Type
358 
359  // cout << "boundary_1 =" << boundary_1 << std::endl;
360 
361  // Content-Description
362  //name_value = requireHeaderField("CONTENT-DESCRIPTION");
363 
364  // Content-Location
365  //name_value = requireHeaderField("CONTENT-LOCATION");
366 
367  // Look for an empty line in the at most 10 subsequent lines.
368  skipUntilEmptyLine(20);
369 
370  return boundary_1;
371 }
372 
373 std::pair<std::string, std::string> requireHeaderField(const std::string & hf) {
374 #ifndef WITHOUT_BOOST
375  std::string s = boost::trim_copy(nextLine());
376  while (boost::algorithm::iends_with(s, ";")) {
377  s += boost::trim_copy(nextLine());
378  }
379 #else
380  std::string s = asdm::trim_copy(nextLine());
381  while (s.back()==';') {
382  s += asdm::trim_copy(nextLine());
383  }
384 #endif
385 
386  std::pair<std::string, std::string> hf2pair(headerField2Pair(s));
387 
388 #ifndef WITHOUT_BOOST
389  if (boost::algorithm::to_upper_copy(hf2pair.first) != hf)
390  throw asdm::ConversionException("read '" + currentLine + "'. Was expecting '" + hf + "'...", T::name());
391 #else
392  if (asdm::str_toupper(hf2pair.first) != hf)
393  throw asdm::ConversionException("read '" + currentLine + "'. Was expecting '" + hf + "'...", T::name());
394 #endif
395  return hf2pair;
396 }
397 
398 void requireBoundary(const std::string& boundary, int maxLines) {
399  // cout << "Entering require boundary with boundary == '" << boundary << "' and maxLines = " << maxLines << std::endl;
400  int numLines = 0;
401  std::string dashdashBoundary = "--"+boundary;
402  std::string line = nextLine();
403  while ((numLines <= maxLines) && (line.compare(dashdashBoundary) != 0)) {
404  numLines++;
405  line = nextLine();
406  }
407 
408  if (numLines > maxLines) {
409  std::ostringstream oss;
410  oss << "could not find the boundary std::string '"<< boundary << "' in less than " << maxLines + 1 << " lines." << std::endl;
411  throw asdm::ConversionException(oss.str(), T::name());
412  }
413 }
414 
415 std::string accumulateUntilBoundary(const std::string& boundary, int maxLines) {
416  // cout << "Entering accumulateUntilBoundary with maxLines = " << maxLines << std::endl;
417  int numLines = 0;
418  std::string line ;
419  std::string result;
420 
421 #ifndef WITHOUT_BOOST
422  // not sure where this trim is coming from
423  line=trim(nextLine());
424 #else
425  // using function in Misc.h
426  line = nextLine();
427  trim(line);
428 #endif
429 
430  while ( numLines <= maxLines && line.find("--"+boundary) == std::string::npos ) {
431  result += line;
432 #ifndef WITHOUT_BOOST
433  line=trim(nextLine());
434 #else
435  line = nextLine();
436  trim(line);
437 #endif
438  numLines++;
439  }
440 
441  if (numLines > maxLines) {
442  std::ostringstream oss;
443  oss << "could not find the boundary std::string '"<< boundary << "' in less than " << maxLines + 1 << " lines." << std::endl;
444  throw asdm::ConversionException(oss.str(), T::name());
445  }
446  return result;
447 }
448 
449 std::string requireBoundaryInCT(const std::string& ctValue) {
450  std::vector<std::string> cvValueItems;
451 
452 #ifndef WITHOUT_BOOST
453  boost::algorithm::split (cvValueItems, ctValue, boost::algorithm::is_any_of(";"));
454 #else
455  asdm::strsplit(ctValue,';',cvValueItems);
456 #endif
457  std::vector<std::string> cvValueItemsNameValue;
458  for ( std::vector<std::string>::const_iterator iter = cvValueItems.begin(); iter != cvValueItems.end() ; iter++ ) {
459  cvValueItemsNameValue.clear();
460 #ifndef WITHOUT_BOOST
461  boost::algorithm::split(cvValueItemsNameValue, *iter, boost::algorithm::is_any_of("="));
462  std::string boundary;
463  if ((cvValueItemsNameValue.size() > 1) && (boost::to_upper_copy(boost::trim_copy(cvValueItemsNameValue[0])) == "BOUNDARY") && (unquote(cvValueItemsNameValue[1], boundary).size() > 0))
464  return boundary;
465 #else
466  asdm::strsplit(*iter,'=',cvValueItemsNameValue);
467  std::string boundary;
468  if ((cvValueItemsNameValue.size() > 1) && (asdm::str_toupper(asdm::trim_copy(cvValueItemsNameValue[0])) == "BOUNDARY") && (unquote(cvValueItemsNameValue[1], boundary).size() > 0))
469  return boundary;
470 #endif
471  }
472 throw asdm::ConversionException("could not find a boundary definition in '" + ctValue + "'.", T::name());
473 }
474 
/**
 * Removes one pair of enclosing quotes from a string.
 *
 * The first and last characters are dropped when the string has at least two
 * characters and both are '"' or both are '\''. Any other string is copied
 * unchanged.
 *
 * @param s the string to process.
 * @param unquoted receives the result.
 * @return a copy of the result stored in unquoted.
 */
std::string unquote(const std::string& s, std::string& unquoted) {
    const std::string::size_type length = s.size();

    bool enclosed = false;
    if (length >= 2) {
        const char first = s[0];
        const char last = s[length - 1];
        enclosed = (first == last) && (first == '"' || first == '\'');
    }

    // For a string made of the two quotes only, substr(1, 0) is the empty string.
    unquoted = enclosed ? s.substr(1, length - 2) : s;
    return unquoted;
}
491 };
492 } // end namespace asdm
493 #endif
Elements::const_iterator const_iterator
struct _xmlDoc xmlDoc
Definition: Misc.h:59
std::vector< std::string > attributesSeq
A class to represent byte order information.
Definition: Misc.h:115
std::pair< std::string, std::string > headerField2Pair(const std::string &hf)
void close()
Releases all the resources allocated to the instance which can be reused with another file...
const std::vector< R * > & nextNRows(unsigned int nRows)
Reads at most n rows in the file, creates as many memory representations of these rows and return the...
The ASDM class is the container for all tables.
Definition: ASDM.h:273
#define READBUFFERSIZE
std::vector< casacore::String > split(const casacore::String &string, const casacore::String &splitter, bool ignoreConsecutiveSplitters=false)
void checkState(Transition t, const std::string &methodName) const
ABSTRACT CLASSES Abstract class for colors Any implementation of color should be able to provide a hexadecimal form of the if a human readable name(i.e."black").In many places throughout the plotter
void skipUntilEmptyLine(int maxSkips)
virtual ~TableStreamReader()
The destructor.
static const ByteOrder * fromString(const std::string &s)
Convert a string to a const ByteOrder*.
bool hasRows()
Returns true if the end of the file has not been reached.
TableExprNode trim(const TableExprNode &node)
Definition: ExprNode.h:1541
Representable & getTable(std::string tableName)
Return the table, as a Representable object, with the specified name.
std::string unquote(const std::string &s, std::string &unquoted)
The ConversionException class represents an exception when an error occurs in converting a table to i...
std::string requireBoundaryInCT(const std::string &ctValue)
asdm::EndianIFStream eifs
const Double second
Time interval [T]:
struct _xmlNode xmlNode
Definition: Misc.h:55
free(pool)
void reset()
Repositions the read head to the beginning of the table.
TableStreamReader()
An empty constructor.
std::string accumulateUntilBoundary(const std::string &boundary, int maxLines)
const std::vector< R * > & untilNBytes(unsigned int nBytes)
Reads as many rows as possible in the file, keeps their in-memory representations until the number of ...
static Entity fromBin(EndianIStream &eis)
Read the binary representation of an Entity from an EndianIStream and use the read value to set an Enti...
static const ByteOrder * Big_Endian
Definition: Misc.h:118
void open(const std::string &directory)
Opens a file expected to contain an ASDM table of type T with rows of type R.
A generic class to read a file containing an ASDM table as a stream.
void clear()
Empty the local storage containing the rows obtained during the last read operation.
std::pair< std::string, std::string > requireHeaderField(const std::string &hf)
void requireBoundary(const std::string &boundary, int maxLines)
std::vector< R * > rows
LatticeExprNode value(const LatticeExprNode &expr)
This function returns the value of the expression without a mask.
The Entity class is an identification of a persistent entity in the ALMA archive. ...
Definition: Entity.h:59