casa  5.7.0-16
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Vi2DataProvider.h
Go to the documentation of this file.
1 // -*- mode: c++ -*-
2 //# Copyright (C) 1996,1997,1998,1999,2000,2002,2003,2015
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //
27 // Base class of data providers based on casacore::StatsDataProvider, backed by a
28 // VisibilityIterator2 instances.
29 //
30 #ifndef MSVIS_STATISTICS_VI2_DATA_PROVIDER_H_
31 #define MSVIS_STATISTICS_VI2_DATA_PROVIDER_H_
32 
33 #include <casacore/casa/aips.h>
44 #include <memory>
45 #include <vector>
46 #include <unordered_map>
47 #include <unordered_set>
48 #include <set>
49 #include <cassert>
50 
51 namespace casa {
52 
53 // casacore::StatsDataProvider template class backed by VisibilityIterator2
54 // instances. These data providers operate on a single casacore::MS column over
55 // all vi2 sub-chunks in a dataset composed of one or more chunks selected when
56 // the data provider is instantiated. In other words, the data sample for
57 // statistics generated with a Vi2DataProvider instance is the data from a
58 // single column in a single dataset of consecutive vi2 chunks. It is intended
59 // that the user set up the VisibilityIterator2 appropriately to select the
60 // desired data sample for computing statistics. Consecutive chunks produced by
61 // an iterator may be merged by the data provider to produce a single
62 // dataset. The iteration over an MS, and the computation of statistics on each
63 // dataset driven using a Vi2StatisticsIteratee instance, may be done as
64 // follows:
65 //
66 // Vi2DataProvider *dp; // given
67 // casacore::StatisticsAlgorithm statistics; // given
68 // class DoStuff : public Vi2StatisticsIteratee {
69 // ... // constructor, probably needs some sort of accumulator
70 // void nextDataset(casacore::StatisticsAlgorithm stats,
71 // const std::unordered_map<int,std::string> *colVals) {
72 // stats.getStatistics()...;
73 // }
74 // }
75 // DoStuff doStuff;
76 // dp->foreachDataset(statistics, doStuff);
77 //
78 // Note that the AccumType template parameter value of
79 // casacore::StatsDataProvider is derived from the DataIterator parameter value
80 // of this template, imposing a constraint on the classes that can be used for
81 // DataIterator.
82 //
83 template <class DataIterator, class MaskIterator, class WeightsIterator>
85  : public casacore::StatsDataProvider<typename DataIterator::AccumType,
86  DataIterator,
87  MaskIterator,
88  WeightsIterator> {
89 
90 public:
91  // Define typedefs for some template parameters. This is primarily to
92  // support instantiating "...Statistics" instances of the proper type given
93  // only an instance of this type. [AccumType is an exception, in that it's
94  // also needed by the (macro) definition of DataRanges.]
95  typedef typename DataIterator::AccumType AccumType;
96  typedef DataIterator DataIteratorType;
97  typedef MaskIterator MaskIteratorType;
98  typedef WeightsIterator WeightsIteratorType;
99 
102  const std::set<casacore::MSMainEnums::PredefinedColumns>
103  &mergedColumns_,
107  : vi2(vi2)
108  , component(component)
109  , use_data_weights(use_data_weights)
110  , omit_flagged_data(omit_flagged_data) {
111 
112  // Attach the casacore::MS columns to vi2 by calling originChunks(). Not
113  // the most direct method, but attaching the columns is required in many
114  // cases to pass existsColumn() test.
115  vi2->originChunks();
116  if (!vi2->existsColumn(component))
117  throw casacore::AipsError("Column is not present");
118 
119  for (auto &&i : mergedColumns_)
120  mergedColumns.insert(columnNames.at(i));
121  }
122 
124  : vi2(other.vi2)
126  , datasetIndex(other.datasetIndex)
129  , currentChunk(other.currentChunk)
130  , component(other.component)
135  , mask_iterator(other.mask_iterator) {
136  other.vi2 = nullptr;
137  }
138 
140  vi2 = other.vi2;
141  mergedColumns = other.mergedColumns;
142  datasetIndex = other.datasetIndex;
143  datasetChunkOrigin = other.datasetChunkOrigin;
144  followingChunkDatasetIndex = other.followingChunkDatasetIndex;
145  currentChunk = other.currentChunk;
146  component = other.component;
147  use_data_weights = other.use_data_weights;
148  omit_flagged_data = other.omit_flagged_data;
149  data_iterator = other.data_iterator;
150  weights_iterator = other.weights_iterator;
151  mask_iterator = other.mask_iterator;
152  other.vi2 = nullptr;
153  return *this;
154  }
155 
156  // Increment the data provider to the sub-chunk within the dataset.
157  void operator++() {
158  if (atEnd())
159  throw casacore::AipsError(
160  "Data provider increment beyond end of dataset");
161  vi2->next();
162  reset_iterators();
163  if (!vi2->more() && followingChunkDatasetIndex == datasetIndex)
165  }
166 
167  // Is this the last sub-chunk within the dataset?
169  return followingChunkDatasetIndex != datasetIndex && !vi2->more();
170  }
171 
172  // Take any actions necessary to finalize the provider. This will be called
173  // when atEnd() returns true.
174  void finalize() {};
175 
177  return getData().getCount();
178  }
179 
180  // Get the current sub-chunk
181  DataIterator getData() {
182  if (!data_iterator)
183  data_iterator =
184  std::unique_ptr<DataIterator>(new DataIterator(dataArray()));
185  return *data_iterator;
186  }
187 
189  return 1;
190  };
191 
193  return omit_flagged_data;
194  };
195 
196  // Get the associated mask of the current sub-chunk. Only called if
197  // hasMask() returns true.
198  MaskIterator getMask() {
199  if (!mask_iterator)
200  mask_iterator = std::unique_ptr<MaskIterator>(
201  new MaskIterator(vi2->getVisBuffer()));
202  return *mask_iterator;
203  };
204 
205  // Get the stride for the current mask (only called if hasMask() returns
206  // true). Will always return 1 in this implementation.
208  return 1;
209  };
210 
212  return use_data_weights;
213  };
214 
215  // Get the associated weights of the current sub-chunk. Only called if
216  // hasWeights() returns true;
217  WeightsIterator getWeights() {
218  if (!weights_iterator)
219  weights_iterator = std::unique_ptr<WeightsIterator>(
220  new WeightsIterator(vi2->getVisBuffer()));
221  return *weights_iterator;
222  };
223 
225  return false; // TODO: is this correct in all cases?
226  };
227 
228  // Get the associated range(s) of the current sub-chunk. Only called if
229  // hasRanges() returns true, which will never be the case in this
230  // implementation.
231  DataRanges
233  DataRanges result;
234  return result;
235  };
236 
237  // If the associated data set has ranges, are these include (return true) or
238  // exclude (return false) ranges?
240  return false;
241  };
242 
243  // Reset the provider to point to the first sub-chunk of the dataset.
244  void reset() {
246  vi2->originChunks();
247  currentChunk = 0;
248  initChunk();
252  } else {
253  initChunk();
255  }
256  }
257 
261 
262  datasetIndex = -1;
264  while (nextDataset()) {
265  std::unique_ptr<std::unordered_map<int,std::string> >
266  columnValues(mkColumnValues());
267  statistics.setDataProvider(this);
268  iteratee.nextDataset(statistics, columnValues.get());
269  };
270  }
271 
272 protected:
273 
274  std::unique_ptr<vi::VisibilityIterator2> vi2;
275 
277 
278  std::unique_ptr<const DataIterator> data_iterator;
279 
281 
282  std::unique_ptr<const WeightsIterator> weights_iterator;
283 
285 
286  std::unique_ptr<const MaskIterator> mask_iterator;
287 
289 
291 
293 
294  unsigned currentChunk;
295 
296  std::unordered_set<string> mergedColumns;
297 
298 private:
299 
300  void
302  data_iterator.reset();
303  weights_iterator.reset();
304  mask_iterator.reset();
305  }
306 
307  void
309  vi2->nextChunk();
310  ++currentChunk;
311  if (vi2->moreChunks())
312  initChunk();
314  }
315 
316  bool
318  ++datasetIndex;
320  if (datasetIndex == 0) {
321  vi2->originChunks();
322  currentChunk = 0;
323 
324  } else {
325  vi2->nextChunk();
326  ++currentChunk;
327  }
328  if (vi2->moreChunks())
329  initChunk();
332  return vi2->moreChunks();
333  }
334 
335  void
337  vi2->origin();
338  reset_iterators();
339  }
340 
342 
343  void
345  if (!vi2->moreChunks() || mergedColumns.count(vi2->keyChange()) == 0)
347  else
349  }
350 
351  std::unique_ptr<std::unordered_map<int,std::string> >
353  const vi::VisBuffer2 *vb = vi2->getVisBuffer();
354  return std::unique_ptr<std::unordered_map<int,std::string> >(
355  new std::unordered_map<int,std::string> {
356  {casacore::MSMainEnums::PredefinedColumns::ARRAY_ID,
357  "ARRAY_ID=" + std::to_string(vb->arrayId()[0])},
358  {casacore::MSMainEnums::PredefinedColumns::FIELD_ID,
359  "FIELD_ID=" + std::to_string(vb->fieldId()[0])},
360  {casacore::MSMainEnums::PredefinedColumns::DATA_DESC_ID,
361  "DATA_DESC_ID="
362  + std::to_string(vb->dataDescriptionIds()[0])},
363  {casacore::MSMainEnums::PredefinedColumns::TIME,
364  "TIME="
365  + std::to_string(vb->time()[0] - vb->timeInterval()[0] / 2)},
366  {casacore::MSMainEnums::PredefinedColumns::SCAN_NUMBER,
367  "SCAN_NUMBER=" + std::to_string(vb->scan()[0])},
368  {casacore::MSMainEnums::PredefinedColumns::STATE_ID,
369  "STATE_ID=" + std::to_string(vb->stateId()[0])}
370  });
371  }
372 
373  std::map<casacore::MSMainEnums::PredefinedColumns,std::string> columnNames = {
374  {casacore::MSMainEnums::PredefinedColumns::ARRAY_ID, "ARRAY_ID"},
375  {casacore::MSMainEnums::PredefinedColumns::FIELD_ID, "FIELD_ID"},
376  {casacore::MSMainEnums::PredefinedColumns::DATA_DESC_ID, "DATA_DESC_ID"},
377  {casacore::MSMainEnums::PredefinedColumns::TIME, "TIME"}};
378 };
379 
380 // casacore::Data provider template for row-based casacore::MS columns (i.e, not visibilities) using
381 // the 'weights' column for data weights. In most instances, you would not
382 // weight the data in these columns, but the Vi2DataProvider template
383 // requires that a WeightsIterator be provided.
384 template<class DataIterator>
388 
389 // casacore::Data provider template for row-based casacore::MS columns (i.e, not visibilities) using
390 // the 'sigma' column for data weights (appropriately transformed). In most
391 // instances, you would not weight the data in these columns, but the
392 // Vi2DataProvider template requires that a WeightsIterator be provided.
393 template<class DataIterator>
396  DataIterator,Vi2StatsFlagsRowIterator,Vi2StatsSigmasRowIterator>;
397 
398 // casacore::Data provider template for cube-based casacore::MS columns (i.e, the visibilities)
399 // using the 'weights' column for data weights.
400 template<class DataIterator>
404 
405 // casacore::Data provider template for cube-based casacore::MS columns (i.e, the visibilities)
406 // using the 'sigma' column for data weights (appropriately transformed).
407 template<class DataIterator>
410  DataIterator,Vi2StatsFlagsCubeIterator,Vi2StatsSigmasCubeIterator>;
411 
412 } // end namespace casa
413 
414 #endif // MSVIS_STATISTICS_VI2_DATA_PROVIDER_H_
const casacore::Bool omit_flagged_data
DataIterator DataIteratorType
std::unique_ptr< const DataIterator > data_iterator
casacore::Bool hasWeights() const
Does the current data set have associated weights?
MaskIterator MaskIteratorType
VisibilityIterator2 iterates through one or more readonly MeasurementSets.
vi::VisBufferComponent2 component
Weights iterator over weights cube.
DataIterator getData()
Get the current sub-chunk.
unsigned long long uInt64
Definition: aipsxtype.h:39
virtual const casacore::Vector< casacore::Int > & arrayId() const =0
void operator++()
Increment the data provider to the sub-chunk within the dataset.
DataIterator::AccumType AccumType
Define typedefs for some template parameters.
virtual void nextDataset(casacore::StatisticsAlgorithm< AccumType, DataIterator, MaskIterator, WeightsIterator > &stats, const std::unordered_map< int, std::string > *columnValues)=0
This method will be called by Vi2DataProvider::foreachDataset() for each dataset composed of one or m...
Abstract base class which defines interface for providing &quot;datasets&quot; to the statistics framework in c...
Mask iterator over row flags.
DataRanges getRanges()
Get the associated range(s) of the current sub-chunk.
virtual const casacore::Vector< casacore::Double > & timeInterval() const =0
virtual const casacore::Vector< casacore::Int > & stateId() const =0
virtual const casacore::Matrix&lt;casacore::Float&gt; &amp; sigmaMat () const = 0; // [nC,nR] ...
virtual const casacore::Vector< casacore::Int > & dataDescriptionIds() const =0
virtual casacore::Int dataDescriptionId () const = 0; virtual void setDataDescriptionId (casacore::In...
void finalize()
Take any actions necessary to finalize the provider.
std::unique_ptr< const WeightsIterator > weights_iterator
const casacore::Bool use_data_weights
void reset()
Reset the provider to point to the first sub-chunk of the dataset.
casacore::uInt64 getCount()
get the count of elements in the current data set.
casacore::Bool atEnd() const
Is this the last sub-chunk within the dataset?
Weights iterator over row weights.
virtual void setDataProvider(StatsDataProvider< CASA_STATP > *dataProvider)
instead of setting and adding data &quot;by hand&quot;, set the data provider that will provide all the data se...
This class is used to encapsulate the action(s) taken on a sequence of casacore::StatisticsAlgorithm ...
std::unique_ptr< std::unordered_map< int, std::string > > mkColumnValues()
std::unique_ptr< const MaskIterator > mask_iterator
virtual const casacore::Array< typename DataIterator::DataType > & dataArray()=0
WeightsIterator WeightsIteratorType
virtual const casacore::Vector< casacore::Int > & scan() const =0
bool existsColumn(VisBufferComponent2 id) const
Returns true if the specified column exists.
Vi2DataProvider & operator=(Vi2DataProvider &&other)
#define DataRanges
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
virtual const casacore::Vector< casacore::Int > & fieldId() const =0
Weights iterator over sigma cube.
std::unordered_set< string > mergedColumns
Vi2DataProvider(Vi2DataProvider &&other)
casacore::StatsDataProvider template class backed by VisibilityIterator2 instances.
template &lt;class T, class U&gt; class vector;
Definition: MSFlagger.h:37
std::string to_string(int i)
Definition: StringUtil.h:36
casacore::Bool isInclude() const
If the associated data set has ranges, are these include (return true) or exclude (return false) rang...
Mask iterator over flag cube.
casacore::uInt getMaskStride()
Get the stride for the current mask (only called if hasMask() returns true).
VisBuffer2s encapsulate one chunk of visibility data for processing.
Definition: VisBuffer2.h:141
Base class for all Casacore library errors.
Definition: Error.h:134
void foreachDataset(casacore::StatisticsAlgorithm< AccumType, DataIteratorType, MaskIteratorType, WeightsIteratorType > &statistics, Vi2StatisticsIteratee< DataIterator, WeightsIterator, MaskIterator > &iteratee)
void updateFollowingChunkDatasetIndex()
std::unique_ptr< vi::VisibilityIterator2 > vi2
Weights iterator over row sigmas.
WeightsIterator getWeights()
Get the associated weights of the current sub-chunk.
casacore::uInt getStride()
Get the stride for the current data set.
Vi2DataProvider(vi::VisibilityIterator2 *vi2, const std::set< casacore::MSMainEnums::PredefinedColumns > &mergedColumns_, vi::VisBufferComponent2 component, casacore::Bool omit_flagged_data, casacore::Bool use_data_weights)
std::map< casacore::MSMainEnums::PredefinedColumns, std::string > columnNames
Base class of statistics algorithm class hierarchy.
MaskIterator getMask()
Get the associated mask of the current sub-chunk.
Vi2DataProvider< DataIterator, Vi2StatsFlagsRowIterator, Vi2StatsWeightsRowIterator > Vi2WeightsRowDataProvider
casacore::Data provider template for row-based casacore::MS columns (i.e, not visibilities) using the...
casacore::Bool hasRanges() const
Does the current data set have associated range(s)?
casacore::Bool hasMask() const
Does the current data set have an associated mask?
unsigned int uInt
Definition: aipstype.h:51
virtual const casacore::Vector< casacore::Double > & time() const =0