Line data Source code
1 : // -*- mode: c++ -*-
2 : //# Copyright (C) 1996,1997,1998,1999,2000,2002,2003,2015
3 : //# Associated Universities, Inc. Washington DC, USA.
4 : //#
5 : //# This library is free software; you can redistribute it and/or modify it
6 : //# under the terms of the GNU Library General Public License as published by
7 : //# the Free Software Foundation; either version 2 of the License, or (at your
8 : //# option) any later version.
9 : //#
10 : //# This library is distributed in the hope that it will be useful, but WITHOUT
11 : //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 : //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 : //# License for more details.
14 : //#
15 : //# You should have received a copy of the GNU Library General Public License
16 : //# along with this library; if not, write to the Free Software Foundation,
17 : //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 : //#
19 : //# Correspondence concerning AIPS++ should be addressed as follows:
20 : //# Internet email: aips2-request@nrao.edu.
21 : //# Postal address: AIPS++ Project Office
22 : //# National Radio Astronomy Observatory
23 : //# 520 Edgemont Road
24 : //# Charlottesville, VA 22903-2475 USA
25 : //#
26 : //
27 : // Base class of data providers based on casacore::StatsDataProvider, backed by a
28 : // VisibilityIterator2 instances.
29 : //
30 : #ifndef MSVIS_STATISTICS_VI2_DATA_PROVIDER_H_
31 : #define MSVIS_STATISTICS_VI2_DATA_PROVIDER_H_
32 :
33 : #include <casacore/casa/aips.h>
34 : #include <casacore/casa/Arrays/Array.h>
35 : #include <casacore/ms/MeasurementSets/MSMainEnums.h>
36 : #include <msvis/MSVis/VisibilityIterator2.h>
37 : #include <msvis/MSVis/VisBufferComponents2.h>
38 : #include <msvis/MSVis/statistics/Vi2StatsFlagsIterator.h>
39 : #include <msvis/MSVis/statistics/Vi2StatsWeightsIterator.h>
40 : #include <msvis/MSVis/statistics/Vi2StatsSigmasIterator.h>
41 : #include <msvis/MSVis/statistics/Vi2StatisticsIteratee.h>
42 : #include <casacore/scimath/StatsFramework/StatisticsAlgorithm.h>
43 : #include <casacore/scimath/StatsFramework/StatsDataProvider.h>
44 : #include <memory>
45 : #include <vector>
46 : #include <unordered_map>
47 : #include <unordered_set>
48 : #include <set>
49 : #include <cassert>
50 :
51 : namespace casa {
52 :
53 : // casacore::StatsDataProvider template class backed by VisibilityIterator2
54 : // instances. These data providers operate on a single casacore::MS column over
55 : // all vi2 sub-chunks in a dataset composed of one or more chunks selected when
56 : // the data provider is instantiated. In other words, the data sample for
57 : // statistics generated with a Vi2DataProvider instance is the data from a
58 : // single column in a single dataset of consecutive vi2 chunks. It is intended
59 : // that the user set up the VisibilityIterator2 appropriately to select the
60 : // desired data sample for computing statistics. Consecutive chunks produced by
61 : // an iterator may be merged by the data provider to produce a single
62 : // dataset. The iteration over an MS, and the computation of statistics on each
63 : // dataset driven using a Vi2StatisticsIteratee instance, may be done as
64 : // follows:
65 : //
66 : // Vi2DataProvider *dp; // given
67 : // casacore::StatisticsAlgorithm statistics; // given
68 : // class DoStuff : public Vi2StatisticsIteratee {
69 : // ... // constructor, probably needs some sort of accumulator
70 : // void nextDataset(casacore::StatisticsAlgorithm stats,
71 : // const std::unordered_map<int,std::string> *colVals) {
72 : // stats.getStatistics()...;
73 : // }
74 : // }
75 : // DoStuff doStuff;
76 : // dp->foreachDataset(statistics, doStuff);
77 : //
78 : // Note that the AccumType template parameter value of
79 : // casacore::StatsDataProvider is derived from the DataIterator parameter value
80 : // of this template, imposing a constraint on the classes that can be used for
81 : // DataIterator.
82 : //
83 : template <class DataIterator, class MaskIterator, class WeightsIterator>
84 : class Vi2DataProvider
85 : : public casacore::StatsDataProvider<typename DataIterator::AccumType,
86 : DataIterator,
87 : MaskIterator,
88 : WeightsIterator> {
89 :
90 : public:
91 : // Define typedefs for some template parameters. This is primarily to
92 : // support instantiating "...Statistics" instances of the proper type given
93 : // only an instance of this type. [AccumType is an exception, in that it's
94 : // also needed by the (macro) definition of DataRanges.]
95 : typedef typename DataIterator::AccumType AccumType;
96 : typedef DataIterator DataIteratorType;
97 : typedef MaskIterator MaskIteratorType;
98 : typedef WeightsIterator WeightsIteratorType;
99 :
100 114 : Vi2DataProvider(
101 : vi::VisibilityIterator2 *vi2,
102 : const std::set<casacore::MSMainEnums::PredefinedColumns>
103 : &mergedColumns_,
104 : vi::VisBufferComponent2 component,
105 : casacore::Bool omit_flagged_data,
106 : casacore::Bool use_data_weights)
107 : : vi2(vi2)
108 : , component(component)
109 : , use_data_weights(use_data_weights)
110 570 : , omit_flagged_data(omit_flagged_data) {
111 :
112 : // Attach the casacore::MS columns to vi2 by calling originChunks(). Not
113 : // the most direct method, but attaching the columns is required in many
114 : // cases to pass existsColumn() test.
115 114 : vi2->originChunks();
116 114 : if (!vi2->existsColumn(component))
117 0 : throw casacore::AipsError("Column is not present");
118 :
119 372 : for (auto &&i : mergedColumns_)
120 258 : mergedColumns.insert(columnNames.at(i));
121 114 : }
122 :
123 : Vi2DataProvider(Vi2DataProvider&& other)
124 : : vi2(other.vi2)
125 : , mergedColumns(other.mergedColumns)
126 : , datasetIndex(other.datasetIndex)
127 : , datasetChunkOrigin(other.datasetChunkOrigin)
128 : , followingChunkDatasetIndex(other.followingChunkDatasetIndex)
129 : , currentChunk(other.currentChunk)
130 : , component(other.component)
131 : , use_data_weights(other.use_data_weights)
132 : , omit_flagged_data(other.omit_flagged_data)
133 : , data_iterator(other.data_iterator)
134 : , weights_iterator(other.weights_iterator)
135 : , mask_iterator(other.mask_iterator) {
136 : other.vi2 = nullptr;
137 : }
138 :
139 : Vi2DataProvider& operator=(Vi2DataProvider&& other) {
140 : vi2 = other.vi2;
141 : mergedColumns = other.mergedColumns;
142 : datasetIndex = other.datasetIndex;
143 : datasetChunkOrigin = other.datasetChunkOrigin;
144 : followingChunkDatasetIndex = other.followingChunkDatasetIndex;
145 : currentChunk = other.currentChunk;
146 : component = other.component;
147 : use_data_weights = other.use_data_weights;
148 : omit_flagged_data = other.omit_flagged_data;
149 : data_iterator = other.data_iterator;
150 : weights_iterator = other.weights_iterator;
151 : mask_iterator = other.mask_iterator;
152 : other.vi2 = nullptr;
153 : return *this;
154 : }
155 :
156 : // Increment the data provider to the sub-chunk within the dataset.
157 87396 : void operator++() {
158 87396 : if (atEnd())
159 : throw casacore::AipsError(
160 0 : "Data provider increment beyond end of dataset");
161 87396 : vi2->next();
162 87396 : reset_iterators();
163 87396 : if (!vi2->more() && followingChunkDatasetIndex == datasetIndex)
164 60 : nextDatasetChunk();
165 87396 : }
166 :
167 : // Is this the last sub-chunk within the dataset?
168 174792 : casacore::Bool atEnd() const {
169 174792 : return followingChunkDatasetIndex != datasetIndex && !vi2->more();
170 : }
171 :
172 : // Take any actions necessary to finalize the provider. This will be called
173 : // when atEnd() returns true.
174 414 : void finalize() {};
175 :
176 87396 : casacore::uInt64 getCount() {
177 87396 : return getData().getCount();
178 : }
179 :
180 : // Get the current sub-chunk
181 174792 : DataIterator getData() {
182 174792 : if (!data_iterator)
183 87396 : data_iterator =
184 87396 : std::unique_ptr<DataIterator>(new DataIterator(dataArray()));
185 174792 : return *data_iterator;
186 : }
187 :
188 87396 : casacore::uInt getStride() {
189 87396 : return 1;
190 : };
191 :
192 87396 : casacore::Bool hasMask() const {
193 87396 : return omit_flagged_data;
194 : };
195 :
196 : // Get the associated mask of the current sub-chunk. Only called if
197 : // hasMask() returns true.
198 86316 : MaskIterator getMask() {
199 86316 : if (!mask_iterator)
200 86316 : mask_iterator = std::unique_ptr<MaskIterator>(
201 86316 : new MaskIterator(vi2->getVisBuffer()));
202 86316 : return *mask_iterator;
203 : };
204 :
205 : // Get the stride for the current mask (only called if hasMask() returns
206 : // true). Will always return 1 in this implementation.
207 86316 : casacore::uInt getMaskStride() {
208 86316 : return 1;
209 : };
210 :
211 87396 : casacore::Bool hasWeights() const {
212 87396 : return use_data_weights;
213 : };
214 :
215 : // Get the associated weights of the current sub-chunk. Only called if
216 : // hasWeights() returns true;
217 0 : WeightsIterator getWeights() {
218 0 : if (!weights_iterator)
219 0 : weights_iterator = std::unique_ptr<WeightsIterator>(
220 0 : new WeightsIterator(vi2->getVisBuffer()));
221 0 : return *weights_iterator;
222 : };
223 :
224 87396 : casacore::Bool hasRanges() const {
225 87396 : return false; // TODO: is this correct in all cases?
226 : };
227 :
228 : // Get the associated range(s) of the current sub-chunk. Only called if
229 : // hasRanges() returns true, which will never be the case in this
230 : // implementation.
231 : DataRanges
232 0 : getRanges() {
233 0 : DataRanges result;
234 0 : return result;
235 : };
236 :
237 : // If the associated data set has ranges, are these include (return true) or
238 : // exclude (return false) ranges?
239 0 : casacore::Bool isInclude() const {
240 0 : return false;
241 : };
242 :
243 : // Reset the provider to point to the first sub-chunk of the dataset.
244 414 : void reset() {
245 414 : if (currentChunk != datasetChunkOrigin) {
246 8 : vi2->originChunks();
247 8 : currentChunk = 0;
248 8 : initChunk();
249 8 : updateFollowingChunkDatasetIndex();
250 8 : while (currentChunk != datasetChunkOrigin)
251 0 : nextDatasetChunk();
252 : } else {
253 406 : initChunk();
254 406 : updateFollowingChunkDatasetIndex();
255 : }
256 414 : }
257 :
258 : void foreachDataset(
259 : casacore::StatisticsAlgorithm<AccumType,DataIteratorType,MaskIteratorType,WeightsIteratorType>& statistics,
260 : Vi2StatisticsIteratee<DataIterator,WeightsIterator,MaskIterator>& iteratee) {
261 :
262 : datasetIndex = -1;
263 : followingChunkDatasetIndex = 0;
264 : while (nextDataset()) {
265 : std::unique_ptr<std::unordered_map<int,std::string> >
266 : columnValues(mkColumnValues());
267 : statistics.setDataProvider(this);
268 : iteratee.nextDataset(statistics, columnValues.get());
269 : };
270 : }
271 :
272 : protected:
273 :
274 : std::unique_ptr<vi::VisibilityIterator2> vi2;
275 :
276 : vi::VisBufferComponent2 component;
277 :
278 : std::unique_ptr<const DataIterator> data_iterator;
279 :
280 : const casacore::Bool use_data_weights;
281 :
282 : std::unique_ptr<const WeightsIterator> weights_iterator;
283 :
284 : const casacore::Bool omit_flagged_data;
285 :
286 : std::unique_ptr<const MaskIterator> mask_iterator;
287 :
288 : int datasetIndex;
289 :
290 : unsigned datasetChunkOrigin;
291 :
292 : int followingChunkDatasetIndex;
293 :
294 : unsigned currentChunk;
295 :
296 : std::unordered_set<string> mergedColumns;
297 :
298 : private:
299 :
300 : void
301 87870 : reset_iterators() {
302 87870 : data_iterator.reset();
303 87870 : weights_iterator.reset();
304 87870 : mask_iterator.reset();
305 87870 : }
306 :
307 : void
308 60 : nextDatasetChunk() {
309 60 : vi2->nextChunk();
310 60 : ++currentChunk;
311 60 : if (vi2->moreChunks())
312 60 : initChunk();
313 60 : updateFollowingChunkDatasetIndex();
314 60 : }
315 :
316 : bool
317 : nextDataset() {
318 : ++datasetIndex;
319 : assert(followingChunkDatasetIndex == datasetIndex);
320 : if (datasetIndex == 0) {
321 : vi2->originChunks();
322 : currentChunk = 0;
323 :
324 : } else {
325 : vi2->nextChunk();
326 : ++currentChunk;
327 : }
328 : if (vi2->moreChunks())
329 : initChunk();
330 : updateFollowingChunkDatasetIndex();
331 : datasetChunkOrigin = currentChunk;
332 : return vi2->moreChunks();
333 : }
334 :
335 : void
336 474 : initChunk() {
337 474 : vi2->origin();
338 474 : reset_iterators();
339 474 : }
340 :
341 : virtual const casacore::Array<typename DataIterator::DataType>& dataArray() = 0;
342 :
343 : void
344 474 : updateFollowingChunkDatasetIndex() {
345 474 : if (!vi2->moreChunks() || mergedColumns.count(vi2->keyChange()) == 0)
346 414 : followingChunkDatasetIndex = datasetIndex + 1;
347 : else
348 60 : followingChunkDatasetIndex = datasetIndex;
349 474 : }
350 :
351 : std::unique_ptr<std::unordered_map<int,std::string> >
352 : mkColumnValues() {
353 : const vi::VisBuffer2 *vb = vi2->getVisBuffer();
354 : return std::unique_ptr<std::unordered_map<int,std::string> >(
355 : new std::unordered_map<int,std::string> {
356 : {casacore::MSMainEnums::PredefinedColumns::ARRAY_ID,
357 : "ARRAY_ID=" + std::to_string(vb->arrayId()[0])},
358 : {casacore::MSMainEnums::PredefinedColumns::FIELD_ID,
359 : "FIELD_ID=" + std::to_string(vb->fieldId()[0])},
360 : {casacore::MSMainEnums::PredefinedColumns::DATA_DESC_ID,
361 : "DATA_DESC_ID="
362 : + std::to_string(vb->dataDescriptionIds()[0])},
363 : {casacore::MSMainEnums::PredefinedColumns::TIME,
364 : "TIME="
365 : + std::to_string(vb->time()[0] - vb->timeInterval()[0] / 2)},
366 : {casacore::MSMainEnums::PredefinedColumns::SCAN_NUMBER,
367 : "SCAN_NUMBER=" + std::to_string(vb->scan()[0])},
368 : {casacore::MSMainEnums::PredefinedColumns::STATE_ID,
369 : "STATE_ID=" + std::to_string(vb->stateId()[0])}
370 : });
371 : }
372 :
373 : std::map<casacore::MSMainEnums::PredefinedColumns,std::string> columnNames = {
374 : {casacore::MSMainEnums::PredefinedColumns::ARRAY_ID, "ARRAY_ID"},
375 : {casacore::MSMainEnums::PredefinedColumns::FIELD_ID, "FIELD_ID"},
376 : {casacore::MSMainEnums::PredefinedColumns::DATA_DESC_ID, "DATA_DESC_ID"},
377 : {casacore::MSMainEnums::PredefinedColumns::TIME, "TIME"}};
378 : };
379 :
380 : // casacore::Data provider template for row-based casacore::MS columns (i.e, not visibilities) using
381 : // the 'weights' column for data weights. In most instances, you would not
382 : // weight the data in these columns, but the Vi2DataProvider template
383 : // requires that a WeightsIterator be provided.
384 : template<class DataIterator>
385 : using Vi2WeightsRowDataProvider =
386 : Vi2DataProvider<
387 : DataIterator,Vi2StatsFlagsRowIterator,Vi2StatsWeightsRowIterator>;
388 :
389 : // casacore::Data provider template for row-based casacore::MS columns (i.e, not visibilities) using
390 : // the 'sigma' column for data weights (appropriately transformed). In most
391 : // instances, you would not weight the data in these columns, but the
392 : // Vi2DataProvider template requires that a WeightsIterator be provided.
393 : template<class DataIterator>
394 : using Vi2SigmasRowDataProvider =
395 : Vi2DataProvider<
396 : DataIterator,Vi2StatsFlagsRowIterator,Vi2StatsSigmasRowIterator>;
397 :
398 : // casacore::Data provider template for cube-based casacore::MS columns (i.e, the visibilities)
399 : // using the 'weights' column for data weights.
400 : template<class DataIterator>
401 : using Vi2WeightsCubeDataProvider =
402 : Vi2DataProvider<
403 : DataIterator,Vi2StatsFlagsCubeIterator,Vi2StatsWeightsCubeIterator>;
404 :
405 : // casacore::Data provider template for cube-based casacore::MS columns (i.e, the visibilities)
406 : // using the 'sigma' column for data weights (appropriately transformed).
407 : template<class DataIterator>
408 : using Vi2SigmasCubeDataProvider =
409 : Vi2DataProvider<
410 : DataIterator,Vi2StatsFlagsCubeIterator,Vi2StatsSigmasCubeIterator>;
411 :
412 : } // end namespace casa
413 :
414 : #endif // MSVIS_STATISTICS_VI2_DATA_PROVIDER_H_
|