casa  $Rev:20696$
 All Classes Namespaces Files Functions Variables
make_labelled_ms.py
Go to the documentation of this file.
00001 import numpy
00002 import os
00003 import shutil
00004 import stat
00005 from taskutil import get_global_namespace
00006 
00007 def make_labelled_ms(srcms, outputms, labelbases, ow=False, debug=False,
00008                      whichdatacol='DATA'):
00009     """
00010     Note: This has been more or less obsoleted by label_itered_ms and could
00011     probably be replaced by it after a little testing.
00012     
00013     Transform one measurement set into another with the same (ideally small but
00014     nontrivial) shape and reference frames, but data set to a sequence
00015     according to labelbases.  The output MS is useful for testing tasks or
00016     tools that read and write MSes, since it is more obvious where each bit of
00017     data came from, and what has been done to it.  Think of it as an
00018     alternative to making certain bits of data fluoresce under UV light.
00019 
00020     Arguments:
00021 
00022     srcms:    The template MS.
00023     
00024     outputms: The output MS.
00025 
00026     labelbases: A dictionary of quant: number pairs, where quant is an index to
00027     label, and number is how quickly the label changes with the index.  quant
00028     should be one of 'SCAN_NUMBER', 'DATA_DESC_ID', 'ANTENNA1', 'ANTENNA2',
00029     'ARRAY_ID', 'FEED1', 'FEED2', 'FIELD_ID', 'OBSERVATION_ID', 'PROCESSOR_ID',
00030     'STATE_ID', 'time', 'time_centroid', 'chan(nel)', or 'pol(whatever)' (case
00031     insensitive), but you will be let off with a warning if it isn't.
00032     TIME and TIME_CENTROID tend to hover around 4.7e9, so they are offset and
00033     scaled by subtracting the first value and dividing by the first integration
00034     interval.  Example labelbases: labelbases = {'channel': 1.0, 'antenna1':
00035     complex(0, 1)} The data column in the output will be complex(channel index,
00036     antenna1).
00037 
00038         labelbases = {'SCAN_NUMBER': 1.0,
00039                       'FIELD_ID':    0.1,
00040                       'DATA_DESC_ID': complex(0, 1)}
00041         The data column in the output will go like complex(scan.field, spw) as
00042         long as field < 10.
00043 
00044     ow: Whether or not outputms can be overwritten if it exists.
00045 
00046     debug: If True and it hits an error, it will try to return what it has so
00047            far instead of raising an exception.
00048 
00049     whichdatacol: Which of DATA, MODEL_DATA, or CORRECTED_DATA to modify in
00050                   the output.  Case insensitive.
00051 
00052     Returns True or False as a success guess.
00053     """
00054     # Make sure we have tb, casalog, clearcal, and ms.
00055     have_tools = False
00056     try:
00057         # Members that they likely have, but imposters (if any) do not.
00058         have_tools = hasattr(tb, 'colnames') and hasattr(ms, 'selectchannel')
00059         have_tools &= hasattr(casalog, 'post') and hasattr(clearcal,
00060                                                            'check_params')
00061     except NameError:
00062         pass  # through to next clause...
00063     if not have_tools:
00064         try:
00065             from taskutil import get_global_namespace
00066             my_globals = get_global_namespace()
00067             tb = my_globals['tb']
00068             ms = my_globals['ms']
00069             casalog = my_globals['casalog']
00070             clearcal = my_globals['clearcal']
00071         except NameError:
00072             print "Could not find tb and ms.  my_globals =",
00073             print "\n\t".join(my_globals.keys())
00074 
00075     casalog.origin("make_labelled_ms")
00076 
00077     whichdatacol = whichdatacol.upper()
00078     if whichdatacol not in ['DATA', 'MODEL_DATA', 'CORRECTED_DATA']:
00079         casalog.post(whichdatacol + "is not one of DATA, MODEL_DATA, or CORRECTED_DATA.",
00080                      'EXCEPTION')
00081 
00082     try:
00083         if outputms != srcms:
00084             if os.path.isdir(outputms):
00085                 if ow:
00086                     shutil.rmtree(outputms)
00087                 else:
00088                     print "Use ow=True if you really want to overwrite", outputms
00089                     return False
00090             shutil.copytree(srcms, outputms)
00091     except Exception, instance:
00092         casalog.post("*** Error %s copying %s to %s." % (instance,
00093                                                          srcms, outputms),
00094                      'SEVERE')
00095         return
00096 
00097     make_writable_recursively(outputms)
00098         
00099     tb.open(outputms, nomodify=False)
00100 
00101     if whichdatacol not in tb.colnames():
00102         casalog.post("Adding scratch columns to " + outputms, 'INFO')
00103         tb.close()
00104         clearcal(outputms)
00105         tb.open(outputms, nomodify=False)
00106 
00107     # Setup rowcols, polbase, and chanbase
00108     polbase = 0.0
00109     chanbase = 0.0
00110     rowcols = {}
00111     for quant in labelbases:
00112         try:
00113             if quant.upper() in ['SCAN_NUMBER', 'DATA_DESC_ID',
00114                                  'ANTENNA1', 'ANTENNA2', 'ARRAY_ID',
00115                                  'FEED1', 'FEED2', 'FIELD_ID',
00116                                  'OBSERVATION_ID', 'PROCESSOR_ID',
00117                                  'STATE_ID']:
00118                 rowcols[quant] = tb.getcol(quant.upper())
00119             elif quant[:4].upper() == 'TIME':
00120                 rowcols[quant] = tb.getcol(quant.upper())
00121                 print quant, "is a timelike quantity, so it will be offset and scaled"
00122                 print "by subtracting the first value and dividing by the first integration"
00123                 print "interval."
00124                 rowcols[quant] -= rowcols[quant][0]
00125                 rowcols[quant] /= tb.getcell('INTERVAL')
00126             elif quant[:3].upper() == 'POL':
00127                 polbase = labelbases[quant]
00128             elif quant[:4].upper() == 'CHAN':
00129                 chanbase = labelbases[quant]
00130             else:
00131                 casalog.post("Do not know how to label %s." % quant, 'WARN')
00132         except Exception, e:
00133             print "Error getting", quant
00134             if debug:
00135                 return rowcols
00136             else:
00137                 raise e
00138 
00139     dat = numpy.array(tb.getcol('DATA'))
00140     for rowind in xrange(dat.shape[2]):
00141         rowlabel = 0
00142         for q in rowcols:
00143             rowlabel += rowcols[q][rowind] * labelbases[q]
00144                                         
00145         for polind in xrange(dat.shape[0]):
00146             pollabel = rowlabel + polbase * polind
00147                                 
00148             for chanind in xrange(dat.shape[1]):
00149                 label = pollabel + chanind * chanbase
00150                 dat[polind, chanind, rowind] = label
00151     tb.putcol(whichdatacol, dat.tolist())
00152     tb.close()
00153 
00154     try:
00155         addendum = srcms + " labelled by labelbases = {\n"
00156         qbs = []
00157         for q, b in labelbases.items():
00158             qb = "\t%16s: " % ("'" + q + "'")
00159             if type(b) == complex:
00160                 if b.real != 0.0:
00161                     qb += "%.1g" % b.real
00162                 if b.imag != 0.0:
00163                     qb += "%+.1gi" % b.imag
00164             else:
00165                 qb += "%.1g" % b
00166             qbs.append(qb)
00167         addendum += ",\n".join(qbs)
00168         addendum += "\n}"
00169         ms.open(outputms, nomodify=False)
00170         # The parms parameter is a false lead - ms.listhistory and listhistory
00171         # don't show it in the logger.
00172         ms.writehistory(addendum, origin="make_labelled_ms")
00173         ms.close()
00174     except Exception, instance:
00175         casalog.post("*** Error %s updating %s's history." % (instance,
00176                                                               outputms),
00177                      'SEVERE')
00178     return True
00179 
00180 def label_itered_ms(msname, labelbases, debug=False, datacol='DATA',
00181                     incremental=False):
00182     """
00183     Set datacol according to labelbases.  Like make_labelled_ms() except it
00184     modifies in place using ms iteration to handle MSes that are large and/or
00185     have multiple data shapes.
00186     
00187     Arguments:
00188 
00189     msname:   The MS to be modified.
00190 
00191     labelbases: A dictionary of quant: number or quant: func pairs, where quant
00192     is an index to label, and number is how quickly the label changes with the
00193     index.  func should be a function that takes a data chunk as a dictionary
00194     (as returned by ms.getdata), and returns either a scalar or a numpy array
00195     with the same shape as the chunk's datacol.  quant should be one of
00196     'SCAN_NUMBER', 'DATA_DESC_ID', 'ANTENNA1', 'ANTENNA2', 'ARRAY_ID', 'FEED1',
00197     'FEED2', 'FIELD_ID', 'OBSERVATION_ID', 'PROCESSOR_ID', 'STATE_ID', 'time',
00198     'time_centroid', 'chan(nel)', or 'pol(whatever)' (case insensitive), but
00199     you will be let off with a warning if it isn't.  TIME and TIME_CENTROID
00200     tend to hover around 4.7e9, so they are offset and scaled by subtracting
00201     the first value and dividing by the first integration interval.  Example
00202     labelbases: labelbases = {'channel': 1.0, 'antenna1': complex(0, 1)} The
00203     data column in the output will be complex(channel index, antenna1).
00204 
00205         labelbases = {'SCAN_NUMBER': 1.0,
00206                       'FIELD_ID':    0.1,
00207                       'DATA_DESC_ID': complex(0, 1)}
00208         The data column in the output will go like complex(scan.field, spw) as
00209         long as field < 10.
00210 
00211     debug: If True and it hits an error, it will try to return what it has so
00212            far instead of raising an exception.
00213 
00214     datacol: Which of DATA, MODEL_DATA, or CORRECTED_DATA to modify in the output.
00215              Case insensitive.
00216 
00217     incremental: If True, the visibilities of the MS are added to.
00218                  Otherwise (default), they are replaced.
00219 
00220     Returns True or False as a success guess.
00221     """
00222     # Make sure we have tb, casalog, clearcal, and ms.
00223     have_tools = False
00224     try:
00225         # Members that they likely have, but imposters (if any) do not.
00226         have_tools = hasattr(tb, 'colnames') and hasattr(ms, 'selectchannel')
00227         have_tools &= hasattr(casalog, 'post') and hasattr(clearcal, 'check_params')
00228     except NameError:
00229         pass  # through to next clause...
00230     if not have_tools:
00231         try:
00232             my_globals = get_global_namespace()
00233             tb = my_globals['tb']
00234             ms = my_globals['ms']
00235             casalog = my_globals['casalog']
00236             clearcal = my_globals['clearcal']
00237         except NameError:
00238             print "Could not find tb and ms.  my_globals =",
00239             print "\n\t".join(my_globals.keys())
00240 
00241     casalog.origin("label_itered_ms")
00242 
00243     datacol = datacol.upper()
00244     if datacol not in ['DATA', 'MODEL_DATA', 'CORRECTED_DATA']:
00245         casalog.post(datacol + "is not one of DATA, MODEL_DATA, or CORRECTED_DATA.",
00246                      'EXCEPTION')
00247 
00248     make_writable_recursively(msname)
00249         
00250     tb.open(msname)
00251     if datacol not in tb.colnames():
00252         casalog.post("Adding scratch columns to " + msname, 'INFO')
00253         tb.close()
00254         clearcal(msname)
00255     else:
00256         tb.close()
00257 
00258     # Setup cols_to_get, polbase, and chanbase
00259     polbase = 0.0
00260     chanbase = 0.0
00261     cols_to_get = [datacol]
00262     for c in labelbases:
00263         # ms.getdata doesn't recognize OBSERVATION_ID, PROCESSOR_ID, or STATE_ID.
00264         if c.upper() in ['SCAN_NUMBER', 'DATA_DESC_ID', 'ANTENNA1', 'ANTENNA2',
00265                          'ARRAY_ID', 'FEED1', 'FEED2', 'FIELD_ID']:
00266             cols_to_get.append(c)
00267         elif c.upper() == 'TIME':
00268             cols_to_get.append(c)
00269             #cols_to_get.append('INTERVAL')
00270         elif c[:3].upper() == 'POL':
00271             polbase = labelbases[c]
00272         elif c[:4].upper() == 'CHAN':
00273             chanbase = labelbases[c]
00274         else:
00275             casalog.post("Do not know how to label by %s." % c, 'WARN')
00276 
00277     # Sigh - it seems to be necessary to get the list of DDIDs before
00278     # using ms.iter*, i.e. ms.iter* can't actually iter over > 1 DDID?
00279     # I wonder what happens if SPECTRAL_WINDOW has, as it often does,
00280     # more spws than are used in the main table.
00281     tb.open(msname + '/SPECTRAL_WINDOW')
00282     nddid = tb.nrows()
00283     tb.close()
00284 
00285     ms.open(msname, nomodify=False)
00286     for ddid in xrange(nddid):
00287         ms.selectinit(datadescid=ddid)
00288         ms.iterinit(maxrows=2000)
00289         ms.iterorigin()
00290         datacol = datacol.lower()
00291         ismore = True
00292         while ismore:
00293             chunk = ms.getdata(cols_to_get)
00294             if chunk.has_key('time'):
00295                 chunk['time'] -= chunk['time'][0]
00296                 #chunk['time'] /= chunk['interval'][0]
00297 
00298             if not incremental:
00299                 chunk[datacol][:,:,:] = 0.0
00300             for q in labelbases:
00301                 # Relies on all the columns that are both in labelbases and chunk
00302                 # being scalar.
00303                 if chunk.has_key(q.lower()):
00304                     if hasattr(labelbases[q], '__call__'):
00305                         data = labelbases[q](chunk)
00306                         if hasattr(data, 'shape'):
00307                             chunk[datacol] += data
00308                         else:
00309                             chunk[datacol][:,:,:] += data
00310                     else:
00311                         chunk[datacol][:,:,:] += chunk[q.lower()] * labelbases[q]
00312             datshape = chunk[datacol].shape
00313             if polbase:
00314                 if hasattr(polbase, '__call__'):
00315                     chunk[datacol] += polbase(chunk)
00316                 else:
00317                     for polind in xrange(datshape[0]):
00318                         chunk[datacol][polind, :, :] += polbase * polind
00319             if chanbase:
00320                 if hasattr(chanbase, '__call__'):
00321                     chunk[datacol] += chanbase(chunk)
00322                 else:
00323                     for cind in xrange(datshape[1]):
00324                         chunk[datacol][:, cind, :] += chanbase * cind
00325             ms.putdata({datacol: chunk[datacol]})
00326             ismore = ms.iternext()
00327 
00328     try:
00329         addendum = msname + " "
00330         if incremental:
00331             addendum += "added to"
00332         else:
00333             addendum += "labelled"
00334         addendum += " by labelbases = {\n"
00335         qbs = []
00336         for q, b in labelbases.items():
00337             qb = "\t%16s: " % ("'" + q + "'")
00338             if hasattr(b, '__call__'):
00339                 qb += b.__name__ + ' (function)'
00340             elif type(b) == complex:
00341                 if b.real != 0.0:
00342                     qb += "%.1g" % b.real
00343                 if b.imag != 0.0:
00344                     qb += "%+.1gi" % b.imag
00345             else:
00346                 qb += "%.1g" % b
00347             qbs.append(qb)
00348         addendum += ",\n".join(qbs)
00349         addendum += "\n}"
00350         # The parms parameter is a false lead - ms.listhistory and listhistory
00351         # don't show it in the logger.
00352         ms.writehistory(addendum, origin="label_itered_ms")
00353     except Exception, instance:
00354         casalog.post("*** Error updating %s's history: %s" % (msname,
00355                                                               instance),
00356                      'SEVERE')
00357     finally:
00358         ms.close()
00359         casalog.origin('')
00360         
00361     return True
00362 
00363 def make_writable_recursively(dir):
00364     """
00365     Unfortunately neither os nor shutil make operating on permissions as easy
00366     as it should be.
00367     """
00368     def walkable_chmod(mode, dirname, fnames):
00369         "Thanks to Fabian Steiner on the python list."
00370         dmode = os.lstat(dirname)[stat.ST_MODE]
00371         os.chmod(dirname, dmode | stat.S_IWUSR)
00372         for fname in fnames:
00373             dfname = os.path.join(dirname, fname)
00374             fmode = os.lstat(dfname)[stat.ST_MODE]
00375             os.chmod(dfname, fmode | stat.S_IWUSR)
00376 
00377     os.path.walk(dir, walkable_chmod, None)