00001
00002
00003
00004
00005
00006
00007
00008 from glob import glob
00009 import locale
00010 import os
00011
def lsms(musthave=None, mspat="*[-_.][Mm][Ss]", combine='or', remind=True,
         sortfirst=False):
    """
    Summarize measurement sets matching certain criteria.

    Arguments:

    musthave:  A list of columns, subtables, or keywords that must be in the MS.
               If empty or None (default), the list of optional columns,
               subtables, and keywords of each MS will be printed.  Any entries
               will be internally uppercased.  A single string is also accepted
               and is split on whitespace and/or commas.

    mspat:     A filename pattern, relative to the current directory, that the
               directory names of the MSes must match.
               Default: '*[-_.][Mm][Ss]'
               Tip: Try '**/*.ms' to find *.ms in . and all its subdirectories.

    combine:   Controls whether the conditions of musthave are combined with
               'or' (default) or 'and'.

    remind:    If True (default), print all columns and keywords of optional
               subtables, not just the optional ones.

    sortfirst: If sortfirst=True, print the matching MSes in alphabetical order.
               Otherwise, print each one as soon as it is found.

    Nothing is returned; the summary is printed.

    Note that to fit in better with *sh behavior the argument order is reversed
    when calling from a non-python shell.  i.e. if you enter
      lsms \\*_MS source polarization
    in a *sh session, it will run with
      mspat='*_MS' and musthave=['SOURCE', 'POLARIZATION'].
      (remember to quote wildcards to avoid sh expansion)
    """
    if musthave is None:
        musthave = []
    elif isinstance(musthave, str):
        # Treat commas as separators so that "a,b" and "a, b" both yield
        # ['a', 'b'] (simply deleting the commas would turn "a,b" into 'ab').
        musthave = musthave.replace(',', ' ').split()

    listall = not musthave

    # When not sorting, each MS is printed as soon as it is found, so there is
    # no need to accumulate the results.  The options are passed by keyword:
    # matchingMSes() takes them in the order (doprint, freemem, remind).
    print_as_found = not sortfirst
    msdict, use_tb = matchingMSes(musthave, mspat, combine,
                                  doprint=print_as_found,
                                  freemem=print_as_found,
                                  remind=remind)

    if sortfirst:
        try:
            # Sort according to the user's locale rather than by byte value.
            locale.setlocale(locale.LC_ALL, '')
        except locale.Error:
            # Unsupported locale in the environment: sort with the locale
            # that is already in effect instead of failing.
            pass

        for currms in sorted(msdict, key=locale.strxfrm):
            print_ms(currms, msdict[currms], listall, use_tb, remind)
00068
00069
def print_ms(currms, msdict, listall=False, use_tb=False, remind=True):
    """
    Prints the blurb in msdict, which is nominally about currms.

    currms:  Name of the MS; always printed.
    msdict:  Dict keyed by table name ('MAIN' or a subtable name).  When
             listall and use_tb are both True, each value is read for 'cols'
             (column names) and 'kws' (keyword names); missing entries are
             treated as empty.  msdict is not modified.
    listall: If True, append a per-subtable summary; otherwise only currms is
             printed.
    use_tb:  If True, columns and keywords are compared against mstables.
             If False, only the names of subtables that are not required by
             mstables are listed.
    remind:  If True, also list the required columns and keywords of the
             optional subtables.
    """
    currmsstr = ''
    if listall:
        notindefn = []
        for st in sorted(msdict):
            ststr = ''
            if use_tb:
                cols = set(msdict[st].get('cols', ()))
                kws = set(msdict[st].get('kws', ()))

                if st in mstables['req']:
                    required = mstables['req'][st]['req']

                    optcols = cols.difference(required['cols'])
                    if optcols:
                        ststr = " Optional column"
                        ststr += string_from_list_or_set(optcols)

                    optkws = kws.difference(required['kws'])
                    if optkws:
                        ststr += " Optional keyword"
                        ststr += string_from_list_or_set(optkws)

                elif st in mstables['opt']:
                    required = mstables['opt'][st]['req']

                    reqcols = required['cols']
                    if remind and reqcols:
                        ststr = ' Required column'
                        ststr += string_from_list_or_set(reqcols)

                    optcols = cols.difference(reqcols)
                    if optcols:
                        ststr += " Optional column"
                        ststr += string_from_list_or_set(optcols)

                    reqkws = required['kws']
                    if remind and reqkws:
                        # Append: plain assignment here would throw away the
                        # column lines collected above.
                        ststr += ' Required keyword'
                        ststr += string_from_list_or_set(reqkws)

                    # difference() rather than difference_update(), so that
                    # the caller's msdict is left untouched.
                    optkws = kws.difference(reqkws)
                    if optkws:
                        ststr += " Optional keyword"
                        ststr += string_from_list_or_set(optkws)

                    if not ststr:
                        # Nothing to say about it, but still show that the
                        # optional subtable is present.
                        currmsstr += " " + st + "\n"
                else:
                    notindefn.append(st)
            elif st not in mstables['req']:
                notindefn.append(st)

            if ststr:
                currmsstr += " " + st + ":\n" + ststr

        if notindefn:
            # notindefn is already sorted because msdict was walked in
            # sorted order.
            if use_tb:
                currmsstr += " Not in MS def'n V. 2.0: "
            currmsstr += ', '.join(notindefn) + "\n"

    if currmsstr:
        if use_tb:
            print(currms + ":\n" + currmsstr)
        else:
            print(currms + ": " + currmsstr.strip())
    else:
        print(currms)
00138
00139
def string_from_list_or_set(li):
    """
    Return the tail of a heading line for the strings in li: an 's' if there
    is more than one of them, then ': ', the items sorted and joined by ', ',
    and a newline.
    """
    plural = 's' if len(li) > 1 else ''
    return plural + ': ' + ', '.join(sorted(li)) + "\n"
00147
00148
def _ms_table_def(req_cols, opt_cols=(), req_kws=(), opt_kws=()):
    """
    Build one table entry of mstables: required ('req') and optional ('opt')
    column ('cols') and keyword ('kws') names, each stored as a fresh set.
    """
    return {'req': {'cols': set(req_cols), 'kws': set(req_kws)},
            'opt': {'cols': set(opt_cols), 'kws': set(opt_kws)}}


# Table definitions used to classify what is found in an MS.
# mstables['req'] holds the tables treated as required (including 'MAIN'),
# mstables['opt'] the optional subtables.  Each table maps 'req' and 'opt' to
# {'cols': set of column names, 'kws': set of keyword names}.
mstables = {
    'req': {
        'MAIN': _ms_table_def(
            req_cols=['TIME', 'ANTENNA1', 'ANTENNA2', 'FEED1', 'FEED2',
                      'DATA_DESC_ID', 'PROCESSOR_ID', 'FIELD_ID', 'INTERVAL',
                      'EXPOSURE', 'TIME_CENTROID', 'SCAN_NUMBER', 'ARRAY_ID',
                      'OBSERVATION_ID', 'STATE_ID', 'UVW', 'SIGMA', 'WEIGHT',
                      'FLAG', 'FLAG_CATEGORY', 'FLAG_ROW'],
            opt_cols=['TIME_EXTRA_PREC', 'ANTENNA3', 'FEED3', 'PHASE_ID',
                      'PULSAR_BIN', 'PULSAR_GATE_ID', 'BASELINE_REF', 'UVW2',
                      'DATA', 'MODEL_DATA', 'CORRECTED_DATA', 'FLOAT_DATA',
                      'VIDEO_POINT', 'LAG_DATA', 'SIGMA_SPECTRUM',
                      'WEIGHT_SPECTRUM'],
            req_kws=['MS_VERSION'],
            opt_kws=['SORT_COLUMNS', 'SORT_ORDER']),
        'ANTENNA': _ms_table_def(
            req_cols=['NAME', 'STATION', 'TYPE', 'MOUNT', 'POSITION',
                      'OFFSET', 'DISH_DIAMETER', 'FLAG_ROW'],
            opt_cols=['ORBIT_ID', 'MEAN_ORBIT', 'PHASED_ARRAY_ID']),
        'DATA_DESCRIPTION': _ms_table_def(
            req_cols=['SPECTRAL_WINDOW_ID', 'POLARIZATION_ID', 'FLAG_ROW'],
            opt_cols=['LAG_ID']),
        'FEED': _ms_table_def(
            req_cols=['ANTENNA_ID', 'FEED_ID', 'SPECTRAL_WINDOW_ID', 'TIME',
                      'INTERVAL', 'NUM_RECEPTORS', 'BEAM_ID', 'BEAM_OFFSET',
                      'POLARIZATION_TYPE', 'POL_RESPONSE', 'POSITION',
                      'RECEPTOR_ANGLE'],
            opt_cols=['FOCUS_LENGTH', 'PHASED_FEED_ID']),
        'FIELD': _ms_table_def(
            req_cols=['NAME', 'CODE', 'TIME', 'NUM_POLY', 'DELAY_DIR',
                      'PHASE_DIR', 'REFERENCE_DIR', 'SOURCE_ID', 'FLAG_ROW'],
            opt_cols=['EPHEMERIS_ID']),
        'FLAG_CMD': _ms_table_def(
            req_cols=['TIME', 'INTERVAL', 'TYPE', 'REASON', 'LEVEL',
                      'SEVERITY', 'APPLIED', 'COMMAND']),
        'HISTORY': _ms_table_def(
            req_cols=['TIME', 'OBSERVATION_ID', 'MESSAGE', 'PRIORITY',
                      'ORIGIN', 'OBJECT_ID', 'APPLICATION', 'CLI_COMMAND',
                      'APP_PARAMS']),
        'OBSERVATION': _ms_table_def(
            req_cols=['TELESCOPE_NAME', 'TIME_RANGE', 'OBSERVER', 'LOG',
                      'SCHEDULE_TYPE', 'SCHEDULE', 'PROJECT', 'RELEASE_DATE',
                      'FLAG_ROW']),
        'POINTING': _ms_table_def(
            req_cols=['ANTENNA_ID', 'TIME', 'INTERVAL', 'NAME', 'NUM_POLY',
                      'TIME_ORIGIN', 'DIRECTION', 'TARGET', 'TRACKING'],
            opt_cols=['POINTING_OFFSET', 'SOURCE_OFFSET', 'ENCODER',
                      'POINTING_MODEL_ID', 'ON_SOURCE', 'OVER_THE_TOP']),
        'POLARIZATION': _ms_table_def(
            req_cols=['NUM_CORR', 'CORR_TYPE', 'CORR_PRODUCT', 'FLAG_ROW']),
        'PROCESSOR': _ms_table_def(
            req_cols=['TYPE', 'SUB_TYPE', 'TYPE_ID', 'MODE_ID', 'FLAG_ROW'],
            opt_cols=['PASS_ID']),
        'SPECTRAL_WINDOW': _ms_table_def(
            req_cols=['NUM_CHAN', 'NAME', 'REF_FREQUENCY', 'CHAN_FREQ',
                      'CHAN_WIDTH', 'MEAS_FREQ_REF', 'EFFECTIVE_BW',
                      'RESOLUTION', 'TOTAL_BANDWIDTH', 'NET_SIDEBAND',
                      'IF_CONV_CHAIN', 'FREQ_GROUP', 'FREQ_GROUP_NAME',
                      'FLAG_ROW'],
            opt_cols=['BBC_NO', 'BBC_SIDEBAND', 'RECEIVER_ID', 'DOPPLER_ID',
                      'ASSOC_SPW_ID', 'ASSOC_NATURE']),
        'STATE': _ms_table_def(
            req_cols=['SIG', 'REF', 'CAL', 'LOAD', 'SUB_SCAN', 'OBS_MODE',
                      'FLAG_ROW']),
    },
    'opt': {
        'DOPPLER': _ms_table_def(
            req_cols=['DOPPLER_ID', 'SOURCE_ID', 'TRANSITION_ID', 'VELDEF']),
        'FREQ_OFFSET': _ms_table_def(
            req_cols=['ANTENNA1', 'ANTENNA2', 'FEED_ID',
                      'SPECTRAL_WINDOW_ID', 'TIME', 'INTERVAL', 'OFFSET']),
        'SOURCE': _ms_table_def(
            req_cols=['SOURCE_ID', 'TIME', 'INTERVAL', 'SPECTRAL_WINDOW_ID',
                      'NUM_LINES', 'NAME', 'CALIBRATION_GROUP', 'CODE',
                      'DIRECTION', 'PROPER_MOTION'],
            opt_cols=['POSITION', 'TRANSITION', 'REST_FREQUENCY', 'SYSVEL',
                      'SOURCE_MODEL', 'PULSAR_ID']),
        'SYSCAL': _ms_table_def(
            req_cols=['ANTENNA_ID', 'FEED_ID', 'SPECTRAL_WINDOW_ID', 'TIME',
                      'INTERVAL'],
            opt_cols=['PHASE_DIFF', 'TCAL', 'TRX', 'TSKY', 'TSYS', 'TANT',
                      'TANT_TSYS', 'TCAL_SPECTRUM', 'TRX_SPECTRUM',
                      'TSKY_SPECTRUM', 'TSYS_SPECTRUM', 'TANT_SPECTRUM',
                      'TANT_TSYS_SPECTRUM', 'PHASE_DIFF_FLAG', 'TCAL_FLAG',
                      'TRX_FLAG', 'TSKY_FLAG', 'TSYS_FLAG', 'TANT_FLAG',
                      'TANT_TSYS_FLAG']),
        'WEATHER': _ms_table_def(
            req_cols=['ANTENNA_ID', 'TIME', 'INTERVAL'],
            opt_cols=['H2O', 'IONOS_ELECTRON', 'PRESSURE', 'REL_HUMIDITY',
                      'TEMPERATURE', 'DEW_POINT', 'WIND_DIRECTION',
                      'WIND_SPEED', 'H2O_FLAG', 'IONOS_ELECTRON_FLAG',
                      'PRESSURE_FLAG', 'REL_HUMIDITY_FLAG',
                      'TEMPERATURE_FLAG', 'DEW_POINT_FLAG',
                      'WIND_DIRECTION_FLAG', 'WIND_SPEED_FLAG']),
    },
}

# The factory was only needed to build the table above.
del _ms_table_def
00422
# Names of every table listed in mstables, whether under 'req' or 'opt'.
possible_subtables = set(mstables['req']).union(mstables['opt'])
00424
00425
def find_needed_items(musthave=None, listall=False):
    """
    Given the set of "must have" items, fill out needed_subtables and
    needed_items, and determine whether or not to use tb.

    musthave: Set of (uppercased) criteria.  An entry is either a name from
              possible_subtables, 'TABLE/ITEM' (ITEM must be in TABLE), or a
              bare item name that may be in any table.  NOTE: if the table
              tool cannot be found, the criteria that need it are removed
              from this set in place.
    listall:  If True, the table tool is looked for even if no criterion
              needs it.

    Returns (needed_subtables, needed_items, use_tb, mytb):
      needed_subtables: set of subtable names that must be present.
      needed_items:     dict mapping a table name to the set of items wanted
                        in it; the key 'anywhere' (always present) holds the
                        items that may be in any table.
      use_tb:           True if a table tool with a colnames attribute was
                        found.
      mytb:             the table tool, or None.
    """
    if musthave is None:
        musthave = set()

    needed_subtables = musthave.intersection(possible_subtables)
    needed_items = {'anywhere': set()}
    for mh in musthave:
        mhparts = mh.split('/')
        if len(mhparts) > 1:
            table, item = mhparts[0], mhparts[1]
            # Collect every item requested for this table (not only the first).
            needed_items.setdefault(table, set()).add(item)
            if table != 'MAIN':
                needed_subtables.add(table)
        elif mh not in possible_subtables:
            needed_items['anywhere'].add(mh)

    use_tb = False
    # Anything that is not just "this subtable exists" needs the table tool.
    need_tb = musthave.difference(needed_subtables)
    mytb = None
    if need_tb or listall:
        try:
            # Use a global tb if there is one (presumably supplied by a casapy
            # session, as the message below suggests); NameError otherwise.
            mytb = tb
            use_tb = hasattr(mytb, 'colnames')
        except NameError:
            try:
                try:
                    # NOTE(review): this replaces a function-level
                    # "from casac import *", which is not valid in Python 3.
                    # It assumes the casac module exports a `casac` object
                    # providing table() - confirm against the CASA in use.
                    from casac import casac
                except Exception:
                    casaparts = os.environ["CASAPATH"].split()
                    casacpath = glob(os.sep.join(casaparts + ['python', '2.*']))
                    casacpath.sort()
                    casacpath.reverse()
                    casacpath.extend(glob(os.sep.join([casaparts[0],
                                                       'lib', 'python2.*'])))

                    import sys
                    sys.path.extend(casacpath)
                    import casac

                mytb = casac.table()
                use_tb = hasattr(mytb, 'colnames')
            except Exception:
                print("Could not find the tb tool. Try running inside a casapy session or setting PYTHONPATH to /usr/lib/casapy/.../lib/python2.*.")
        if need_tb and not use_tb:
            print("Removing " + ', '.join(sorted(need_tb))
                  + " from the criteria for matching.")
            musthave.difference_update(need_tb)

    return needed_subtables, needed_items, use_tb, mytb
00483
00484
00485
def matchingMSes(musthave=None, mspat="*.ms", combine='or', doprint=False,
                 freemem=False, remind=True):
    """
    Returns a dict of MSes that match musthave and mspat as in
    lsms(musthave, mspat, combine, remind, sortfirst), and whether or not it
    found the tb tool.

    musthave: Iterable of criteria strings (uppercased internally).  If empty
              or None, every MS matching mspat is kept and 'and' combination
              is switched off.
    mspat:    Glob pattern for the MS directories.  If it contains '**/', the
              part before the first '**/' ('.' if empty) is walked
              recursively and the part after it is globbed in every directory
              visited.
    combine:  'and' (case insensitive) to require all criteria; anything else
              means 'or'.

    If doprint=True a blurb about each ms will be printed as it is found,
    using remind as in lsms().

    If freemem=True the return dict will NOT be updated.  Note that usually
    you want freemem == doprint.
    """
    if musthave is None:
        musthave = []

    holderdict = {'musthave': set(s.upper() for s in musthave),
                  'mspat': mspat,
                  'msdict': {},
                  'use_and': combine.lower() == 'and',
                  'use_tb': None,
                  'listall': False,
                  'doprint': doprint,
                  # checkMSes() looks freemem up in holderdict, so it has to
                  # be stored here to have any effect.
                  'freemem': freemem,
                  'remind': remind}

    if not musthave:
        holderdict['listall'] = True
        holderdict['use_and'] = False

    nsit = find_needed_items(holderdict['musthave'], holderdict['listall'])
    holderdict['needed_subtables'] = nsit[0]
    holderdict['needed_items'] = nsit[1]
    holderdict['use_tb'] = nsit[2]
    holderdict['mytb'] = nsit[3]

    # Split at the first '**/' only, so that nothing after it is dropped.
    top, doubleglob, rest = mspat.partition('**/')
    if doubleglob:
        holderdict['mspat'] = rest
        # os.walk() exists in both Python 2 and 3 (os.path.walk() is Python 2
        # only); checkMSes() is handed each directory with its entries.
        for dirpath, dirnames, filenames in os.walk(top or '.'):
            checkMSes(holderdict, dirpath, dirnames + filenames)
    else:
        checkMSes(holderdict, '', [])

    return holderdict['msdict'], holderdict['use_tb']
00529
00530
def checkMSes(holderdict, dir, files):
    """
    Updates holderdict['msdict'] with the MSes in dir that match
    holderdict['mspat'] and the criteria in holderdict, as prepared by
    matchingMSes().

    holderdict: dict of settings and results.  'mspat' is required; read with
                a default are 'use_and', 'listall', 'use_tb', 'mytb' (the
                table tool), 'needed_subtables', 'needed_items', 'doprint',
                'remind', 'freemem' and 'msdict'.
    dir:        directory in which mspat is globbed ('' for the current one).
    files:      not used; accepted so that this function can serve as a
                directory-walk callback.

    Matching rules (criteria other than subtable presence need the tb tool):
      'or'  (use_and False): an MS is kept if it has any needed subtable, or
            any table inspected holds one of the items wanted for it or
            wanted "anywhere".
      'and' (use_and True): an MS is kept only if it has every needed
            subtable, every table holds all the items wanted for it, and
            every "anywhere" item is in at least one table.

    If holderdict['doprint']=True a blurb about each ms will be printed as
    it is found, using holderdict['remind'] like remind in lsms().

    If holderdict['freemem']=True holderdict['msdict'] will NOT be updated.
    Note that usually you want holderdict['freemem'] == holderdict['doprint'].
    """
    mses = glob(os.path.join(dir, holderdict['mspat']))

    use_and = holderdict.get('use_and', False)
    listall = holderdict.get('listall', False)

    if holderdict.get('freemem'):
        retval = {}
    else:
        if not holderdict.get('msdict'):
            holderdict['msdict'] = {}
        retval = holderdict['msdict']

    needed_subtables = holderdict.get('needed_subtables', set())
    needed_items = holderdict.get('needed_items', {})
    # Not every table has an entry in needed_items, so never index it directly.
    anywhere_items = set(needed_items.get('anywhere', ()))
    main_items = set(needed_items.get('MAIN', ()))

    tb = holderdict.get('mytb')
    # Without a tool object there is nothing to open tables with.
    use_tb = bool(holderdict.get('use_tb', False) and tb)

    for currms in mses:
        if currms[:2] == './':
            currms = currms[2:]

        subtabs = set(s.replace(currms + '/', '', 1)
                      for s in glob(currms + '/[A-Z]*'))
        entry = {'MAIN': {}}
        for s in subtabs:
            entry[s] = {}
        retval[currms] = entry

        keep_currms = listall
        if needed_subtables:
            if use_and:
                keep_currms = needed_subtables.issubset(subtabs)
            elif needed_subtables.intersection(subtabs):
                keep_currms = True
        elif use_and and use_tb:
            # No subtable criteria to fail: provisionally keep the MS so
            # that the item checks below get to decide.
            keep_currms = True

        if use_tb and (keep_currms or listall or (not use_and)):
            subtabs_to_check = needed_subtables
            if listall or anywhere_items:
                subtabs_to_check = subtabs

            # Every item seen in this MS, for the 'anywhere' test in 'and' mode.
            seen_items = set()

            if listall or anywhere_items or main_items:
                try:
                    tb.open(currms)
                except Exception as e:
                    if str(e)[-15:] == " does not exist":
                        print("tb could not open " + currms)
                    else:
                        print("Error " + str(e) + " from tb.open( "
                              + currms + " )")
                    # Skip only this MS (and do not leave a half-filled
                    # entry behind); the remaining MSes are still checked.
                    del retval[currms]
                    continue

                try:
                    entry['MAIN']['cols'] = tb.colnames()
                    kws = set(tb.keywordnames())
                    entry['MAIN']['kws'] = kws.difference(possible_subtables)
                finally:
                    tb.close()

                if not listall:
                    mainitems = set(entry['MAIN']['cols'])
                    mainitems.update(entry['MAIN']['kws'])
                    seen_items.update(mainitems)
                    if use_and:
                        keep_currms = main_items.issubset(mainitems)
                    elif not keep_currms:
                        if main_items.union(anywhere_items).intersection(mainitems):
                            keep_currms = True

            for st in subtabs_to_check:
                if use_and and not keep_currms:
                    # An 'and' criterion has already failed; a later table
                    # must not be able to turn that back into a match.
                    break

                stdir = currms + '/' + st
                if os.path.isdir(stdir):
                    stentry = entry.setdefault(st, {})
                    tb.open(stdir)
                    try:
                        stentry['cols'] = tb.colnames()
                        stentry['kws'] = set(tb.keywordnames())
                    finally:
                        tb.close()

                    if not listall:
                        stitems = set(stentry['cols'])
                        stitems.update(stentry['kws'])
                        seen_items.update(stitems)
                        wanted = set(needed_items.get(st, ()))
                        if use_and:
                            keep_currms = wanted.issubset(stitems)
                        elif not keep_currms:
                            if wanted.union(anywhere_items).intersection(stitems):
                                keep_currms = True
                elif use_and and st in needed_subtables:
                    # A missing needed subtable only disqualifies the MS when
                    # all criteria are required.
                    keep_currms = False

            if use_and and keep_currms and not listall:
                keep_currms = anywhere_items.issubset(seen_items)

        if not keep_currms:
            del retval[currms]
        elif holderdict.get('doprint'):
            print_ms(currms, retval[currms], listall, use_tb,
                     holderdict.get('remind', True))
00645
00646
00647
def termprops(stream):
    """
    Return whether or not stream supports colors, and a guess at its number of
    columns (in characters).

    stream: Object to examine; anything without an isatty() method, or whose
            isatty() is false, gives the defaults.

    Returns (have_colors, termwidth).  The defaults (False, 80) are kept
    whenever the terminal capabilities cannot be looked up with curses.
    """
    have_colors = False
    termwidth = 80
    if hasattr(stream, "isatty") and stream.isatty():
        try:
            import curses
            curses.setupterm()
            ncols = curses.tigetnum('cols')
            # tigetnum() reports a missing capability with a negative number;
            # that must not replace the default width.
            if ncols > 0:
                termwidth = ncols
            if curses.tigetnum("colors") > 2:
                have_colors = True
        except Exception:
            # Best effort: curses may be unavailable or unable to describe
            # this terminal.  (KeyboardInterrupt/SystemExit still propagate.)
            pass
    return have_colors, termwidth
00665
00666
00667
00668
00669
def rowscols(n, nc):
    """
    Return (number of rows needed to hold n items at nc per row, nc).
    A partially filled last row counts as a row.
    """
    nrows = n // nc
    if n % nc != 0:
        nrows += 1
    return nrows, nc
00673
if __name__ == '__main__':
    # Script use: the first argument is the MS pattern, any further arguments
    # are the items the MSes must have.
    import sys
    mspat = sys.argv[1] if len(sys.argv) > 1 else '*.ms'
    musthave = sys.argv[2:]
    lsms(musthave, mspat)