casa
$Rev:20696$
|
import os
import sys
import shutil
import pprint as pp
import traceback
import time
import commands
from __main__ import *
from taskinit import *
from tasks import *


class convertToMMS():
    '''Convert every MS found in a directory into an MMS using partition.

    All the work is done by the constructor; see usage() for the help text
    that is printed when the input directory is missing or invalid.
    '''

    def __init__(self,
                 inpdir=None,
                 mmsdir=None,
                 createmslink=False,
                 cleanup=False):
        '''Run the partition task to create MMSs from a directory with MSs.

        inpdir       --> directory with the input MSs (mandatory).
        mmsdir       --> directory to save the output MMSs. When None or '/',
                         a directory called mmsdir in the current working
                         directory is used.
        createmslink --> when True, create a link with extension .ms to each
                         new MMS.
        cleanup      --> when True, remove the output directory before
                         starting.

        NOTE: the process exits with status 2 (sys.exit) as soon as
        partition fails for one MS.
        '''
        casalog.origin('convertToMMS')

        self.inpdir = inpdir
        self.outdir = mmsdir
        self.createmslink = createmslink
        self.mmsdir = '/tmp/mmsdir'
        self.cleanup = cleanup

        # Input directory is mandatory
        if self.inpdir is None:
            casalog.post('You must give an input directory to this script')
            self.usage()
            return

        if not os.path.exists(self.inpdir):
            casalog.post('Input directory inpdir does not exist -> '+self.inpdir,'ERROR')
            self.usage()
            return

        if not os.path.isdir(self.inpdir):
            casalog.post('Value of inpdir is not a directory -> '+self.inpdir,'ERROR')
            self.usage()
            return

        # Only work with absolute paths
        self.inpdir = os.path.abspath(self.inpdir)
        casalog.post('Will read input MS from '+self.inpdir)

        # Verify output directory
        if self.outdir is None:
            self.mmsdir = os.path.join(os.getcwd(), 'mmsdir')
        elif self.outdir == '/':
            # Refuse to write into the root directory; fall back to the default
            casalog.post('mmsdir is set to root!', 'WARN')
            self.mmsdir = os.path.join(os.getcwd(), 'mmsdir')
        else:
            self.outdir = os.path.abspath(self.outdir)
            self.mmsdir = self.outdir

        # Cleanup output directory
        if self.cleanup:
            casalog.post('Cleaning up output directory '+self.mmsdir)
            if os.path.isdir(self.mmsdir):
                shutil.rmtree(self.mmsdir)

        if not os.path.exists(self.mmsdir):
            os.makedirs(self.mmsdir)

        casalog.post('Will save output MMS to '+self.mmsdir)

        # Only the top level of the input directory is inspected
        files = next(os.walk(self.inpdir, followlinks=True))

        # Get MS list
        mslist = self.getMSlist(files)

        casalog.post('List of MSs in input directory')
        pp.pprint(mslist)

        # Get non-MS directories and other files
        nonmslist = self.getFileslist(files)

        casalog.post('List of other files in input directory')
        pp.pprint(nonmslist)

        # Create an MMS for each MS in list
        for ms in mslist:
            casalog.post('Will create an MMS for '+ms)
            ret = self.runPartition(ms, self.mmsdir, self.createmslink)
            if not ret:
                sys.exit(2)

            # Verify later if this is still needed
            time.sleep(10)

            casalog.origin('convertToMMS')
            casalog.post('--------------- Successfully created MMS')

        # Create links to the other files
        for nonms in nonmslist:
            bfile = os.path.basename(nonms)
            lfile = os.path.join(self.mmsdir, bfile)
            # os.symlink raises if the name is taken, e.g. on a second run
            # without cleanup; keep what is there and carry on.
            if os.path.lexists(lfile):
                casalog.post('Will not create link; file already exists -> '+lfile, 'WARN')
                continue
            casalog.post('Creating symbolic link to '+bfile)
            os.symlink(nonms, lfile)

    def getMSlist(self, files):
        '''Get a list of MSs from a directory.
           files -> a tuple that is returned by the following call:
           files = os.walk(self.inpdir,followlinks=True).next()

           It will test if a directory is an MS and will only return
           true MSs, that have Type:Measurement Set in table.info. It will skip
           directories that start with . and those that do not end with
           extension .ms.

           Return a list with the full paths of the MSs.
        '''
        topdir = files[0]
        mslist = []

        # Loop through list of directories
        for d in files[1]:
            # Skip . entries and anything without the .ms extension
            if d.startswith('.') or not d.endswith('.ms'):
                continue

            # Full path for directory
            mspath = os.path.join(topdir, d)

            # Keep it only when table.info says it is a plain MS
            if self.isItMS(mspath) == 1:
                mslist.append(mspath)

        return mslist

    def isItMS(self, dir):
        '''Check the type of a directory.
           dir  --> full path of a directory.
           Returns 1 for an MS, 2 for a cal table and 3 for a MMS.
           If 0 is returned, it means any other type or an error.

           The decision is based on the lines of dir/table.info that contain
           the words Type and SubType.
        '''
        ret = 0

        # Listing of this directory
        ldir = os.listdir(dir)

        if 'table.info' not in ldir:
            return ret

        # Read the file directly instead of running grep through a shell,
        # which breaks on paths with spaces or shell metacharacters.
        try:
            with open(os.path.join(dir, 'table.info')) as finfo:
                # Same selection as "grep Type": it also picks SubType lines
                typelines = [line for line in finfo if 'Type' in line]
        except (IOError, OSError):
            return ret

        ttype = ''.join(typelines)
        stype = ''.join([line for line in typelines if 'SubType' in line])

        # It is a cal table
        if 'Calibration' in ttype:
            ret = 2

        elif 'Measurement' in ttype:
            # It is a Multi-MS
            if 'CONCATENATED' in stype:
                # Further check
                if 'SUBMSS' in ldir:
                    ret = 3
            # It is an MS
            else:
                ret = 1

        return ret

    def getFileslist(self, files):
        '''Get a list of non-MS files from a directory.
           files -> a tuple that is returned by the following call:
           files = os.walk(self.inpdir,followlinks=True).next()

           It will return the full paths of the cal table directories and of
           all the regular files. Directories that end with .ms and entries
           that start with . are skipped.
        '''
        topdir = files[0]
        fileslist = []

        # Get other directories that are not MSs
        for d in files[1]:
            # Skip . entries and MS directories
            if d.startswith('.') or d.endswith('.ms'):
                continue

            # Full path for directory
            dpath = os.path.join(topdir, d)

            # It is a Calibration
            if self.isItMS(dpath) == 2:
                fileslist.append(dpath)

        # Get non-directory files
        for f in files[2]:
            # Skip . entries
            if f.startswith('.'):
                continue

            # Full path for file
            fileslist.append(os.path.join(topdir, f))

        return fileslist

    def runPartition(self, ms, mmsdir, createlink):
        '''Run partition with default values to create an MMS.
           ms         --> full pathname of the MS
           mmsdir     --> directory to save the MMS to
           createlink --> when True, it will create a symbolic link to the
                          just created MMS in the same directory with extension .ms

           Return True on success and False when the input MS does not exist,
           the output MMS already exists or the MMS was not created.
        '''
        from tasks import partition

        if not os.path.lexists(ms):
            return False

        # Create MMS name; normpath drops a trailing slash, which would
        # otherwise give an empty base name.
        bname = os.path.basename(os.path.normpath(ms))
        if bname.endswith('.ms'):
            # Swap only the extension, not every '.ms' inside the name
            mmsname = bname[:-len('.ms')] + '.mms'
        else:
            mmsname = bname + '.mms'

        mms = os.path.join(mmsdir, mmsname)
        if os.path.lexists(mms):
            casalog.post('Output MMS already exist -->'+mms,'ERROR')
            return False

        # Check for remainings of corrupted mms
        corrupted = mms[:-len('.mms')] + '.data'
        if os.path.exists(corrupted):
            casalog.post('Cleaning up left overs','WARN')
            shutil.rmtree(corrupted)

        # Run partition
        default('partition')
        partition(vis=ms, outputvis=mms, createmms=True, datacolumn='all')
        casalog.origin('convertToMMS')

        # Check if MMS was created
        if not os.path.exists(mms):
            casalog.post('Cannot create MMS ->'+mms, 'ERROR')
            return False

        # If requested, create a link to this MMS
        if createlink:
            lms = mmsname[:-len('.mms')] + '.ms'
            casalog.post('Creating symbolic link to MMS')
            # The link target is relative, so the link stays valid when the
            # whole output directory is moved; no chdir is needed for that.
            os.symlink(mmsname, os.path.join(mmsdir, lms))

        return True

    def usage(self):
        '''Print the help text of convertToMMS to the standard output.'''
        print('=========================================================================')
        print(' convertToMMS will create a directory with multi-MSs.')
        print('Usage:\n')
        print(' import partitionhelper as ph')
        print(' ph.convertToMMS(inpdir=\'dir\') \n')
        print('Options:')
        print(' inpdir <dir> directory with input MS.')
        print(' mmsdir <dir> directory to save output MMS. If not given, it will save ')
        print(' the MMS in a directory called mmsdir in the current directory.')
        print(' createmslink=False if True it will create a link to the new MMS with extension .ms.')
        print(' cleanup=False if True it will remove the output directory before starting.\n')

        print(' NOTE: this script will run using the default values of partition. It will try to ')
        print(' create an MMS for every MS in the input directory. It will skip non-MS directories ')
        print(' such as cal tables. If partition succeeds, the script will create a link to every ')
        print(' other directory or file in the output directory. This script might fail if run on ')
        print(' single dish MS because the datacolumn needs to be set in partition.\n')
        print(' The script will not walk through sub-directories of inpdir. It will also skip ')
        print(' files or directories that start with a .')
        print('==========================================================================')
        return


#
# -------------- HELPER functions for dealing with an MMS --------------
#
#    getMMSScans        'Get the list of scans of an MMS dictionary'
#    getScanList        'Get the list of scans of an MS or MMS'
#    getScanNrows       'Get the number of rows of a scan in a MS. It will add the
#                        nrows of all sub-scans.'
#    getMMSScanNrows    'Get the number of rows of a scan in an MMS dictionary.'
#    getSpwIds          'Get the Spw IDs of a scan.'
#    getMMSSpwIds       'Get the list of spws from an MMS dictionary.'
#    getDiskUsage       'Return the size of an MS in disk, as printed by du -hs.'
#    getSubtables       'Get the names of the subtables of a table.'
#    makeMMS            'Create an MMS from a list of sub-MSs.'
#
# ----------------------------------------------------------------------

# NOTE
# There is a bug in ms.getscansummary() that does not give the scans for all
# observation Ids, but only for the last one. See CAS-4409
def getMMSScans(mmsdict):
    '''Get the list of scans of an MMS dictionary.
       mmsdict  --> output dictionary from listpartition(MMS,createdict=true)
       Return a list of the scans in this MMS. Each scan appears once; the
       order of the list is not defined. '''

    slist = set()
    for subms in mmsdict:
        slist.update(mmsdict[subms]['scanId'].keys())

    return list(slist)


def _getScanSummary(msfile, selection):
    '''Return ms.getscansummary() of msfile.
       msfile     --> name of MS or MMS
       selection  --> dictionary with data selection; it is applied with
                      msselect only when it is a non-empty dictionary.
       The ms tool is closed even when the selection or the summary fails. '''

    msTool = mstool()
    msTool.open(msfile)
    try:
        if isinstance(selection, dict) and selection != {}:
            msTool.msselect(items=selection)
        return msTool.getscansummary()
    finally:
        msTool.close()


def getScanList(msfile, selection={}):
    '''Get the list of scans of an MS or MMS.
       msfile     --> name of MS or MMS
       selection  --> dictionary with data selection

       Return a list of the scans in this MS/MMS, i.e. the keys of the
       dictionary returned by ms.getscansummary(). '''

    scand = _getScanSummary(msfile, selection)

    return list(scand.keys())


def getScanNrows(msfile, myscan, selection={}):
    '''Get the number of rows of a scan in a MS. It will add the nrows of all sub-scans.
       The selection is applied only when it is given as a non-empty dictionary.
       msfile     --> name of the MS or MMS
       myscan     --> scan ID (int)
       selection  --> dictionary with data selection

       Return the number of rows in the scan, or 0 when the scan is not
       in the scan summary.

       To compare with the dictionary returned by listpartition, do the following:

        resdict = listpartition('file.mms', createdict=True)
        slist = ph.getMMSScans(resdict)
        for s in slist:
            mmsN = ph.getMMSScanNrows(resdict, s)
            msN = ph.getScanNrows('referenceMS', s)
            assert (mmsN == msN)
    '''
    scand = _getScanSummary(msfile, selection)

    Nrows = 0
    # The scan summary is keyed by the scan ID as a string
    scankey = str(myscan)
    if scankey not in scand:
        return Nrows

    subscans = scand[scankey]
    for ii in subscans:
        Nrows += subscans[ii]['nRow']

    return Nrows


def getMMSScanNrows(thisdict, myscan):
    '''Get the number of rows of a scan in an MMS dictionary.
       thisdict  --> output dictionary from listpartition(MMS,createdict=true)
       myscan    --> scan ID (int)
       Return the number of rows in the given scan, added over all the
       entries of the dictionary that contain it. '''

    scanrows = 0
    for k in thisdict:
        scans = thisdict[k]['scanId']
        if myscan in scans:
            scanrows += scans[myscan]['nrows']

    return scanrows


def getSpwIds(msfile, myscan, selection={}):
    '''Get the Spw IDs of a scan.
       msfile     --> name of the MS or MMS
       myscan     --> scan Id (int)
       selection  --> dictionary with data selection

       Return a list with the Spw IDs, or an empty list when the scan is not
       in the scan summary. Note that the returned spw IDs are sorted and
       contain no duplicates.
    '''
    import numpy as np

    scand = _getScanSummary(msfile, selection)

    # The scan summary is keyed by the scan ID as a string
    scankey = str(myscan)
    if scankey not in scand:
        return []

    subscans = scand[scankey]
    aspws = np.array([], dtype=int)

    for sscanid in subscans:
        aspws = np.append(aspws, subscans[sscanid]['SpwIds'])

    # np.unique sorts the spws and removes the duplicates
    return np.unique(aspws).ravel().tolist()


def getMMSSpwIds(thisdict):
    '''Get the list of spws from an MMS dictionary.
       thisdict  --> output dictionary from listpartition(MMS,createdict=true)
       Return a sorted list of the spw Ids in the dictionary, without
       duplicates. '''

    import numpy as np

    aspws = np.array([], dtype='int32')
    for k in thisdict:
        scans = thisdict[k]['scanId']
        for s in scans:
            aspws = np.append(aspws, scans[s]['spwIds'])

    # np.unique sorts the spws and removes the duplicates
    return np.unique(aspws).ravel().tolist()


def getDiskUsage(msfile):
    '''Return the size of an MS or MMS in disk.
       msfile  --> name of the MS
       This function will return the value given by the command du -hs,
       i.e. a string in human-readable units such as 75M, not a number
       of bytes. '''

    from subprocess import Popen, PIPE, STDOUT

    # An argument list (no shell) keeps names with spaces or shell
    # metacharacters intact; -- stops a leading dash being read as an option.
    p = Popen(['du', '-hs', '--', msfile], stdout=PIPE, stderr=STDOUT, close_fds=True)

    # communicate() also waits for du to finish
    sizeline = p.communicate()[0]

    # Create a list of the output string, which looks like this:
    # ' 75M\tuidScan23.data/uidScan23.0000.ms\n'
    # This will create a list with [size,sub-ms]
    mssize = sizeline.split()

    return mssize[0]


def getSubtables(vis):
    '''Get the names of the subtables of a table.
       vis  --> name of the table (MS)
       Return a list with the base names taken from the table keywords whose
       value has the form "Table: <path>". The keyword SORTED_TABLE is
       skipped. '''

    tbTool = tbtool()
    tbTool.open(vis)
    try:
        myKeyw = tbTool.getkeywords()
    finally:
        tbTool.close()

    theSubTables = []
    for k in myKeyw:
        theKeyw = myKeyw[k]
        if k == 'SORTED_TABLE' or not isinstance(theKeyw, str):
            continue
        parts = theKeyw.split(' ')
        if parts[0] == 'Table:' and len(parts) > 1:
            theSubTables.append(os.path.basename(parts[1]))

    return theSubTables


def _removePath(path):
    '''Best-effort removal of a file, symbolic link or directory tree.
       A path that does not exist is ignored and a failure is only logged,
       which mirrors the "rm -rf" call this helper replaces. '''

    try:
        if os.path.islink(path) or os.path.isfile(path):
            # Remove a link itself, never the directory it points to
            os.remove(path)
        elif os.path.isdir(path):
            shutil.rmtree(path)
    except OSError:
        casalog.post('Could not remove '+path, 'WARN')


def makeMMS(outputvis, submslist, copysubtables=False, omitsubtables=[]):
    '''
    Create an MMS named outputvis from the submss in list submslist.
    The subtables in omitsubtables are linked instead of copied.

    outputvis      --> name of the MMS to create; it must not exist.
    submslist      --> list with the names of the sub-MSs; the first one is
                       the master that all the subtable links point to.
    copysubtables  --> passed on to ms.createmultims.
    omitsubtables  --> subtables to omit when copying; in every sub-MS other
                       than the first they are replaced by links to the
                       subtables of the first sub-MS.

    Return True. Raise ValueError when outputvis exists, when submslist is
    empty or when any step of the creation fails. The current working
    directory is restored in all cases.
    '''

    if os.path.exists(outputvis):
        raise ValueError("Output MS already exists")

    if len(submslist)==0:
        raise ValueError("No SubMSs given")

    ## make an MMS with all sub-MSs contained in a SUBMSS subdirectory
    origpath = os.getcwd()
    mymstool = mstool()
    mytbtool = tbtool()
    try:
        try:
            mymstool.createmultims(outputvis,
                                   submslist,
                                   [],
                                   True,  # nomodify
                                   False, # lock
                                   copysubtables,
                                   omitsubtables) # when copying the subtables, omit these
        finally:
            mymstool.close()

        # remove the SORTED_TABLE keywords because the sorting is not reliable after partitioning
        try:
            mytbtool.open(outputvis, nomodify=False)
            if 'SORTED_TABLE' in mytbtool.keywordnames():
                mytbtool.removekeyword('SORTED_TABLE')
            mytbtool.close()
            for thesubms in submslist:
                # rstrip so that a trailing slash does not give an empty base name
                subname = os.path.basename(thesubms.rstrip('/'))
                mytbtool.open(outputvis+'/SUBMSS/'+subname, nomodify=False)
                if 'SORTED_TABLE' in mytbtool.keywordnames():
                    tobedel = mytbtool.getkeyword('SORTED_TABLE').split(' ')[1]
                    mytbtool.removekeyword('SORTED_TABLE')
                    _removePath(tobedel)
                mytbtool.close()
        except:
            # Make sure the table is released, then let the error propagate
            mytbtool.close()
            raise

        # finally create symbolic links to the subtables of the first SubMS
        os.chdir(origpath)
        os.chdir(outputvis)
        mastersubms = os.path.basename(submslist[0].rstrip('/'))
        thesubtables = getSubtables('SUBMSS/'+mastersubms)
        for s in thesubtables:
            os.symlink('SUBMSS/'+mastersubms+'/'+s, s)

        # AND put links for those subtables omitted
        os.chdir('SUBMSS/'+mastersubms)
        for i in range(1, len(submslist)):
            thesubms = os.path.basename(submslist[i].rstrip('/'))
            os.chdir('../'+thesubms)
            for s in omitsubtables:
                # s may be a directory, a file or a link
                _removePath(s)
                os.symlink('../'+mastersubms+'/'+s, s)

    except Exception:
        theproblem = str(sys.exc_info())
        raise ValueError("Problem in MMS creation: "+theproblem)
    finally:
        # Several steps above change directory; always go back
        os.chdir(origpath)

    return True