casa  $Rev:20696$
 All Classes Namespaces Files Functions Variables
partitionhelper.py
Go to the documentation of this file.
00001 import os
00002 import sys
00003 import shutil
00004 import pprint as pp
00005 import traceback
00006 import time
00007 import commands
00008 from __main__ import *
00009 from taskinit import *
00010 from tasks import *
00011 
00012 
class convertToMMS():
    '''Convert every MS found in a directory into a Multi-MS (MMS).

       Instantiating the class does all the work: it validates the input
       and output directories, runs partition on each MS found directly
       inside inpdir, and finally creates symbolic links in the output
       directory to the remaining files and calibration tables.
    '''

    def __init__(self,
                 inpdir=None,
                 mmsdir=None,
                 createmslink=False,
                 cleanup=False):

        '''Run the partition task to create MMSs from a directory with MSs

           inpdir       --> directory with the input MSs (mandatory).
           mmsdir       --> directory to save the MMSs to. When None or '/',
                            <current directory>/mmsdir is used instead.
           createmslink --> when True, create a link with extension .ms
                            to each new MMS.
           cleanup      --> when True, remove the output directory before
                            starting.

           NOTE: the process exits with status 2 (sys.exit) as soon as
           partition fails for one of the MSs.
        '''
        casalog.origin('convertToMMS')

        self.inpdir = inpdir
        self.outdir = mmsdir
        self.createmslink = createmslink
        self.mmsdir = '/tmp/mmsdir'
        self.cleanup = cleanup

        # Input directory is mandatory
        if self.inpdir is None:
            casalog.post('You must give an input directory to this script')
            self.usage()
            return

        if not os.path.exists(self.inpdir):
            casalog.post('Input directory inpdir does not exist -> '+self.inpdir,'ERROR')
            self.usage()
            return

        if not os.path.isdir(self.inpdir):
            casalog.post('Value of inpdir is not a directory -> '+self.inpdir,'ERROR')
            self.usage()
            return

        # Only work with absolute paths
        self.inpdir = os.path.abspath(self.inpdir)
        casalog.post('Will read input MS from '+self.inpdir)

        # Verify output directory
        if self.outdir is None:
            self.mmsdir = os.path.join(os.getcwd(),'mmsdir')
        elif self.outdir == '/':
            # Never write to (or clean up!) the root directory
            casalog.post('mmsdir is set to root!', 'WARN')
            self.mmsdir = os.path.join(os.getcwd(),'mmsdir')
        else:
            self.outdir = os.path.abspath(self.outdir)
            self.mmsdir = self.outdir

        # Cleanup output directory
        if self.cleanup:
            casalog.post('Cleaning up output directory '+self.mmsdir)
            if os.path.isdir(self.mmsdir):
                shutil.rmtree(self.mmsdir)

        if not os.path.exists(self.mmsdir):
            os.makedirs(self.mmsdir)

        casalog.post('Will save output MMS to '+self.mmsdir)

        # Only the top level of the input directory is inspected
        files = next(os.walk(self.inpdir,followlinks=True))

        # Get MS list
        mslist = self.getMSlist(files)

        casalog.post('List of MSs in input directory')
        pp.pprint(mslist)

        # Get non-MS directories and other files
        nonmslist = self.getFileslist(files)

        casalog.post('List of other files in input directory')
        pp.pprint(nonmslist)

        # Create an MMS for each MS in list
        for ms in mslist:
            casalog.post('Will create an MMS for '+ms)
            ret = self.runPartition(ms, self.mmsdir, self.createmslink)
            if not ret:
                sys.exit(2)

            # Verify later if this is still needed
            time.sleep(10)

            # partition resets the log origin; set it back
            casalog.origin('convertToMMS')
            casalog.post('--------------- Successfully created MMS')

        # Create links to the other files
        for path in nonmslist:
            bfile = os.path.basename(path)
            lfile = os.path.join(self.mmsdir, bfile)
            if os.path.lexists(lfile):
                # A previous run may have left the link behind
                casalog.post('Will not overwrite existing '+lfile, 'WARN')
                continue
            casalog.post('Creating symbolic link to '+bfile)
            os.symlink(path, lfile)

    def getMSlist(self, files):
        '''Get a list of MSs from a directory.
           files -> a tuple that is returned by the following call:
           files = next(os.walk(self.inpdir,followlinks=True))

           It will test if a directory is an MS and will only return
           true MSs, that have Type:Measurement Set in table.info. It will skip
           directories that start with . and those that do not end with
           extension .ms.

           Return a list with the full path of each MS.
           '''

        topdir = files[0]
        mslist = []

        # Loop through list of directories
        for d in files[1]:
            # Skip . entries and anything without the .ms extension
            if d.startswith('.') or not d.endswith('.ms'):
                continue

            # Full path for directory
            fullpath = os.path.join(topdir,d)

            # Keep it only when table.info confirms it is a plain MS
            if self.isItMS(fullpath) == 1:
                mslist.append(fullpath)

        return mslist

    def isItMS(self, dir):
        '''Check the type of a directory.
           dir  --> full path of a directory.
                Returns 1 for an MS, 2 for a cal table and 3 for a MMS.
                If 0 is returned, it means any other type or an error.'''

        ret = 0

        # Listing of this directory
        ldir = os.listdir(dir)

        if 'table.info' not in ldir:
            return ret

        # Read table.info directly instead of running grep in a shell, so
        # paths with spaces or shell meta-characters are handled correctly.
        try:
            with open(os.path.join(dir, 'table.info')) as info:
                lines = info.read().splitlines()
        except (IOError, OSError):
            return ret

        # Same line selection as "grep Type" and "grep SubType": note
        # that the SubType line also matches the "Type" pattern.
        typeinfo = '\n'.join([l for l in lines if 'Type' in l])
        subtypeinfo = '\n'.join([l for l in lines if 'SubType' in l])

        # It is a cal table
        if 'Calibration' in typeinfo:
            ret = 2

        elif 'Measurement' in typeinfo:
            # It is a Multi-MS
            if 'CONCATENATED' in subtypeinfo:
                # Further check
                if 'SUBMSS' in ldir:
                    ret = 3
            # It is an MS
            else:
                ret = 1

        return ret

    def getFileslist(self, files):
        '''Get a list of non-MS files from a directory.
           files -> a tuple that is returned by the following call:
           files = next(os.walk(self.inpdir,followlinks=True))

           It will return regular files and calibration-table directories.
           Directories ending in .ms and any other kind of directory are
           left out. It will skip entries that start with .

           Return a list with the full path of each entry.
           '''

        topdir = files[0]
        fileslist = []

        # Get other directories that are not MSs
        for d in files[1]:
            # Skip . entries and MS directories
            if d.startswith('.') or d.endswith('.ms'):
                continue

            # Full path for directory
            fullpath = os.path.join(topdir,d)

            # It is a Calibration
            if self.isItMS(fullpath) == 2:
                fileslist.append(fullpath)

        # Get non-directory files
        for f in files[2]:
            # Skip . entries
            if f.startswith('.'):
                continue

            fileslist.append(os.path.join(topdir, f))

        return fileslist

    def runPartition(self, ms, mmsdir, createlink):
        '''Run partition with default values to create an MMS.
           ms         --> full pathname of the MS
           mmsdir     --> directory to save the MMS to
           createlink --> when True, it will create a symbolic link to the
                         just created MMS in the same directory with extension .ms

           Return True on success and False when the MS does not exist,
           the output MMS already exists or partition did not create it.
        '''
        from tasks import partition

        if not os.path.lexists(ms):
            return False

        # Create MMS name. Only the trailing extension is swapped, so a
        # '.ms' elsewhere in the name is left untouched.
        bname = os.path.basename(ms)
        if bname.endswith('.ms'):
            stem = bname[:-len('.ms')]
        else:
            stem = bname
        mmsname = stem+'.mms'

        mms = os.path.join(mmsdir, mmsname)
        if os.path.lexists(mms):
            casalog.post('Output MMS already exist -->'+mms,'ERROR')
            return False

        # Check for remainings of corrupted mms
        corrupted = os.path.join(mmsdir, stem+'.data')
        if os.path.exists(corrupted):
            casalog.post('Cleaning up left overs','WARN')
            shutil.rmtree(corrupted)

        # Run partition
        default('partition')
        partition(vis=ms, outputvis=mms, createmms=True, datacolumn='all')
        casalog.origin('convertToMMS')

        # Check if MMS was created
        if not os.path.exists(mms):
            casalog.post('Cannot create MMS ->'+mms, 'ERROR')
            return False

        # If requested, create a link to this MMS. The link target is
        # relative, so the output directory can be moved as a whole; no
        # change of working directory is needed to create it.
        if createlink:
            casalog.post('Creating symbolic link to MMS')
            os.symlink(mmsname, os.path.join(mmsdir, stem+'.ms'))

        return True

    def usage(self):
        '''Print the help text of convertToMMS.'''
        print('=========================================================================')
        print('          convertToMMS will create a directory with multi-MSs.')
        print('Usage:\n')
        print('  import partitionhelper as ph')
        print('  ph.convertToMMS(inpdir=\'dir\') \n')
        print('Options:')
        print('   inpdir <dir>        directory with input MS.')
        print('   mmsdir <dir>        directory to save output MMS. If not given, it will save ')
        print('                       the MMS in a directory called mmsdir in the current directory.')
        print('   createmslink=False  if True it will create a link to the new MMS with extension .ms.')
        print('   cleanup=False       if True it will remove the output directory before starting.\n')

        print(' NOTE: this script will run using the default values of partition. It will try to ')
        print(' create an MMS for every MS in the input directory. It will skip non-MS directories ')
        print(' such as cal tables. If partition succeeds, the script will create a link to every ')
        print(' other directory or file in the output directory. This script might fail if run on ')
        print(' single dish MS because the datacolumn needs to be set in partition.\n')
        print(' The script will not walk through sub-directories of inpdir. It will also skip ')
        print(' files or directories that start with a .')
        print('==========================================================================')
        return
00301         
00302 #
00303 # -------------- HELPER functions for dealing with an MMS --------------
00304 #
00305 #    getMMSScans        'Get the list of scans of an MMS dictionary'
00306 #    getScanList        'Get the list of scans of an MS or MMS'
00307 #    getScanNrows       'Get the number of rows of a scan in a MS. It will add the 
00308 #                         nrows of all sub-scans.'
00309 #    getMMSScanNrows    'Get the number of rows of a scan in an MMS dictionary.'
00310 #    getSpwIds          'Get the Spw IDs of a scan.'
00311 #    getDiskUsage       'Return the size of an MS on disk, as reported by du -hs.'
00312 #
00313 # ----------------------------------------------------------------------
00314 
00315 # NOTE
00316 # There is a bug in ms.getscansummary() that does not give the scans for all 
00317 # observation Ids, but only for the last one. See CAS-4409
def getMMSScans(mmsdict):
    '''Collect the scan numbers present in an MMS dictionary.
       mmsdict  --> output dictionary from listpartition(MMS,createdict=true)

       Every entry of mmsdict is expected to hold a 'scanId' dictionary
       keyed by scan. Return a list with each scan listed once; the
       order of the list is not defined. '''

    unique = set()
    for subms in mmsdict.values():
        # A scan may show up in several sub-MSs; the set keeps one copy
        unique.update(subms['scanId'].keys())

    return list(unique)
00332     
def getScanList(msfile, selection={}):
    '''Get the list of scans of an MS or MMS.
       msfile     --> name of MS or MMS
       selection  --> dictionary with data selection; it is applied with
                      msselect when it is a non-empty dictionary

       Return a list with the keys of the dictionary returned by
       getscansummary() for this MS/MMS. '''

    msTool = mstool()
    msTool.open(msfile)
    try:
        if isinstance(selection, dict) and selection != {}:
            msTool.msselect(items=selection)

        scand = msTool.getscansummary()
    finally:
        # Release the MS even when the selection or the summary fails
        msTool.close()

    # Always hand back a real list, as documented
    return list(scand.keys())
00351     
00352     
def getScanNrows(msfile, myscan, selection={}):
    '''Get the number of rows of a scan in a MS. It will add the nrows of all sub-scans.
       msfile     --> name of the MS or MMS
       myscan     --> scan ID (int)
       selection  --> dictionary with data selection; it is applied with
                      msselect when it is a non-empty dictionary

       Return the number of rows in the scan, or 0 when the scan is not
       present in the scan summary.

       To compare with the dictionary returned by listpartition, do the following:

        resdict = listpartition('file.mms', createdict=True)
        slist = ph.getMMSScans(resdict)
        for s in slist:
            mmsN = ph.getMMSScanNrows(resdict, s)
            msN = ph.getScanNrows('referenceMS', s)
            assert (mmsN == msN)
    '''
    msTool = mstool()
    msTool.open(msfile)
    try:
        if isinstance(selection, dict) and selection != {}:
            msTool.msselect(items=selection)

        scand = msTool.getscansummary()
    finally:
        # Release the MS even when the selection or the summary fails
        msTool.close()

    # The scan summary is keyed by the scan number as a string
    scankey = str(myscan)
    if scankey not in scand:
        return 0

    Nrows = 0
    for subscan in scand[scankey].values():
        Nrows += subscan['nRow']

    return Nrows
00388 
00389 
def getMMSScanNrows(thisdict, myscan):
    '''Get the number of rows of a scan in an MMS dictionary.
       thisdict  --> output dictionary from listpartition(MMS,createdict=true)
       myscan    --> scan ID (int)

       Return the number of rows in the given scan, added over all the
       entries of thisdict; 0 when no entry contains the scan. '''

    scanrows = 0
    for subms in thisdict.values():
        scans = subms['scanId']
        # "in" replaces dict.has_key(), which no longer exists in Python 3
        if myscan in scans:
            scanrows += scans[myscan]['nrows']

    return scanrows
00403    
00404 
def getSpwIds(msfile, myscan, selection={}):
    '''Get the Spw IDs of a scan.
       msfile     --> name of the MS or MMS
       myscan     --> scan Id (int)
       selection  --> dictionary with data selection; it is applied with
                      msselect when it is a non-empty dictionary

       Return a list with the Spw IDs. Note that the returned spw IDs are
       sorted and listed only once. An empty list is returned when the
       scan is not present in the scan summary.
    '''
    import numpy as np

    msTool = mstool()
    msTool.open(msfile)
    try:
        if isinstance(selection, dict) and selection != {}:
            msTool.msselect(items=selection)

        scand = msTool.getscansummary()
    finally:
        # Release the MS even when the selection or the summary fails
        msTool.close()

    # The scan summary is keyed by the scan number as a string
    scankey = str(myscan)
    if scankey not in scand:
        return []

    # Gather the spws of every sub-scan of this scan
    aspws = np.array([],dtype=int)
    for subscan in scand[scankey].values():
        aspws = np.append(aspws, subscan['SpwIds'])

    # np.unique sorts the values and removes duplicates
    uniquespws = np.unique(aspws)

    return uniquespws.ravel().tolist()
00444 
00445 
def getMMSSpwIds(thisdict):
    '''List the spectral windows found in an MMS dictionary.
       thisdict  --> output dictionary from listpartition(MMS,createdict=true)

       The 'spwIds' of every scan of every entry are gathered.
       Return a sorted list of the spw Ids, each one listed once. '''

    import numpy as np

    collected = np.array([],dtype='int32')
    for subms in thisdict.values():
        for scaninfo in subms['scanId'].values():
            collected = np.append(collected, scaninfo['spwIds'])

    # np.unique returns the distinct values in ascending order
    return np.unique(collected).ravel().tolist()
00470 
00471 
def getDiskUsage(msfile):
    '''Return the size of an MS or MMS in disk.
       msfile  --> name of the MS

       This function will return the value given by the command du -hs,
       i.e. a human-readable string such as '75M' and not a number of
       bytes. When du fails, its error output is what gets parsed, so the
       returned token is then not a size.'''

    from subprocess import Popen, PIPE, STDOUT

    # No shell is involved, so names with spaces or shell meta-characters
    # are passed to du untouched. ~ and $VAR, which the shell used to
    # expand, are expanded here instead.
    path = os.path.expanduser(os.path.expandvars(msfile))

    # "--" keeps a name that starts with a dash from being read as an option
    p = Popen(['du', '-hs', '--', path], stdout=PIPE, stderr=STDOUT,
              close_fds=True, universal_newlines=True)

    # communicate() reads all the output and waits for du to finish
    sizeline = p.communicate()[0]

    # Create a list of the output string, which looks like this:
    # ' 75M\tuidScan23.data/uidScan23.0000.ms\n'
    # This will create a list with [size,sub-ms]
    mssize = sizeline.split()

    return mssize[0]
00493 
00494 
def getSubtables(vis):
    '''List the subtables referenced by the keywords of a table.
       vis  --> name of the table (MS) to inspect

       A keyword counts as a subtable reference when its value is a string
       whose first space-separated word is 'Table:'; the second word is
       taken as the path. The SORTED_TABLE keyword is left out.

       Return a list with the base name of each subtable. '''

    localtb = tbtool()
    localtb.open(vis)
    keywords = localtb.getkeywords()
    localtb.close()

    subtables = []
    for name in keywords.keys():
        if name == 'SORTED_TABLE':
            continue

        value = keywords[name]
        if type(value) != str:
            continue

        words = value.split(' ')
        if words[0] != 'Table:':
            continue

        subtables.append(os.path.basename(words[1]))

    return subtables
00508 
00509 
def _removePath(path):
    '''Remove a file, a symbolic link or a directory tree; do nothing when
       path does not exist. A link to a directory is removed as a link.'''
    if os.path.islink(path) or os.path.isfile(path):
        os.remove(path)
    elif os.path.isdir(path):
        shutil.rmtree(path)


def makeMMS(outputvis, submslist, copysubtables=False, omitsubtables=[]):
    '''
    Create an MMS named outputvis from the submss in list submslist.
    The subtables in omitsubtables are linked instead of copied.

    outputvis      --> name of the MMS to create; it must not exist
    submslist      --> list with the names of the sub-MSs; the first one
                       becomes the master the subtable links point to
    copysubtables  --> passed on to createmultims
    omitsubtables  --> subtables which, in every sub-MS but the first, are
                       replaced by a link to the one of the first sub-MS

    Return True on success. Raise ValueError when outputvis exists, when
    submslist is empty or when any step of the creation fails. The current
    working directory is restored in every case.
    '''

    if os.path.exists(outputvis):
        raise ValueError("Output MS already exists")

    if len(submslist)==0:
        raise ValueError("No SubMSs given")

    ## make an MMS with all sub-MSs contained in a SUBMSS subdirectory
    origpath = os.getcwd()
    mymstool = mstool()
    mytbtool = tbtool()
    try:
        try:
            mymstool.createmultims(outputvis,
                                   submslist,
                                   [],
                                   True,  # nomodify
                                   False, # lock
                                   copysubtables,
                                   omitsubtables) # when copying the subtables, omit these
        finally:
            mymstool.close()

        # remove the SORTED_TABLE keywords because the sorting is not reliable after partitioning
        try:
            mytbtool.open(outputvis, nomodify=False)
            if 'SORTED_TABLE' in mytbtool.keywordnames():
                mytbtool.removekeyword('SORTED_TABLE')
            # Close the main table whether or not the keyword was there
            mytbtool.close()
            for thesubms in submslist:
                # Strip a trailing / first, or basename() would be empty
                subname = os.path.basename(thesubms.rstrip('/'))
                mytbtool.open(outputvis+'/SUBMSS/'+subname, nomodify=False)
                if 'SORTED_TABLE' in mytbtool.keywordnames():
                    tobedel = mytbtool.getkeyword('SORTED_TABLE').split(' ')[1]
                    mytbtool.removekeyword('SORTED_TABLE')
                    _removePath(tobedel)
                mytbtool.close()
        except:
            # Release the table, then let the outer handler report it
            mytbtool.close()
            raise

        # finally create symbolic links to the subtables of the first SubMS
        os.chdir(origpath)
        os.chdir(outputvis)
        mastersubms = os.path.basename(submslist[0].rstrip('/'))
        thesubtables = getSubtables('SUBMSS/'+mastersubms)
        for s in thesubtables:
            os.symlink('SUBMSS/'+mastersubms+'/'+s, s)

        # AND put links for those subtables omitted
        os.chdir('SUBMSS/'+mastersubms)
        for othersubms in submslist[1:]:
            thesubms = os.path.basename(othersubms.rstrip('/'))
            # All sub-MSs are siblings inside SUBMSS
            os.chdir('../'+thesubms)
            for s in omitsubtables:
                # s may be a directory, a file or a stale link
                _removePath(s)
                os.symlink('../'+mastersubms+'/'+s, s)

    except Exception:
        # Report every failure as ValueError, as callers expect.
        # KeyboardInterrupt and SystemExit are deliberately not converted.
        theproblem = str(sys.exc_info())
        raise ValueError("Problem in MMS creation: "+theproblem)
    finally:
        os.chdir(origpath)

    return True
00583 
00584 
00585