casa  $Rev:20696$
 All Classes Namespaces Files Functions Variables
simplelinefinder.py
Go to the documentation of this file.
00001 import math
00002 from asap.logging import asaplog
00003 
00004 class StatCalculator:
00005    def __init__(self):
00006        self.s=0.
00007        self.s2=0.
00008        self.cnt=0
00009 
00010    def mean(self):
00011        if self.cnt<=0:
00012           raise RuntimeError, "At least one data point has to be defined"
00013        return self.s/float(self.cnt)
00014 
00015    def variance(self):
00016        if self.cnt<=1:
00017           raise RuntimeError, "At least two data points has to be defined"
00018        return math.sqrt((self.s2/self.cnt)-(self.s/self.cnt)**2+1e-12)
00019 
00020    def rms(self):
00021        """
00022           return rms of the accumulated sample
00023        """
00024        if self.cnt<=0:
00025           raise RuntimeError, "At least one data point has to be defined"
00026        return math.sqrt(self.s2/self.cnt)
00027 
00028    def add(self, pt):
00029        self.s = self.s + pt
00030        self.s2 = self.s2 + pt*pt
00031        self.cnt = self.cnt + 1
00032 
00033 
00034 class simplelinefinder:
00035    '''
00036        A simplified class to search for spectral features. The algorithm assumes that the bandpass
00037        is taken out perfectly and no spectral channels are flagged (except some edge channels).
00038        It works with a list or tuple rather than a scantable and returns the channel pairs.
00039        There is an optional feature to attempt to split the detected lines into components, although
00040        it should be used with caution. This class is largely intended to be used with scripts.
00041 
00042        The fully featured version of the algorithm working with scantables is called linefinder.
00043    '''
00044 
00045    def __init__(self):
00046       '''
00047          Initialize the class.
00048       '''
00049       self._median = None
00050       self._rms = None
00051 
00052    def writeLog(self, str):
00053       """
00054          Write user defined string into log file
00055       """
00056       asaplog.push(str)
00057 
00058    def invertChannelSelection(self, nchan, chans, edge = (0,0)):
00059       """
00060          This method converts a tuple with channel ranges to a tuple which covers all channels
00061          not selected by the original tuple (optionally edge channels can be discarded)
00062 
00063          nchan - number of channels in the spectrum.
00064          chans - tuple (with even number of elements) containing start and stop channel for all selected ranges
00065          edge - one number or two element tuple (separate values for two ends) defining how many channels to reject
00066 
00067          return:  a tuple with inverted channel selection
00068          Note, at this stage channel ranges are assumed to be sorted and without overlap
00069       """
00070       if nchan<=1:
00071          raise RuntimeError, "Number of channels is supposed to be at least 2, you have %i"% nchan
00072       if len(chans)%2!=0:
00073          raise RuntimeError, "chans is supposed to be a tuple with even number of elements"
00074       tempedge = edge
00075       if not isinstance(tempedge,tuple):
00076          tempedge = (edge,edge)
00077       if len(tempedge)!=2:
00078          raise RuntimeError, "edge parameter is supposed to be a two-element tuple or a single number"
00079       if tempedge[0]<0 or tempedge[1]<0:
00080          raise RuntimeError, "number of edge rejected cannels is supposed to be positive"
00081       startchan = tempedge[0]
00082       stopchan = nchan - tempedge[1]
00083       if stopchan-startchan<0:
00084          raise RuntimeError, "Edge rejection rejected all channels"
00085       ranges = []
00086       curstart = startchan
00087       for i in range(0,len(chans),2):
00088           if chans[i+1]<curstart:
00089              continue
00090           elif chans[i]<=curstart:
00091              curstart = chans[i+1]+1
00092           else:
00093              ranges.append(curstart)
00094              ranges.append(chans[i]-1)
00095              curstart = chans[i+1]+1
00096       if curstart<stopchan:
00097          ranges.append(curstart)
00098          ranges.append(stopchan-1)
00099       return tuple(ranges)
00100 
00101    def channelRange(self, spc, vel_range):
00102       """
00103          A helper method which works on a tuple with abcissa/flux vectors (i.e. as returned by uvSpectrum). It
00104          allows to convert supplied velocity range into the channel range.
00105 
00106          spc - tuple with the spectrum (first is the vector of abcissa values, second is the spectrum itself)
00107          vel_range - a 2-element tuple with start and stop velocity of the range
00108 
00109          return: a 2-element tuple with channels
00110          Note, if supplied range is completely outside the spectrum, an empty tuple will be returned
00111       """
00112       if len(spc) != 2:
00113          raise RuntimeError, "spc supplied to channelRange is supposed to have 2 elements"
00114       if len(spc[0]) != len(spc[1]):
00115          raise RuntimeError, "spc supplied to channelRange is supposed to have 2 vectors of equal length"
00116       if len(vel_range) != 2:
00117          raise RuntimeError, "vel_range supplied to channelRange is supposed to have 2 elements"
00118       if vel_range[0]>=vel_range[1]:
00119          raise RuntimeError, "start velocity is supposed to be less than end velocity, vel_range: %s" % vel_range
00120       if len(spc[0])<=2:
00121          raise RuntimeError, "Spectrum should contain more than 2 points, you have %i" % len(spc[0])
00122       chans = list(vel_range)
00123       for j in range(len(chans)):
00124           chans[j] = -1
00125       for i in range(len(spc[0])):
00126           if i!=0:
00127              prev_vel = spc[0][i-1]
00128           else:
00129              prev_vel = spc[0][i+1]
00130           delta = max(prev_vel, spc[0][i]) - min(prev_vel, spc[0][i])
00131           for j in range(len(vel_range)):
00132               if abs(vel_range[j]-spc[0][i])<delta:
00133                   chans[j] = i
00134       if chans[0] == chans[1]:
00135          return ()
00136       if chans[1] == -1:
00137          chans[1] = len(spc[0])-1
00138       if chans[0] == -1:
00139          chans[0] = 0
00140       if chans[0]<=chans[1]:
00141          return tuple(chans)
00142       return (chans[1],chans[0])
00143 
00144    def _absvalComp(self,x,y):
00145       """
00146          A helper method to compare x and y by absolute value (to do sorting)
00147 
00148          x - first value
00149          y - second value
00150 
00151          return -1,0 or 1 depending on the result of comparison
00152       """
00153       if abs(x)<abs(y):
00154          return -1
00155       elif abs(x)>abs(y):
00156          return 1
00157       else:
00158          return 0
00159 
00160    def rms(self):
00161       """
00162          Return rms scatter of the spectral points (with respect to the median) calculated
00163          during last find_lines call. Note, this method throws an exception if
00164          find_lines has never been called.
00165       """
00166       if self._rms==None:
00167          raise RuntimeError, "call find_lines before using the rms method"
00168       return self._rms
00169 
00170    def median(self):
00171       """
00172          Return the median of the last spectrum passed to find_lines.
00173          Note, this method throws an exception if find_lines has never been called.
00174       """
00175       if self._median==None:
00176          raise RuntimeError, "call find_lines before using the median method"
00177       return self._median
00178 
00179    def _mergeIntervals(self, lines, spc):
00180       """
00181          A helper method to merge intervals.
00182 
00183          lines - list of tuples with first and last channels of all intervals
00184          spc - spectrum (to be able to test whether adjacent intervals have the
00185                same sign.
00186       """
00187       toberemoved = []
00188       for i in range(1,len(lines)):
00189           if lines[i-1][1]+1>=lines[i][0]:
00190              if (spc[lines[i-1][1]]>self._median) == (spc[lines[i][0]]>self._median):
00191                  lines[i] = (lines[i-1][0],lines[i][1])
00192                  toberemoved.append(i-1)
00193       toberemoved.sort()
00194       for i in range(len(toberemoved)-1,-1,-1):
00195           if toberemoved[i]>=len(lines):
00196              raise RuntimeError, "this shouldn't have happened!"
00197           lines.pop(toberemoved[i])
00198 
00199    def _splitIntervals(self,lines,spc,threshold=3,minchan=3):
00200       """
00201          A helper method used in the spectral line detection routine. It splits given intervals
00202          into a number of "spectral lines". Each line is characterised by a single extremum.
00203          Noise is dealt with by taking into account only those extrema, where a difference with
00204          respect to surrounding spectral points exceeds threshold times rms (stored inside this
00205          class, so the main body of the line detection should be executed first) and there are
00206          at least minchan such points.
00207       """
00208       if minchan<1:
00209                  raise RuntimeError, "minchan in _splitIntervals is not supposed to be less than 1, you have %s" % minchan
00210       newlines = []
00211       for line in lines:
00212           if line[1]-line[0]+1 <= minchan:
00213              newlines.append(line)
00214           wasIncreasing = None
00215           derivSignReversals = []
00216           for ch in range(line[0]+1,line[1]+1):
00217               diff=spc[ch]-spc[ch-1]
00218               isIncreasing = (diff>0)
00219               if wasIncreasing != None:
00220                  if isIncreasing != wasIncreasing:
00221                     derivSignReversals.append((ch,isIncreasing))
00222               wasIncreasing = isIncreasing
00223           if len(derivSignReversals)==0:
00224              newlines.append(line)
00225           elif len(derivSignReversals)%2 != 1:
00226              self.writeLog("SplitIntervals warning. Derivative is expected to have odd number of reversals within the interval: \"%s\" " % derivSignReversals);
00227              newlines.append(line)
00228           elif derivSignReversals[0][1]!=derivSignReversals[-1][1]:
00229              self.writeLog("SplitIntervals warning. Derivative is expected to have the same sign at the start and at the end of each interval: \"%s\"" % derivSignReversals)
00230              newlines.append(line)
00231           else:
00232              startchan = line[0]
00233              for i in range(len(derivSignReversals)):
00234                 if i%2 == 1:
00235                    newlines.append((startchan,derivSignReversals[i][0]-1))
00236                    startchan = derivSignReversals[i][0]
00237              newlines.append((startchan,line[1]))
00238       return newlines
00239 
00240    def find_lines(self,spc,threshold=3,edge=0,minchan=3, tailsearch = True, splitFeatures = False):
00241       """
00242          A simple spectral line detection routine, which assumes that bandpass has been
00243          taken out perfectly and no channels are flagged within the spectrum. A detection
00244          is reported if consequtive minchan number of channels is consistently above or
00245          below the median value. The threshold is given in terms of the rms calculated
00246          using 80% of the lowest data points by the absolute value (with respect to median)
00247 
00248          spc - a list or tuple with the spectrum, no default
00249          threshold - detection threshold, default is 3 sigma, see above for the definition
00250          edge - if non-zero, this number of spectral channels will be rejected at the edge.
00251                 Default is not to do any rejection.
00252          minchan -  minimum number of consequitive channels exceeding threshold to claim the
00253                     detection, default is 3.
00254          tailsearch - if True (default), the algorithm attempts to widen each line until
00255                     its flux crosses the median. It merges lines if necessary. Set this
00256                     option off if you need to split the lines according to some criterion
00257          splitFeatures - if True, the algorithm attempts to split each detected spectral feature into
00258                     a number of spectral lines (just one local extremum). The default action is
00259                     not to do it (may give an adverse results if the noise is high)
00260 
00261          This method returns a list of tuples each containing start and stop 0-based channel
00262          number of every detected line. Empty list if nothing has been detected.
00263 
00264          Note. The median and rms about this median is stored inside this class and can
00265          be obtained with rms and median methods.
00266       """
00267       if edge<0:
00268          raise RuntimeError, "edge parameter of find_lines should be non-negative, you have %s" % edge
00269       if 2*edge>=len(spc):
00270          raise RuntimeError, "edge is too high (%i), you rejected all channels (%i)" % (edge, len(spc))
00271       if threshold<=0:
00272          raise RuntimeError, "threshold parameter of find_lines should be positive, you have %s" % threshold
00273       if minchan<=0:
00274          raise RuntimeError, "minchan parameter of find_lines should be positive, you have %s" % minchan
00275 
00276       # temporary storage to get statistics, apply edge rejection here
00277       tmpspc = spc[edge:len(spc)-edge+1]
00278       if len(tmpspc)<2:
00279          raise RuntimeError, "Too many channels are rejected. Decrease edge parameter or provide a longer spectrum."
00280       tmpspc.sort()
00281       self._median=tmpspc[len(tmpspc)/2]
00282       # work with offsets from the median and sort by absolute values
00283       for i in range(len(tmpspc)):
00284           tmpspc[i]-=self._median
00285       tmpspc.sort(cmp=self._absvalComp)
00286       sc = StatCalculator()
00287       for i in tmpspc[:-int(0.2*len(tmpspc))]:
00288           sc.add(i)
00289       self._rms=sc.rms()
00290 
00291       self.writeLog("Spectral line detection with edge=%i, threshold=%f, minchan=%i and tailsearch=%s" % (edge,threshold, minchan, tailsearch))
00292       self.writeLog("statistics: median=%f, rms=%f" % (self._median, self._rms))
00293 
00294       #actual line detection
00295       lines=[]
00296       wasAbove = None
00297       nchan = 0
00298       startchan=None
00299       for i in range(edge,len(spc)-edge):
00300           if abs(spc[i]-self._median)>threshold*self._rms:
00301              isAbove=(spc[i] > self._median)
00302              if nchan!=0:
00303                 if wasAbove == isAbove:
00304                    nchan+=1
00305                 else:
00306                    if nchan>=minchan:
00307                       lines.append((startchan,i-1))
00308                    nchan=1
00309                    wasAbove = isAbove
00310                    startchan = i
00311              else:
00312                 nchan=1
00313                 wasAbove = isAbove
00314                 startchan = i
00315           else:
00316              if nchan>=minchan:
00317                 lines.append((startchan,i-1))
00318              nchan = 0
00319       if nchan>=minchan:
00320          lines.append((startchan,len(spc)-edge-1))
00321 
00322       if tailsearch:
00323          for i in range(len(lines)):
00324              wasAbove = None
00325              curRange = list(lines[i])
00326              for x in range(curRange[0],edge,-1):
00327                  isAbove=(spc[x] > self._median)
00328                  if wasAbove == None:
00329                     wasAbove = isAbove
00330                  if isAbove!=wasAbove:
00331                     curRange[0]=x+1
00332                     break
00333              for x in range(curRange[1],len(spc)-edge):
00334                  isAbove=(spc[x] > self._median)
00335                  if isAbove!=wasAbove:
00336                     curRange[1]=x-1
00337                     break
00338              lines[i]=tuple(curRange)
00339          self._mergeIntervals(lines,spc)
00340       if splitFeatures:
00341          return self._splitIntervals(lines,spc,threshold,minchan)
00342       return lines