00001 import math
00002 from asap.logging import asaplog
00003
00004 class StatCalculator:
00005 def __init__(self):
00006 self.s=0.
00007 self.s2=0.
00008 self.cnt=0
00009
00010 def mean(self):
00011 if self.cnt<=0:
00012 raise RuntimeError, "At least one data point has to be defined"
00013 return self.s/float(self.cnt)
00014
00015 def variance(self):
00016 if self.cnt<=1:
00017 raise RuntimeError, "At least two data points has to be defined"
00018 return math.sqrt((self.s2/self.cnt)-(self.s/self.cnt)**2+1e-12)
00019
00020 def rms(self):
00021 """
00022 return rms of the accumulated sample
00023 """
00024 if self.cnt<=0:
00025 raise RuntimeError, "At least one data point has to be defined"
00026 return math.sqrt(self.s2/self.cnt)
00027
00028 def add(self, pt):
00029 self.s = self.s + pt
00030 self.s2 = self.s2 + pt*pt
00031 self.cnt = self.cnt + 1
00032
00033
00034 class simplelinefinder:
00035 '''
00036 A simplified class to search for spectral features. The algorithm assumes that the bandpass
00037 is taken out perfectly and no spectral channels are flagged (except some edge channels).
00038 It works with a list or tuple rather than a scantable and returns the channel pairs.
00039 There is an optional feature to attempt to split the detected lines into components, although
00040 it should be used with caution. This class is largely intended to be used with scripts.
00041
00042 The fully featured version of the algorithm working with scantables is called linefinder.
00043 '''
00044
00045 def __init__(self):
00046 '''
00047 Initialize the class.
00048 '''
00049 self._median = None
00050 self._rms = None
00051
00052 def writeLog(self, str):
00053 """
00054 Write user defined string into log file
00055 """
00056 asaplog.push(str)
00057
00058 def invertChannelSelection(self, nchan, chans, edge = (0,0)):
00059 """
00060 This method converts a tuple with channel ranges to a tuple which covers all channels
00061 not selected by the original tuple (optionally edge channels can be discarded)
00062
00063 nchan - number of channels in the spectrum.
00064 chans - tuple (with even number of elements) containing start and stop channel for all selected ranges
00065 edge - one number or two element tuple (separate values for two ends) defining how many channels to reject
00066
00067 return: a tuple with inverted channel selection
00068 Note, at this stage channel ranges are assumed to be sorted and without overlap
00069 """
00070 if nchan<=1:
00071 raise RuntimeError, "Number of channels is supposed to be at least 2, you have %i"% nchan
00072 if len(chans)%2!=0:
00073 raise RuntimeError, "chans is supposed to be a tuple with even number of elements"
00074 tempedge = edge
00075 if not isinstance(tempedge,tuple):
00076 tempedge = (edge,edge)
00077 if len(tempedge)!=2:
00078 raise RuntimeError, "edge parameter is supposed to be a two-element tuple or a single number"
00079 if tempedge[0]<0 or tempedge[1]<0:
00080 raise RuntimeError, "number of edge rejected cannels is supposed to be positive"
00081 startchan = tempedge[0]
00082 stopchan = nchan - tempedge[1]
00083 if stopchan-startchan<0:
00084 raise RuntimeError, "Edge rejection rejected all channels"
00085 ranges = []
00086 curstart = startchan
00087 for i in range(0,len(chans),2):
00088 if chans[i+1]<curstart:
00089 continue
00090 elif chans[i]<=curstart:
00091 curstart = chans[i+1]+1
00092 else:
00093 ranges.append(curstart)
00094 ranges.append(chans[i]-1)
00095 curstart = chans[i+1]+1
00096 if curstart<stopchan:
00097 ranges.append(curstart)
00098 ranges.append(stopchan-1)
00099 return tuple(ranges)
00100
00101 def channelRange(self, spc, vel_range):
00102 """
00103 A helper method which works on a tuple with abcissa/flux vectors (i.e. as returned by uvSpectrum). It
00104 allows to convert supplied velocity range into the channel range.
00105
00106 spc - tuple with the spectrum (first is the vector of abcissa values, second is the spectrum itself)
00107 vel_range - a 2-element tuple with start and stop velocity of the range
00108
00109 return: a 2-element tuple with channels
00110 Note, if supplied range is completely outside the spectrum, an empty tuple will be returned
00111 """
00112 if len(spc) != 2:
00113 raise RuntimeError, "spc supplied to channelRange is supposed to have 2 elements"
00114 if len(spc[0]) != len(spc[1]):
00115 raise RuntimeError, "spc supplied to channelRange is supposed to have 2 vectors of equal length"
00116 if len(vel_range) != 2:
00117 raise RuntimeError, "vel_range supplied to channelRange is supposed to have 2 elements"
00118 if vel_range[0]>=vel_range[1]:
00119 raise RuntimeError, "start velocity is supposed to be less than end velocity, vel_range: %s" % vel_range
00120 if len(spc[0])<=2:
00121 raise RuntimeError, "Spectrum should contain more than 2 points, you have %i" % len(spc[0])
00122 chans = list(vel_range)
00123 for j in range(len(chans)):
00124 chans[j] = -1
00125 for i in range(len(spc[0])):
00126 if i!=0:
00127 prev_vel = spc[0][i-1]
00128 else:
00129 prev_vel = spc[0][i+1]
00130 delta = max(prev_vel, spc[0][i]) - min(prev_vel, spc[0][i])
00131 for j in range(len(vel_range)):
00132 if abs(vel_range[j]-spc[0][i])<delta:
00133 chans[j] = i
00134 if chans[0] == chans[1]:
00135 return ()
00136 if chans[1] == -1:
00137 chans[1] = len(spc[0])-1
00138 if chans[0] == -1:
00139 chans[0] = 0
00140 if chans[0]<=chans[1]:
00141 return tuple(chans)
00142 return (chans[1],chans[0])
00143
00144 def _absvalComp(self,x,y):
00145 """
00146 A helper method to compare x and y by absolute value (to do sorting)
00147
00148 x - first value
00149 y - second value
00150
00151 return -1,0 or 1 depending on the result of comparison
00152 """
00153 if abs(x)<abs(y):
00154 return -1
00155 elif abs(x)>abs(y):
00156 return 1
00157 else:
00158 return 0
00159
00160 def rms(self):
00161 """
00162 Return rms scatter of the spectral points (with respect to the median) calculated
00163 during last find_lines call. Note, this method throws an exception if
00164 find_lines has never been called.
00165 """
00166 if self._rms==None:
00167 raise RuntimeError, "call find_lines before using the rms method"
00168 return self._rms
00169
00170 def median(self):
00171 """
00172 Return the median of the last spectrum passed to find_lines.
00173 Note, this method throws an exception if find_lines has never been called.
00174 """
00175 if self._median==None:
00176 raise RuntimeError, "call find_lines before using the median method"
00177 return self._median
00178
00179 def _mergeIntervals(self, lines, spc):
00180 """
00181 A helper method to merge intervals.
00182
00183 lines - list of tuples with first and last channels of all intervals
00184 spc - spectrum (to be able to test whether adjacent intervals have the
00185 same sign.
00186 """
00187 toberemoved = []
00188 for i in range(1,len(lines)):
00189 if lines[i-1][1]+1>=lines[i][0]:
00190 if (spc[lines[i-1][1]]>self._median) == (spc[lines[i][0]]>self._median):
00191 lines[i] = (lines[i-1][0],lines[i][1])
00192 toberemoved.append(i-1)
00193 toberemoved.sort()
00194 for i in range(len(toberemoved)-1,-1,-1):
00195 if toberemoved[i]>=len(lines):
00196 raise RuntimeError, "this shouldn't have happened!"
00197 lines.pop(toberemoved[i])
00198
00199 def _splitIntervals(self,lines,spc,threshold=3,minchan=3):
00200 """
00201 A helper method used in the spectral line detection routine. It splits given intervals
00202 into a number of "spectral lines". Each line is characterised by a single extremum.
00203 Noise is dealt with by taking into account only those extrema, where a difference with
00204 respect to surrounding spectral points exceeds threshold times rms (stored inside this
00205 class, so the main body of the line detection should be executed first) and there are
00206 at least minchan such points.
00207 """
00208 if minchan<1:
00209 raise RuntimeError, "minchan in _splitIntervals is not supposed to be less than 1, you have %s" % minchan
00210 newlines = []
00211 for line in lines:
00212 if line[1]-line[0]+1 <= minchan:
00213 newlines.append(line)
00214 wasIncreasing = None
00215 derivSignReversals = []
00216 for ch in range(line[0]+1,line[1]+1):
00217 diff=spc[ch]-spc[ch-1]
00218 isIncreasing = (diff>0)
00219 if wasIncreasing != None:
00220 if isIncreasing != wasIncreasing:
00221 derivSignReversals.append((ch,isIncreasing))
00222 wasIncreasing = isIncreasing
00223 if len(derivSignReversals)==0:
00224 newlines.append(line)
00225 elif len(derivSignReversals)%2 != 1:
00226 self.writeLog("SplitIntervals warning. Derivative is expected to have odd number of reversals within the interval: \"%s\" " % derivSignReversals);
00227 newlines.append(line)
00228 elif derivSignReversals[0][1]!=derivSignReversals[-1][1]:
00229 self.writeLog("SplitIntervals warning. Derivative is expected to have the same sign at the start and at the end of each interval: \"%s\"" % derivSignReversals)
00230 newlines.append(line)
00231 else:
00232 startchan = line[0]
00233 for i in range(len(derivSignReversals)):
00234 if i%2 == 1:
00235 newlines.append((startchan,derivSignReversals[i][0]-1))
00236 startchan = derivSignReversals[i][0]
00237 newlines.append((startchan,line[1]))
00238 return newlines
00239
00240 def find_lines(self,spc,threshold=3,edge=0,minchan=3, tailsearch = True, splitFeatures = False):
00241 """
00242 A simple spectral line detection routine, which assumes that bandpass has been
00243 taken out perfectly and no channels are flagged within the spectrum. A detection
00244 is reported if consequtive minchan number of channels is consistently above or
00245 below the median value. The threshold is given in terms of the rms calculated
00246 using 80% of the lowest data points by the absolute value (with respect to median)
00247
00248 spc - a list or tuple with the spectrum, no default
00249 threshold - detection threshold, default is 3 sigma, see above for the definition
00250 edge - if non-zero, this number of spectral channels will be rejected at the edge.
00251 Default is not to do any rejection.
00252 minchan - minimum number of consequitive channels exceeding threshold to claim the
00253 detection, default is 3.
00254 tailsearch - if True (default), the algorithm attempts to widen each line until
00255 its flux crosses the median. It merges lines if necessary. Set this
00256 option off if you need to split the lines according to some criterion
00257 splitFeatures - if True, the algorithm attempts to split each detected spectral feature into
00258 a number of spectral lines (just one local extremum). The default action is
00259 not to do it (may give an adverse results if the noise is high)
00260
00261 This method returns a list of tuples each containing start and stop 0-based channel
00262 number of every detected line. Empty list if nothing has been detected.
00263
00264 Note. The median and rms about this median is stored inside this class and can
00265 be obtained with rms and median methods.
00266 """
00267 if edge<0:
00268 raise RuntimeError, "edge parameter of find_lines should be non-negative, you have %s" % edge
00269 if 2*edge>=len(spc):
00270 raise RuntimeError, "edge is too high (%i), you rejected all channels (%i)" % (edge, len(spc))
00271 if threshold<=0:
00272 raise RuntimeError, "threshold parameter of find_lines should be positive, you have %s" % threshold
00273 if minchan<=0:
00274 raise RuntimeError, "minchan parameter of find_lines should be positive, you have %s" % minchan
00275
00276
00277 tmpspc = spc[edge:len(spc)-edge+1]
00278 if len(tmpspc)<2:
00279 raise RuntimeError, "Too many channels are rejected. Decrease edge parameter or provide a longer spectrum."
00280 tmpspc.sort()
00281 self._median=tmpspc[len(tmpspc)/2]
00282
00283 for i in range(len(tmpspc)):
00284 tmpspc[i]-=self._median
00285 tmpspc.sort(cmp=self._absvalComp)
00286 sc = StatCalculator()
00287 for i in tmpspc[:-int(0.2*len(tmpspc))]:
00288 sc.add(i)
00289 self._rms=sc.rms()
00290
00291 self.writeLog("Spectral line detection with edge=%i, threshold=%f, minchan=%i and tailsearch=%s" % (edge,threshold, minchan, tailsearch))
00292 self.writeLog("statistics: median=%f, rms=%f" % (self._median, self._rms))
00293
00294
00295 lines=[]
00296 wasAbove = None
00297 nchan = 0
00298 startchan=None
00299 for i in range(edge,len(spc)-edge):
00300 if abs(spc[i]-self._median)>threshold*self._rms:
00301 isAbove=(spc[i] > self._median)
00302 if nchan!=0:
00303 if wasAbove == isAbove:
00304 nchan+=1
00305 else:
00306 if nchan>=minchan:
00307 lines.append((startchan,i-1))
00308 nchan=1
00309 wasAbove = isAbove
00310 startchan = i
00311 else:
00312 nchan=1
00313 wasAbove = isAbove
00314 startchan = i
00315 else:
00316 if nchan>=minchan:
00317 lines.append((startchan,i-1))
00318 nchan = 0
00319 if nchan>=minchan:
00320 lines.append((startchan,len(spc)-edge-1))
00321
00322 if tailsearch:
00323 for i in range(len(lines)):
00324 wasAbove = None
00325 curRange = list(lines[i])
00326 for x in range(curRange[0],edge,-1):
00327 isAbove=(spc[x] > self._median)
00328 if wasAbove == None:
00329 wasAbove = isAbove
00330 if isAbove!=wasAbove:
00331 curRange[0]=x+1
00332 break
00333 for x in range(curRange[1],len(spc)-edge):
00334 isAbove=(spc[x] > self._median)
00335 if isAbove!=wasAbove:
00336 curRange[1]=x-1
00337 break
00338 lines[i]=tuple(curRange)
00339 self._mergeIntervals(lines,spc)
00340 if splitFeatures:
00341 return self._splitIntervals(lines,spc,threshold,minchan)
00342 return lines