#!/usr/bin/python -tt
#
# Werner Lustermann, Dominik Neise
# ETH Zurich, TU Dortmund
#
import ctypes           # SlowData._register() uses the ctypes namespace explicitly
from ctypes import *
import numpy as np
import pprint           # for SlowData
from scipy import signal

# get the ROOT stuff + my shared libs
from ROOT import gSystem
# factfits_h.so is made from factfits.h and is used to access the data
# make sure the location of factfits_h.so is in LD_LIBRARY_PATH.
# having it in PYTHONPATH is *not* sufficient
gSystem.Load('factfits_h.so')
gSystem.Load('calfactfits_h.so')
from ROOT import *

class RawDataFeeder( object ):
    """ Wrapper class for RawData class
        capable of iterating over multiple RawData files
    """

    def __init__(self, filelist):
        """ *filelist* list of files to iterate over
            the list should contain tuples, or sublists, of two filenames
            the first should be a data file (\*.fits.gz)
            the second should be an amplitude calibration file (\*.drs.fits.gz)
        """
        # sanity check for input
        if type(filelist) != type(list()):
            raise TypeError('filelist should be a list')
        for entry in filelist:
            if len(entry) != 2:
                raise TypeError('the entries of filelist should have length == 2')
            for path in entry:
                if type(path) != type(str()):
                    raise TypeError('the entries of filelist should be paths, i.e. of type str()')
                # todo: check if 'path' is a valid path
                # else: throw an Exception, or Warning?

        self.filelist = filelist
        self._current_RawData = RawData(filelist[0][0], filelist[0][1], return_dict=True)
        del self.filelist[0]

    def __iter__(self):
        return self

    def next(self):
        """ Method being called by the iterator.
            Since the RawData objects are simply looped over, the event_id from the
            RawData object will not be unique. Each RawData object will start with
            event_id = 1 as usual.
        """
        try:
            return self._current_RawData.next()
        except StopIteration:
            # current_RawData was completely processed
            # delete it (I hope this calls the destructor of the fits file and/or closes it)
            del self._current_RawData
            # and remake it, if possible
            if len(self.filelist) > 0:
                self._current_RawData = RawData(self.filelist[0][0], self.filelist[0][1], return_dict=True)
                del self.filelist[0]
            else:
                raise
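def _example_feeder_usage():
    """ a minimal sketch of RawDataFeeder usage; the file names below are
        hypothetical -- it chains several runs into one event loop
    """
    feeder = RawDataFeeder([
        ('20111124_117.fits.gz', '20111124_114.drs.fits.gz'),
        ('20111124_118.fits.gz', '20111124_114.drs.fits.gz')])
    for event in feeder:
        # event is the dict returned by RawData.next(), see RawData below
        print event['event_id'], event['acal_data'][0, 0]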
class RawData( object ):
    """ raw data access and calibration

        - open raw data file and drs calibration file
        - performs amplitude calibration
        - performs baseline subtraction if wanted
        - provides all data in an array:
            row = number of pixel
            col = length of region of interest
    """

    def __init__(self, data_file_name, calib_file_name,
                 user_action_calib=lambda acal_data, data, blm, tom, gm, scells, nroi: None,
                 baseline_file_name='',
                 return_dict = None,
                 do_calibration = True,
                 use_CalFactFits = True):
        """ initialize object

            open data file and calibration data file
            get basic information about the data in data_file_name
            allocate buffers for data access

            data_file_name : fits or fits.gz file of the data including the path
            calib_file_name : fits or fits.gz file containing DRS calibration data
            baseline_file_name : npy file containing the baseline values
        """
        self.__module__='pyfact'
        # manual implementation of a default value; this way we can find out
        # whether the user of this class is aware of the new option
        if return_dict == None:
            print 'Warning: RawData.__init__() has a new option "return_dict"'
            print 'the default value of this option is False, so nothing changes for you at the moment.'
            print
            print 'you probably want to get a dictionary out of the next() method anyway,'
            print 'so please change your scripts and set this option to True for the moment,'
            print 'e.g. like this: run = RawData(data_filename, calib_filename, return_dict = True)'
            print "after a while the default value will turn to True ... so you don't have to give the option anymore"
            print 'and some time later, the option will not be supported anymore'
            return_dict = False
        self.return_dict = return_dict
        self.use_CalFactFits = use_CalFactFits

        self.do_calibration = do_calibration

        self.data_file_name = data_file_name
        self.calib_file_name = calib_file_name
        self.baseline_file_name = baseline_file_name

        self.user_action_calib = user_action_calib

        # baseline correction: True / False
        if len(baseline_file_name) == 0:
            self.correct_baseline = False
        else:
            self.correct_baseline = True

        # access data file
        if use_CalFactFits:
            try:
                data_file = CalFactFits(data_file_name, calib_file_name)
            except IOError:
                print 'problem accessing data file: ', data_file_name
                raise       # stop ! no data

            self.data_file = data_file
            self.data = np.empty( data_file.npix * data_file.nroi, np.float64 )
            data_file.SetNpcaldataPtr(self.data)
            self.data = self.data.reshape( data_file.npix, data_file.nroi )
            self.acal_data = self.data

            self.nroi = data_file.nroi
            self.npix = data_file.npix
            self.nevents = data_file.nevents

            # data per event
            self.event_id = None
            self.trigger_type = None
            self.start_cells = None
            self.board_times = None
        else:
            try:
                data_file = FactFits(self.data_file_name)
            except IOError:
                print 'problem accessing data file: ', data_file_name
                raise       # stop ! no data

            self.data_file = data_file

            # get basic information about the data file
            #: region of interest (number of DRS slices read)
            self.nroi = data_file.GetUInt('NROI')
            #: number of pixels (should be 1440)
            self.npix = data_file.GetUInt('NPIX')
            #: number of events in the data run
            self.nevents = data_file.GetNumRows()

            # allocate the data memories
            self.event_id = c_ulong()
            self.trigger_type = c_ushort()
            #: 2D array with raw data (npix x nroi)
            self.data = np.zeros( self.npix * self.nroi, np.int16 ).reshape(self.npix, self.nroi)
            #: slice where drs readout started
            self.start_cells = np.zeros( self.npix, np.int16 )
            #: time when the FAD was triggered, in some strange units...
            self.board_times = np.zeros( 40, np.int32 )

            # set the pointers to the data
            data_file.SetPtrAddress('EventNum', self.event_id)
            data_file.SetPtrAddress('TriggerType', self.trigger_type)
            data_file.SetPtrAddress('StartCellData', self.start_cells)
            data_file.SetPtrAddress('Data', self.data)
            data_file.SetPtrAddress('BoardTime', self.board_times)

            # open the calibration file
            try:
                calib_file = FactFits(self.calib_file_name)
            except IOError:
                print 'problem accessing calibration file: ', calib_file_name
                raise
            #: drs calibration file
            self.calib_file = calib_file

            baseline_mean = calib_file.GetN('BaselineMean')
            gain_mean = calib_file.GetN('GainMean')
            trigger_offset_mean = calib_file.GetN('TriggerOffsetMean')

            self.Nblm = baseline_mean / self.npix
            self.Ngm = gain_mean / self.npix
            self.Ntom = trigger_offset_mean / self.npix

            self.blm = np.zeros(baseline_mean, np.float32).reshape(self.npix, self.Nblm)
            self.gm = np.zeros(gain_mean, np.float32).reshape(self.npix, self.Ngm)
            self.tom = np.zeros(trigger_offset_mean, np.float32).reshape(self.npix, self.Ntom)

            calib_file.SetPtrAddress('BaselineMean', self.blm)
            calib_file.SetPtrAddress('GainMean', self.gm)
            calib_file.SetPtrAddress('TriggerOffsetMean', self.tom)
            calib_file.GetRow(0)

            # duplicate the calibration constants, so we never need to roll
            self.blm = np.hstack((self.blm, self.blm))
            self.gm = np.hstack((self.gm, self.gm))
            self.tom = np.hstack((self.tom, self.tom))

        self.v_bsl = np.zeros(self.npix)    # array of baseline values (all ZERO)
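    # Typical usage (a sketch; the file names are hypothetical):
    #
    #   run = RawData('20111124_117.fits.gz', '20111124_114.drs.fits.gz',
    #                 return_dict=True)
    #   for event in run:
    #       print event['event_id'], event['acal_data'][0, 0]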
    def __iter__(self):
        """ iterator """
        return self

    def next(self):
        """ used by __iter__ """
        if self.use_CalFactFits:
            if self.data_file.GetCalEvent() == False:
                raise StopIteration
            else:
                self.event_id = self.data_file.event_id
                self.trigger_type = self.data_file.event_triggertype
                self.start_cells = self.data_file.event_offset
                self.board_times = self.data_file.event_boardtimes
                #self.acal_data = self.data.copy().reshape(self.data_file.npix, self.data_file.nroi)
        else:
            if self.data_file.GetNextRow() == False:
                raise StopIteration
            else:
                if self.do_calibration == True:
                    self.calibrate_drs_amplitude()

        #print 'nevents = ', self.nevents, 'event_id = ', self.event_id.value
        if self.return_dict:
            return self.__dict__
        else:
            return self.acal_data, self.start_cells, self.trigger_type.value

    def next_event(self):
        """ load the next event from disk and calibrate it """
        if self.use_CalFactFits:
            self.data_file.GetCalEvent()
        else:
            self.data_file.GetNextRow()
            self.calibrate_drs_amplitude()
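    # The calibration below boils down to this per-pixel arithmetic
    # (a sketch; sc is the pixel's start cell, i runs over the ROI):
    #
    #   acal[i] = (raw[i] * 2000./4096. - blm[sc+i] - tom[i]) / gm[sc+i] * 1907.35
    #
    # blm and gm are indexed relative to the physical DRS pipeline (hence
    # the shift by sc), while tom is indexed relative to the start cell.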
    def calibrate_drs_amplitude(self):
        """ perform the drs amplitude calibration of the event data """
        # shortcuts
        blm = self.blm
        gm = self.gm
        tom = self.tom

        to_mV = 2000./4096.
        #: 2D array with amplitude calibrated data in mV
        acal_data = self.data * to_mV # convert ADC counts to mV

        for pixel in range( self.npix ):
            # shortcuts
            sc = self.start_cells[pixel]
            roi = self.nroi
            # rotate the pixel baseline mean to the Data startCell
            acal_data[pixel,:] -= blm[pixel,sc:sc+roi]
            # the 'trigger offset mean' does not need to be rolled;
            # on the contrary, it seems there is an offset in the DRS data
            # which is related to its distance to the startCell, not to its
            # distance to the beginning of the physical pipeline in the DRS chip
            acal_data[pixel,:] -= tom[pixel,0:roi]
            # rotate the pixel gain mean to the Data startCell
            acal_data[pixel,:] /= gm[pixel,sc:sc+roi]

        self.acal_data = acal_data * 1907.35

        self.user_action_calib( self.acal_data,
                                np.reshape(self.data, (self.npix, self.nroi)),
                                blm, tom, gm, self.start_cells, self.nroi )

    def baseline_read_values(self, file, bsl_hist='bsl_sum/hplt_mean'):
        """ open ROOT file with baseline histogram and read baseline values

            file      name of the root file
            bsl_hist  path to the histogram containing the baseline values
        """
        try:
            f = TFile(file)
        except:
            print 'Baseline data file could not be read: ', file
            return

        h = f.Get(bsl_hist)
        for i in range(self.npix):
            self.v_bsl[i] = h.GetBinContent(i+1)
        f.Close()

    def baseline_correct(self):
        """ subtract baseline from the data """
        for pixel in range(self.npix):
            self.acal_data[pixel,:] -= self.v_bsl[pixel]

    def info(self):
        """ print run information """
        print 'data file:  ', self.data_file_name
        print 'calib file: ', self.calib_file_name
        print 'calibration file'
        print 'N baseline_mean:     ', self.Nblm
        print 'N gain mean:         ', self.Ngm
        print 'N TriggerOffsetMean: ', self.Ntom

# -----------------------------------------------------------------------------
class RawDataFake( object ):
    """ raw data FAKE access, similar to real RawData access
    """

    def __init__(self, data_file_name, calib_file_name,
                 user_action_calib=lambda acal_data, data, blm, tom, gm, scells, nroi: None,
                 baseline_file_name=''):
        self.__module__='pyfact'

        self.data_file_name = data_file_name
        self.calib_file_name = calib_file_name

        self.nroi = 300
        self.npix = 9
        self.nevents = 1000

        self.simulator = None

        self.time = np.ones(1024) * 0.5

        self.event_id = c_ulong(0)
        self.trigger_type = c_ushort(4)
        self.data = np.zeros( self.npix * self.nroi, np.int16 ).reshape(self.npix, self.nroi)
        self.start_cells = np.zeros( self.npix, np.int16 )
        self.board_times = np.zeros( 40, np.int32 )

    def __iter__(self):
        """ iterator """
        return self

    def next(self):
        """ used by __iter__ """
        self.event_id = c_ulong(self.event_id.value + 1)
        self.board_times = self.board_times + 42
        if self.event_id.value >= self.nevents:
            raise StopIteration
        else:
            self._make_event_data()
        return self.__dict__

    def _make_event_data(self):
        sample_times = self.time.cumsum() - self.time[0]

        # random start cell
        self.start_cells = np.ones( self.npix, np.int16 ) * np.random.randint(0, 1024)

        starttime = self.start_cells[0]

        signal = self._std_sinus_simu(sample_times, starttime)

        # stack the signal npix times: one copy per pixel
        data = np.vstack( (signal, signal) )
        for i in range(self.npix - 2):
            data = np.vstack( (data, signal) )
        self.data = data

    def _std_sinus_simu(self, times, starttime):
        period = 10 # in ns

        # give a jitter on starttime
        starttime = np.random.normal(starttime, 0.05)

        phase = 0.0
        signal = 10 * np.sin(times * 2*np.pi/period + starttime + phase)
        # add some noise
        noise = np.random.normal(0.0, 0.5, signal.shape)
        signal += noise
        return signal

    def info(self):
        """ print run information """
        print 'data file:  ', self.data_file_name
        print 'calib file: ', self.calib_file_name
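def _example_baseline_correction():
    """ a minimal sketch of the baseline-correction workflow;
        the file names and the histogram path are hypothetical
    """
    run = RawData('20111124_117.fits.gz', '20111124_114.drs.fits.gz',
                  return_dict=True)
    # fill run.v_bsl from a ROOT histogram of per-pixel baselines
    run.baseline_read_values('20111124_bsl.root', 'bsl_sum/hplt_mean')
    for event in run:
        run.baseline_correct()
        # event['acal_data'] is now amplitude calibrated *and* baseline subtracted
        break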
# -----------------------------------------------------------------------------
class SlowData( FactFits ):
    """ -Fact SlowData File-

        A Python wrapper for the fits class implemented in pyfits.h;
        provides easy access to the fits file meta data.

        * dictionary of file metadata - self.meta
        * dict of table metadata - self.columns
        * variable table column access, thus possibly increased speed while looping
    """
    def __init__(self, path):
        """ creates meta and columns dictionaries """
        self.path = path
        try:
            FactFits.__init__(self, path)
        except IOError:
            print 'problem accessing data file: ', path
            raise       # stop ! no data

        self.meta = self._make_meta_dict()
        self.columns = self._make_columns_dict()
        self.treat_meta_dict()

        # list of columns which are already registered,
        # see method register()
        self._registered_cols = []
        # dict of column data; this is used in order to be able to remove
        # the ctypes later
        self._table_cols = {}

        # I need to count the rows, since the normal loop mechanism seems not to work.
        self._current_row = 0

        self.stacked_cols = {}

    def _make_meta_dict(self):
        """ This method retrieves meta information about the fits file and
            stores this information in a dict

            return: dict
                key: string - all capital letters
                value: tuple( numerical value, string comment )
        """
        # intermediate variables for file metadata dict generation
        keys = self.GetPy_KeyKeys()
        values = self.GetPy_KeyValues()
        comments = self.GetPy_KeyComments()
        types = self.GetPy_KeyTypes()

        if len(keys) != len(values):
            raise TypeError('len(keys)', len(keys), ' != len(values)', len(values))
        if len(keys) != len(types):
            raise TypeError('len(keys)', len(keys), ' != len(types)', len(types))
        if len(keys) != len(comments):
            raise TypeError('len(keys)', len(keys), ' != len(comments)', len(comments))

        meta_dict = {}
        for i in range(len(keys)):
            key_type = types[i]
            if key_type == 'I':
                value = int(values[i])
            elif key_type == 'F':
                value = float(values[i])
            elif key_type == 'B':
                if values[i] == 'T':
                    value = True
                elif values[i] == 'F':
                    value = False
                else:
                    raise TypeError("meta-type is 'B', but meta-value is neither 'T' nor 'F'. meta-value:", values[i])
            elif key_type == 'T':
                value = values[i]
            else:
                raise TypeError("unknown meta-type: known meta-types are I, F, B and T. meta-type:", key_type)
            meta_dict[keys[i]] = (value, comments[i])
        return meta_dict

    def _make_columns_dict(self):
        """ This method retrieves information about the columns
            stored inside the fits file's internal binary table.

            returns: dict
                key: string - column name, all capital letters
                value: tuple(
                    number of elements in table field - integer
                    size of element in bytes -- not really interesting for any
                        user, might be omitted in future versions
                    type - a single character code -- should be translated into
                        a comprehensible word
                    unit - string like 'mV' or 'ADC count' )
        """
        # intermediate variables for file table-metadata dict generation
        keys = self.GetPy_ColumnKeys()
        #offsets = self.GetPy_ColumnOffsets()   # not needed on python level...
        nums = self.GetPy_ColumnNums()
        sizes = self.GetPy_ColumnSizes()
        types = self.GetPy_ColumnTypes()
        units = self.GetPy_ColumnUnits()

        # zip the values
        values = zip(nums, sizes, types, units)
        # create the columns dictionary
        columns = dict(zip(keys, values))
        return columns

    def stack(self, on=True):
        self.next()
        for col in self._registered_cols:
            if isinstance(self.dict[col], np.ndarray):
                self.stacked_cols[col] = self.dict[col]
            else:
            # elif isinstance(self.dict[col], ctypes._SimpleCData):
                self.stacked_cols[col] = np.array(self.dict[col])
            # else:
            #     raise TypeError("I don't know how to stack " + col + ". It is of type: " + str(type(self.dict[col])))
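    # Typical usage (a sketch; the file name and column name are hypothetical):
    #
    #   f = SlowData('20111124.FTM_CONTROL_TRIGGER_RATES.fits')
    #   f.register('Time')          # or f.register('all')
    #   for row in f:
    #       print row.Time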
It is of type: "+str(type(self.dict[col]))) def register(self, input_str): columns = self.columns if input_str.lower() == 'all': for col in columns: self._register(col) else: #check if colname is in columns: if input_str not in columns: error_msg = 'colname:'+ input_str +' is not a column in the binary table.\n' error_msg+= 'possible colnames are\n' for key in columns: error_msg += key+'\n' raise KeyError(error_msg) else: self._register(input_str) # 'private' method, do not use def _register( self, colname): columns = self.columns local = None number_of_elements = int(columns[colname][0]) size_of_elements_in_bytes = int(columns[colname][1]) ctypecode_of_elements = columns[colname][2] physical_unit_of_elements = columns[colname][3] # snippet from the C++ source code, or header file to be precise: #case 'L': gLog << "bool(8)"; break; #case 'B': gLog << "byte(8)"; break; #case 'I': gLog << "short(16)"; break; #case 'J': gLog << "int(32)"; break; #case 'K': gLog << "int(64)"; break; #case 'E': gLog << "float(32)"; break; #case 'D': gLog << "double(64)"; break; # the fields inside the columns can either contain single numbers, # or whole arrays of numbers as well. # we treat single elements differently... if number_of_elements == 1: # allocate some memory for a single number according to its type if ctypecode_of_elements == 'J': # J is for a 4byte int, i.e. an unsigned long local = ctypes.c_ulong() un_c_type = long elif ctypecode_of_elements == 'I': # I is for a 2byte int, i.e. an unsinged int local = ctypes.c_ushort() un_c_type = int elif ctypecode_of_elements == 'B': # B is for a byte local = ctypes.c_ubyte() un_c_type = int elif ctypecode_of_elements == 'D': local = ctypes.c_double() un_c_type = float elif ctypecode_of_elements == 'E': local = ctypes.c_float() un_c_type = float elif ctypecode_of_elements == 'A': local = ctypes.c_uchar() un_c_type = chr elif ctypecode_of_elements == 'K': local = ctypes.c_ulonglong() un_c_type = long else: raise TypeError('unknown ctypecode_of_elements:',ctypecode_of_elements) else: if ctypecode_of_elements == 'B': # B is for a byte nptype = np.int8 elif ctypecode_of_elements == 'A': # A is for a char .. but I don't know how to handle it nptype = np.int8 elif ctypecode_of_elements == 'I': # I is for a 2byte int nptype = np.int16 elif ctypecode_of_elements == 'J': # J is for a 4byte int nptype = np.int32 elif ctypecode_of_elements == 'K': # B is for a byte nptype = np.int64 elif ctypecode_of_elements == 'E': # B is for a byte nptype = np.float32 elif ctypecode_of_elements == 'D': # B is for a byte nptype = np.float64 else: raise TypeError('unknown ctypecode_of_elements:',ctypecode_of_elements) local = np.zeros( number_of_elements, nptype) # Set the Pointer Address self.SetPtrAddress(colname, local) self._table_cols[colname] = local if number_of_elements > 1: self.__dict__[colname] = local self.dict[colname] = local else: # remove any traces of ctypes: self.__dict__[colname] = local.value self.dict[colname] = local.value self._registered_cols.append(colname) def treat_meta_dict(self): """make 'interesting' meta information available like normal members. non interesting are: TFORM, TUNIT, and TTYPE since these are available via the columns dict. 
""" self.number_of_rows = self.meta['NAXIS2'][0] self.number_of_columns = self.meta['TFIELDS'][0] # there are some information in the meta dict, which are alsways there: # there are regarded as not interesting: uninteresting_meta = {} uninteresting_meta['arraylike'] = {} uninteresting = ['NAXIS', 'NAXIS1', 'NAXIS2', 'TFIELDS', 'XTENSION','EXTNAME','EXTREL', 'BITPIX', 'PCOUNT', 'GCOUNT', 'ORIGIN', 'PACKAGE', 'COMPILED', 'CREATOR', 'TELESCOP','TIMESYS','TIMEUNIT','VERSION'] for key in uninteresting: if key in self.meta: uninteresting_meta[key]=self.meta[key] del self.meta[key] # the table meta data contains # shortcut to access the meta dict. But this needs to # be cleaned up quickly!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! meta = self.meta # loop over keys: # * try to find array-like keys arraylike = {} singlelike = [] for key in self.meta: stripped = key.rstrip('1234567890') if stripped == key: singlelike.append(key) else: if stripped not in arraylike: arraylike[stripped] = 0 else: arraylike[stripped] += 1 newmeta = {} for key in singlelike: newmeta[key.lower()] = meta[key] for key in arraylike: uninteresting_meta['arraylike'][key.lower()] = [] for i in range(arraylike[key]+1): if key+str(i) in meta: uninteresting_meta['arraylike'][key.lower()].append(meta[key+str(i)]) self.ui_meta = uninteresting_meta # make newmeta self for key in newmeta: self.__dict__[key]=newmeta[key] dict = self.__dict__.copy() del dict['meta'] del dict['ui_meta'] self.dict = dict def __iter__(self): """ iterator """ return self def next(self): """ used by __iter__ """ # Here one might check, if looping makes any sense, and if not # one could stop looping or so... # like this: # # if len(self._registered_cols) == 0: # print 'warning: looping without any registered columns' if self._current_row < self.number_of_rows: if self.GetNextRow() == False: raise StopIteration for col in self._registered_cols: if isinstance(self._table_cols[col], ctypes._SimpleCData): self.__dict__[col] = self._table_cols[col].value self.dict[col] = self._table_cols[col].value for col in self.stacked_cols: if isinstance(self.dict[col], type(np.array(''))): self.stacked_cols[col] = np.vstack( (self.stacked_cols[col],self.dict[col]) ) else: self.stacked_cols[col] = np.vstack( (self.stacked_cols[col],np.array(self.dict[col])) ) self._current_row += 1 else: raise StopIteration return self def show(self): pprint.pprint(self.dict) class fnames( object ): """ organize file names of a FACT data run """ def __init__(self, specifier = ['012', '023', '2011', '11', '24'], rpath = '/scratch_nfs/res/bsl/', zipped = True): """ specifier : list of strings defined as: [ 'DRS calibration file', 'Data file', 'YYYY', 'MM', 'DD'] rpath : directory path for the results; YYYYMMDD will be appended to rpath zipped : use zipped (True) or unzipped (Data) """ self.specifier = specifier self.rpath = rpath self.zipped = zipped self.make( self.specifier, self.rpath, self.zipped ) def make( self, specifier, rpath, zipped ): """ create (make) the filenames names : dictionary of filenames, tags { 'data', 'drscal', 'results' } data : name of the data file drscal : name of the drs calibration file results : radikal of file name(s) for results (to be completed by suffixes) """ self.specifier = specifier if zipped: dpath = '/data00/fact-construction/raw/' ext = '.fits.gz' else: dpath = '/data03/fact-construction/raw/' ext = '.fits' year = specifier[2] month = specifier[3] day = specifier[4] yyyymmdd = year + month + day dfile = specifier[1] cfile = specifier[0] rpath = rpath + 
    def make(self, specifier, rpath, zipped):
        """ create (make) the filenames

            names : dictionary of filenames, tags { 'data', 'drscal', 'results' }
            data : name of the data file
            drscal : name of the drs calibration file
            results : stem of the file name(s) for results (to be completed by suffixes)
        """
        self.specifier = specifier

        if zipped:
            dpath = '/data00/fact-construction/raw/'
            ext = '.fits.gz'
        else:
            dpath = '/data03/fact-construction/raw/'
            ext = '.fits'

        year = specifier[2]
        month = specifier[3]
        day = specifier[4]

        yyyymmdd = year + month + day
        dfile = specifier[1]
        cfile = specifier[0]

        rpath = rpath + yyyymmdd + '/'
        self.rpath = rpath
        self.names = {}

        tmp = dpath + year + '/' + month + '/' + day + '/' + yyyymmdd + '_'
        self.names['data'] = tmp + dfile + ext
        self.names['drscal'] = tmp + cfile + '.drs' + ext
        self.names['results'] = rpath + yyyymmdd + '_' + dfile + '_' + cfile

        self.data = self.names['data']
        self.drscal = self.names['drscal']
        self.results = self.names['results']

    def info(self):
        """ print complete filenames """
        print 'file names:'
        print 'data:    ', self.names['data']
        print 'drs-cal: ', self.names['drscal']
        print 'results: ', self.names['results']

# end of class definition: fnames( object )

def _test_SlowData( filename ):
    print '-' * 70
    print "opened:", filename, "as 'file'"
    print
    print '-' * 70
    print 'type file.show() to look at its contents'
    print "type file.register( columnname ) or file.register('all') in order to register columns"
    print
    print " by registering a column you declare that you would like to retrieve its contents"
    print " after column-registration the 'file' has new member variables; they are named like the columns"
    print " PLEASE NOTE: immediately after registration the members exist, but they are empty."
    print " the values are assigned only when you call file.next() or when you loop over the 'file'"
    print
    print "in order to loop over it, just go like this:"
    print "for row in file:"
    print "    print row.columnname_one, row.columnname_two"
    print
    print ""
    print '-' * 70

def _test_iter( nevents ):
    """ test for function __iter__ """
    data_file_name = '/data00/fact-construction/raw/2011/11/24/20111124_117.fits.gz'
    calib_file_name = '/data00/fact-construction/raw/2011/11/24/20111124_114.drs.fits.gz'
    # data_file_name = '/home/luster/win7/FACT/data/raw/20120114/20120114_028.fits.gz'
    # calib_file_name = '/home/luster/win7/FACT/data/raw/20120114/20120114_022.drs.fits.gz'
    run = RawData( data_file_name, calib_file_name, return_dict=True )

    for event in run:
        print 'ev ', event['event_id'], \
              'data[0,0] = ', event['acal_data'][0, 0], \
              'start_cell[0] = ', event['start_cells'][0], \
              'trigger type = ', event['trigger_type']
        if run.event_id == nevents:
            break

if __name__ == '__main__':
    """ tests """
    import sys
    if len(sys.argv) == 1:
        print 'showing test of iterator of RawData class'
        print 'in order to test the SlowData class please use:', sys.argv[0], 'fits-file-name'
        _test_iter(10)
    else:
        print 'showing test of SlowData class'
        print 'in case you wanted to test the RawData class, please give no command line arguments'
        file = SlowData(sys.argv[1])
        _test_SlowData(sys.argv[1])