#!/usr/bin/python -tt # # Werner Lustermann, Dominik Neise # ETH Zurich, TU Dortmund # import sys from ctypes import * import numpy as np import pprint # for SlowData from scipy import signal # get the ROOT stuff + my shared libs import ROOT # factfits_h.so is made from factfits.h and is used to access the data # make sure the location of factfits_h.so is in LD_LIBRARY_PATH. # having it in PYTHONPATH is *not* sufficient hostname = ROOT.gSystem.HostName() libz_path_dict = { # hostname : /path/to/libz.so 'isdc' : "/usr/lib64/libz.so", 'neiseLenovo' : "/usr/lib/libz.so", 'factcontrol' : "/usr/lib/libz.so", "max-K50AB" : "/usr/lib/x86_64-linux-gnu/libz.so", "watz" : "/usr/lib/x86_64-linux-gnu/libz.so", "fact03" : "/usr/lib/x86_64-linux-gnu/libz.so", "grolsch" : "/usr/lib/i386-linux-gnu/libz.so", } libz_loaded = False for my_hostname in libz_path_dict: if my_hostname in hostname: ROOT.gSystem.Load(libz_path_dict[my_hostname]) libz_loaded = True if not libz_loaded: print """Warning - Warning - Warning - Warning - Warning - Warning - Warning I most probably need to load libz.so but I don't know where it is. Please edit pyfact.py around line 16-24 and insert your hostname and your path to your libz.so Sorry for the inconvenience. """ sys.exit(-1) #NOTE: This part has to be adapted for gcc > 4.7, where -std=c++11 can/should(?) be used. root_make_string = ROOT.gSystem.GetMakeSharedLib() if not "-std=c++0x" in root_make_string: root_make_string = root_make_string.replace('$Opt', '$Opt -std=c++0x -D HAVE_ZLIB') ROOT.gSystem.SetMakeSharedLib(root_make_string) ROOT.gROOT.ProcessLine(".L extern_Mars_mcore/izstream.h+O") ROOT.gROOT.ProcessLine(".L extern_Mars_mcore/fits.h+O") ROOT.gROOT.ProcessLine(".L extern_Mars_mcore/zfits.h+O") ROOT.gROOT.ProcessLine(".L extern_Mars_mcore/factfits.h+O") ROOT.gROOT.ProcessLine(".L calfactfits.h+O") ROOT.gInterpreter.GenerateDictionary("map","map;string;extern_Mars_mcore/fits.h") ROOT.gInterpreter.GenerateDictionary("pair","map;string;extern_Mars_mcore/fits.h") ROOT.gInterpreter.GenerateDictionary("map","map;string;extern_Mars_mcore/fits.h") ROOT.gInterpreter.GenerateDictionary("pair","map;string;extern_Mars_mcore/fits.h") #ROOT.gSystem.Load('my_string_h.so') ROOT.gSystem.Load('extern_Mars_mcore/fits_h.so') ROOT.gSystem.Load('extern_Mars_mcore/izstream_h.so') ROOT.gSystem.Load('extern_Mars_mcore/zfits_h.so') ROOT.gSystem.Load('extern_Mars_mcore/factfits_h.so') ROOT.gSystem.Load('calfactfits_h.so') from ROOT import * class RawDataFeeder( object ): """ Wrapper class for RawData class capable of iterating over multiple RawData Files """ def __init__(self, filelist): """ *filelist* list of files to iterate over the list should contain tuples, or sublists of two filenames the first should be a data file (\*.fits.gz) the second should be an amplitude calibration file(\*.drs.fits.gz) """ self.__module__ = 'pyfact' # sanity check for input if type(filelist) != type(list()): raise TypeError('filelist should be a list') for entry in filelist: if len(entry) != 2: raise TypeError('the entries of filelist should have length == 2') for path in entry: if type(path) != type(str()): raise TypeError('the entries of filelist should be path, i.e. of type str()') #todo check if 'path' is a valid path # else: throw an Exception, or Warning? self.filelist = filelist self._current_RawData = RawData(filelist[0][0], filelist[0][1], return_dict=True) del filelist[0] def __iter__(self): return self def next(): """ Method being called by the iterator. Since the RawData Objects are simply looped over, the event_id from the RawData object will not be unique. Each RawData obejct will start with event_id = 1 as usual. """ try: return self._current_RawData.next() except StopIteration: # current_RawData was completely processed # delete it (I hope this calls the destructor of the fits file and/or closes it) del self._current_RawData # and remake it, if possible if len(self.filelist) > 0: self._current_RawData = RawData(filelist[0][0], filelist[0][1], return_dict=True) del filelist[0] else: raise class RawData( object ): """ raw data access and calibration class is **iterable** - open raw data file and drs calibration file - performs amplitude calibration - performs baseline substraction if wanted - provides all data in an array: row = number of pixel col = length of region of interest """ def __init__(self, data_file_name, calib_file_name, baseline_file_name='', return_dict = True, use_CalFactFits = True, do_calibration = True, user_action_calib=lambda acal_data, data, blm, tom, gm, scells, nroi: None): """ -constructor- - open data file and calibration data file - get basic information about the data in data_file_name - allocate buffers for data access *data_file_name* : fits or fits.gz file of the data including the path *calib_file_name* : fits or fits.gz file containing DRS calibration data *baseline_file_name* : npy file containing the baseline values *return_dict* : this option will be removed in future releases. formerly the next() method returned only a subset of (important) event information, and it was not transparent how to retrieve the other (less important) information. Nowadays next() returns self.__dict__ which contains everything we were able to find in the fits file. *use_CalFactFits* : formerly the DRS amplitude calibration was implemented in python. But for performance reasons this was now moved into a C++ class called CalFactFits. For test purposes, this option can be set to False, but this is not really maintained anymore. If DRS the DRS calibration algorithm is beeing updated in C++ it may not be updated in the python implementation. *do_calibration* : In case *use_CalFactFits* is False, one may choose not to calibrate the data at all, thus safe quite some time. This is imho only needed in case one is interesting in learning something about the calibration algorithm itself. *user_action_calib* : callback function, intended for tests of the DRS calibration algorithm. but since this is not done in the Python regime anymore, this function is never called. (depending on *use_CalFactFits* of course) """ self.__module__='pyfact' # manual implementation of default value, but I need to find out # if the user of this class is aware of the new option if return_dict == False: print 'DEPRECATION WARNING:' print 'you are using RawData in a way, which is nor supported anymore.' print ' Please set: return_dict = True, in the __init__ call' self.return_dict = return_dict self.use_CalFactFits = use_CalFactFits self.do_calibration = do_calibration self.data_file_name = data_file_name self.calib_file_name = calib_file_name self.baseline_file_name = baseline_file_name self.user_action_calib = user_action_calib # baseline correction: True / False if len(baseline_file_name) == 0: self.correct_baseline = False else: self.correct_baseline = True # access data file if use_CalFactFits: try: data_file = CalFactFits(data_file_name, calib_file_name) except IOError: print 'problem accessing data file: ', data_file_name raise # stop ! no data #: either CalFactFits object or FactFits object, depending on *use_CalFactFits* self.data_file = data_file #: 1440x300 nparray containing the event data. pixel sorted according to CHID self.data = np.empty( data_file.npix * data_file.nroi, np.float64) data_file.SetNpcaldataPtr(self.data) self.data = self.data.reshape( data_file.npix, data_file.nroi ) #: copy of data. here for historical reasons self.acal_data = self.data #: region of interest. (number of DRS slices read). # for FACT data mostly 300. for special runs sometimes 1024. self.nroi = data_file.nroi #: number of Pixel in FACT. should be 1440 self.npix = data_file.npix #: the total number of events in the data_file self.nevents = data_file.nevents # Data per event #: starting at 1 self.event_id = None #: data=4 ; the rest I don't know by heart .. should be documented here :-) self.trigger_type = None #self.start_cells = None #self.board_times = None #: slice where drs readout started for all DRS chips (160) .. but enlarged to the size of 1440 pixel. thus there are always 9 equal numbers inside. self.start_cells = np.zeros( self.npix, np.int16 ) #: each FAD has an onboard clock running from startup time. Currently I don't know the time unit. However this is an array of 40 times, since we have 40 boards. self.board_times = np.zeros( 40, np.int32 ) self._unixtime_tuple = np.zeros( 2, np.int32 ) self.unixtime = None # data_file is a CalFactFits object # data_file.datafile is one of the two FactFits objects hold by a CalFactFits. # sorry for the strange naming .. data_file.datafile.SetPtrAddress('StartCellData', self.start_cells) data_file.datafile.SetPtrAddress('BoardTime', self.board_times) data_file.datafile.SetPtrAddress('UnixTimeUTC', self._unixtime_tuple) else: try: data_file = factfits(self.data_file_name) except IOError: print 'problem accessing data file: ', data_file_name raise # stop ! no data self.data_file = data_file # get basic information about the data file self.nroi = data_file.GetUInt('NROI') self.npix = data_file.GetUInt('NPIX') self.nevents = data_file.GetNumRows() # allocate the data memories self.event_id = c_ulong() self.trigger_type = c_ushort() self.data = np.zeros( self.npix * self.nroi, np.int16 ).reshape(self.npix ,self.nroi) self.start_cells = np.zeros( self.npix, np.int16 ) self.board_times = np.zeros( 40, np.int32 ) self._unixtime_tuple = np.zeros(2, np.int32 ) # set the pointers to the data++ data_file.SetPtrAddress('EventNum', self.event_id) data_file.SetPtrAddress('TriggerType', self.trigger_type) data_file.SetPtrAddress('StartCellData', self.start_cells) data_file.SetPtrAddress('Data', self.data) data_file.SetPtrAddress('BoardTime', self.board_times) data_file.SetPtrAddress('UnixTimeUTC', self._unixtime_tuple) # open the calibration file try: calib_file = factfits(self.calib_file_name) except IOError: print 'problem accessing calibration file: ', calib_file_name raise #: drs calibration file self.calib_file = calib_file baseline_mean = calib_file.GetN('BaselineMean') gain_mean = calib_file.GetN('GainMean') trigger_offset_mean = calib_file.GetN('TriggerOffsetMean') self.Nblm = baseline_mean / self.npix self.Ngm = gain_mean / self.npix self.Ntom = trigger_offset_mean / self.npix self.blm = np.zeros(baseline_mean, np.float32).reshape(self.npix , self.Nblm) self.gm = np.zeros(gain_mean, np.float32).reshape(self.npix , self.Ngm) self.tom = np.zeros(trigger_offset_mean, np.float32).reshape(self.npix , self.Ntom) calib_file.SetPtrAddress('BaselineMean', self.blm) calib_file.SetPtrAddress('GainMean', self.gm) calib_file.SetPtrAddress('TriggerOffsetMean', self.tom) calib_file.GetRow(0) # make calibration constants double, so we never need to roll self.blm = np.hstack((self.blm, self.blm)) self.gm = np.hstack((self.gm, self.gm)) self.tom = np.hstack((self.tom, self.tom)) self.v_bsl = np.zeros(self.npix) # array of baseline values (all ZERO) def __iter__(self): """ iterator """ return self def next(self): """ used by __iter__ returns self.__dict__ """ if self.use_CalFactFits: if self.data_file.GetCalEvent() == False: raise StopIteration else: self.event_id = self.data_file.event_id self.trigger_type = self.data_file.event_triggertype #self.start_cells = self.data_file.event_offset #self.board_times = self.data_file.event_boardtimes #self.acal_data = self.data.copy().reshape(self.data_file.npix, self.data_file.nroi) self.unixtime = self._unixtime_tuple[0] + self._unixtime_tuple[1]/1.e6 else: if self.data_file.GetNextRow() == False: raise StopIteration else: if self.do_calibration == True: self.calibrate_drs_amplitude() #print 'nevents = ', self.nevents, 'event_id = ', self.event_id.value if self.return_dict: return self.__dict__ else: return self.acal_data, self.start_cells, self.trigger_type.value def next_event(self): """ ---- DEPRICATED ---- load the next event from disk and calibrate it """ if self.use_CalFactFits: self.data_file.GetCalEvent() else: self.data_file.GetNextRow() self.calibrate_drs_amplitude() def calibrate_drs_amplitude(self): """ --- DEPRICATED --- since the DRS calibration is done by the C++ class CalFactFits perform the drs amplitude calibration of the event data """ # shortcuts blm = self.blm gm = self.gm tom = self.tom to_mV = 2000./4096. #: 2D array with amplitude calibrated dat in mV acal_data = self.data * to_mV # convert ADC counts to mV for pixel in range( self.npix ): #shortcuts sc = self.start_cells[pixel] roi = self.nroi # rotate the pixel baseline mean to the Data startCell acal_data[pixel,:] -= blm[pixel,sc:sc+roi] # the 'trigger offset mean' does not need to be rolled # on the contrary, it seems there is an offset in the DRS data, # which is related to its distance to the startCell, not to its # distance to the beginning of the physical pipeline in the DRS chip acal_data[pixel,:] -= tom[pixel,0:roi] # rotate the pixel gain mean to the Data startCell acal_data[pixel,:] /= gm[pixel,sc:sc+roi] self.acal_data = acal_data * 1907.35 self.user_action_calib( self.acal_data, np.reshape(self.data, (self.npix, self.nroi) ), blm, tom, gm, self.start_cells, self.nroi) def baseline_read_values(self, file, bsl_hist='bsl_sum/hplt_mean'): """ open ROOT file with baseline histogram and read baseline values *file* : name of the root file *bsl_hist* : path to the histogram containing the basline values """ try: f = TFile(file) except: print 'Baseline data file could not be read: ', file return h = f.Get(bsl_hist) for i in range(self.npix): self.v_bsl[i] = h.GetBinContent(i+1) f.Close() def baseline_correct(self): """ subtract baseline from the data DN 08.06.2011: I didn't use this function at all so far... don't know how well it works. """ for pixel in range(self.npix): self.acal_data[pixel,:] -= self.v_bsl[pixel] def info(self): """ print run information not very well implemented ... we need more info here. """ print 'data file: ', self.data_file_name print 'calib file: ', self.calib_file_name print '... we need more information printed here ... ' # ----------------------------------------------------------------------------- class RawDataFake( object ): """ raw data FAKE access similar to real RawData access DO NOT USE ... its not working """ def __init__(self, data_file_name, calib_file_name, user_action_calib=lambda acal_data, data, blm, tom, gm, scells, nroi: None, baseline_file_name=''): self.__module__='pyfact' self.nroi = 300 self.npix = 9 self.nevents = 1000 self.simulator = None self.time = np.ones(1024) * 0.5 self.event_id = c_ulong(0) self.trigger_type = c_ushort(4) self.data = np.zeros( self.npix * self.nroi, np.int16 ).reshape(self.npix ,self.nroi) self.start_cells = np.zeros( self.npix, np.int16 ) self.board_times = np.zeros( 40, np.int32 ) def __iter__(self): """ iterator """ return self def next(self): """ used by __iter__ """ self.event_id = c_ulong(self.event_id.value + 1) self.board_times = self.board_times + 42 if self.event_id.value >= self.nevents: raise StopIteration else: self._make_event_data() return self.__dict__ def _make_event_data(self): sample_times = self.time.cumsum() - time[0] # random start cell self.start_cells = np.ones( self.npix, np.int16 ) * np.random.randint(0,1024) starttime = self.start_cells[0] signal = self._std_sinus_simu(sample_times, starttime) data = np.vstack( (signal,signal) ) for i in range(8): data = np.vstack( (data,signal) ) self.data = data def _std_sinus_simu(self, times, starttime): period = 10 # in ns # give a jitter on starttime starttime = np.random.normal(startime, 0.05) phase = 0.0 signal = 10 * np.sin(times * 2*np.pi/period + starttime + phase) # add some noise noise = np.random.normal(0.0, 0.5, signal.shape) signal += noise return signal def info(self): """ print run information """ print 'data file: ', data_file_name print 'calib file: ', calib_file_name print 'calibration file' print 'N baseline_mean: ', self.Nblm print 'N gain mean: ', self.Ngm print 'N TriggeroffsetMean: ', self.Ntom # ----------------------------------------------------------------------------- import ctypes class SlowData( object ): """ -Fact SlowData File- A Python wrapper for the fits-class implemented in factfits.h provides easy access to the fits file meta data. * dictionary of file metadata - self.meta * dict of table metadata - self.columns * variable table column access, thus possibly increased speed while looping """ def __del__(self): del self.f def __init__(self, path): """ creates meta and columns dictionaries """ import os if not os.path.exists(path): raise IOError(path+' was not found') self.path = path self.__module__ = 'pyfact' try: self.f = factfits(path) except IOError: print 'problem accessing data file: ', data_file_name raise # stop ! no data self.meta = self._make_meta_dict() self.columns = self._make_columns_dict() self._treat_meta_dict() # list of columns, which are already registered # see method register() self._registered_cols = [] # dict of column data, this is used, in order to be able to remove # the ctypes of self._table_cols = {} # I need to count the rows, since the normal loop mechanism seems not to work. self._current_row = 0 self.stacked_cols = {} def _make_meta_dict__old(self): """ This method retrieves meta information about the fits file and stores this information in a dict return: dict key: string - all capital letters value: tuple( numerical value, string comment) """ # abbreviation f = self.f # intermediate variables for file metadata dict generation keys=f.GetPy_KeyKeys() values=f.GetPy_KeyValues() comments=f.GetPy_KeyComments() types=f.GetPy_KeyTypes() if len(keys) != len(values): raise TypeError('len(keys)',len(keys),' != len(values)', len(values)) if len(keys) != len(types): raise TypeError('len(keys)',len(keys),' != len(types)', len(types)) if len(keys) != len(comments): raise TypeError('len(keys)',len(keys),' != len(comments)', len(comments)) meta_dict = {} for i in range(len(keys)): type = types[i] if type == 'I': value = int(values[i]) elif type == 'F': value = float(values[i]) elif type == 'B': if values[i] == 'T': value = True elif values[i] == 'F': value = False else: raise TypeError("meta-type is 'B', but meta-value is neither 'T' nor 'F'. meta-value:",values[i]) elif type == 'T': value = values[i] else: raise TypeError("unknown meta-type: known meta types are: I,F,B and T. meta-type:",type) meta_dict[keys[i]]=(value, comments[i]) return meta_dict def _make_meta_dict(self): meta_dict = {} for key,entry in self.f.GetKeys(): type = entry.type fitsString = entry.fitsString # the original 80-char line from the FITS header comment = entry.comment value = entry.value if type == 'I': value = int(value) elif type == 'F': value = float(value) elif type == 'B': if value == 'T': value = True elif value == 'F': value = False else: raise TypeError("meta-type is 'B', but meta-value is neither 'T' nor 'F'. meta-value:",value) elif type == 'T': value = value else: raise TypeError("unknown meta-type: known meta types are: I,F,B and T. meta-type:",type) meta_dict[key]=(value, comment) return meta_dict def _make_columns_dict(self): """ This method retrieves information about the columns stored inside the fits files internal binary table. returns: dict key: string column name -- all capital letters values: tuple( number of elements in table field - integer size of element in bytes -- this is not really interesting for any user might be ommited in future versions type - a single character code -- should be translated into a comrehensible word unit - string like 'mV' or 'ADC count' """ ## abbreviation #f = self.f # ## intermediate variables for file table-metadata dict generation #keys=f.GetPy_ColumnKeys() ##offsets=self.GetPy_ColumnOffsets() #not needed on python level... #nums=f.GetPy_ColumnNums() #sizes=f.GetPy_ColumnSizes() #types=f.GetPy_ColumnTypes() #units=f.GetPy_ColumnUnits() ## zip the values #values = zip(nums,sizes,types,units) ## create the columns dictionary #columns = dict(zip(keys ,values)) columns = {} for key,col in self.f.GetColumns(): columns[key]=( col.num, col.size, col.type, col.unit) return columns def stack(self, on=True): self.next() for col in self._registered_cols: if isinstance( self.dict[col], type(np.array('')) ): self.stacked_cols[col] = self.dict[col] else: # elif isinstance(self.dict[col], ctypes._SimpleCData): self.stacked_cols[col] = np.array(self.dict[col]) # else: # raise TypeError("I don't know how to stack "+col+". It is of type: "+str(type(self.dict[col]))) def register(self, col_name): """ register for a column in the fits file after the call, this SlowData object will have a new member variable self.col_name, if col_name is a key in self.colums the value will be updated after each call of next(), or while iterating over self. NB: the initial value is zero(s) *col_name* : name of a key in self.columns, or 'all' to choose all. """ columns = self.columns if col_name.lower() == 'all': for col in columns: self._register(col) else: #check if colname is in columns: if col_name not in columns: error_msg = 'colname:'+ col_name +' is not a column in the binary table.\n' error_msg+= 'possible colnames are\n' for key in columns: error_msg += key+' ' raise KeyError(error_msg) else: self._register(col_name) # 'private' method, do not use def _register( self, colname): columns = self.columns f = self.f local = None number_of_elements = int(columns[colname][0]) size_of_elements_in_bytes = int(columns[colname][1]) ctypecode_of_elements = columns[colname][2] physical_unit_of_elements = columns[colname][3] # snippet from the C++ source code, or header file to be precise: #case 'L': gLog << "bool(8)"; break; #case 'B': gLog << "byte(8)"; break; #case 'I': gLog << "short(16)"; break; #case 'J': gLog << "int(32)"; break; #case 'K': gLog << "int(64)"; break; #case 'E': gLog << "float(32)"; break; #case 'D': gLog << "double(64)"; break; # the fields inside the columns can either contain single numbers, # or whole arrays of numbers as well. # we treat single elements differently... if number_of_elements == 0: return if number_of_elements == 1: # allocate some memory for a single number according to its type if ctypecode_of_elements == 'J': # J is for a 4byte int, i.e. an unsigned long local = ctypes.c_ulong() un_c_type = long elif ctypecode_of_elements == 'I': # I is for a 2byte int, i.e. an unsinged int local = ctypes.c_ushort() un_c_type = int elif ctypecode_of_elements == 'B': # B is for a byte local = ctypes.c_ubyte() un_c_type = int elif ctypecode_of_elements == 'D': local = ctypes.c_double() un_c_type = float elif ctypecode_of_elements == 'E': local = ctypes.c_float() un_c_type = float elif ctypecode_of_elements == 'A': local = ctypes.c_uchar() un_c_type = chr elif ctypecode_of_elements == 'K': local = ctypes.c_ulonglong() un_c_type = long else: raise TypeError('unknown ctypecode_of_elements:',ctypecode_of_elements) else: if ctypecode_of_elements == 'B': # B is for a byte nptype = np.int8 elif ctypecode_of_elements == 'A': # A is for a char .. but I don't know how to handle it nptype = np.int8 elif ctypecode_of_elements == 'I': # I is for a 2byte int nptype = np.int16 elif ctypecode_of_elements == 'J': # J is for a 4byte int nptype = np.int32 elif ctypecode_of_elements == 'K': # B is for a byte nptype = np.int64 elif ctypecode_of_elements == 'E': # B is for a byte nptype = np.float32 elif ctypecode_of_elements == 'D': # B is for a byte nptype = np.float64 else: raise TypeError('unknown ctypecode_of_elements:',ctypecode_of_elements) local = np.zeros( number_of_elements, nptype) # Set the Pointer Address try: f.SetPtrAddress(colname, local) except TypeError: print 'something was wrong with SetPtrAddress()' print 'Type of colname', type(colname) print 'colname:', colname print 'Type of local', type(local) print 'length of local', len(local) print 'local should be alle zeros, since "local = np.zeros( number_of_elements, nptype)" ' raise self._table_cols[colname] = local if number_of_elements > 1: self.__dict__[colname] = local self.dict[colname] = local else: # remove any traces of ctypes: self.__dict__[colname] = local.value self.dict[colname] = local.value self._registered_cols.append(colname) def _treat_meta_dict(self): """make 'interesting' meta information available like normal members. non interesting are: TFORM, TUNIT, and TTYPE since these are available via the columns dict. """ self.number_of_rows = self.meta['NAXIS2'][0] self.number_of_columns = self.meta['TFIELDS'][0] # there are some information in the meta dict, which are alsways there: # there are regarded as not interesting: uninteresting_meta = {} uninteresting_meta['arraylike'] = {} uninteresting = ['NAXIS', 'NAXIS1', 'NAXIS2', 'TFIELDS', 'XTENSION','EXTNAME','EXTREL', 'BITPIX', 'PCOUNT', 'GCOUNT', 'ORIGIN', 'PACKAGE', 'COMPILED', 'CREATOR', 'TELESCOP','TIMESYS','TIMEUNIT','VERSION'] for key in uninteresting: if key in self.meta: uninteresting_meta[key]=self.meta[key] del self.meta[key] # the table meta data contains # shortcut to access the meta dict. But this needs to # be cleaned up quickly!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! meta = self.meta # loop over keys: # * try to find array-like keys arraylike = {} singlelike = [] for key in self.meta: stripped = key.rstrip('1234567890') if stripped == key: singlelike.append(key) else: if stripped not in arraylike: arraylike[stripped] = 0 else: arraylike[stripped] += 1 newmeta = {} for key in singlelike: newmeta[key.lower()] = meta[key] for key in arraylike: uninteresting_meta['arraylike'][key.lower()] = [] for i in range(arraylike[key]+1): if key+str(i) in meta: uninteresting_meta['arraylike'][key.lower()].append(meta[key+str(i)]) self.ui_meta = uninteresting_meta # make newmeta self for key in newmeta: self.__dict__[key]=newmeta[key] dict = self.__dict__.copy() del dict['meta'] del dict['ui_meta'] self.dict = dict def __iter__(self): """ iterator """ return self def next(self): """ use to iterate over the file do not forget to call register() before iterating over the file call show() in order to find out, what parameters register() accepts. or just call register('all') in case you are unsure. returns self """ # abbreviaition f = self.f # Here one might check, if looping makes any sense, and if not # one could stop looping or so... # like this: # # if len(self._registered_cols) == 0: # print 'warning: looping without any registered columns' if self._current_row < self.number_of_rows: if f.GetNextRow() == False: raise StopIteration for col in self._registered_cols: if isinstance(self._table_cols[col], ctypes._SimpleCData): self.__dict__[col] = self._table_cols[col].value self.dict[col] = self._table_cols[col].value for col in self.stacked_cols: if isinstance(self.dict[col], type(np.array(''))): self.stacked_cols[col] = np.vstack( (self.stacked_cols[col],self.dict[col]) ) else: self.stacked_cols[col] = np.vstack( (self.stacked_cols[col],np.array(self.dict[col])) ) self._current_row += 1 else: raise StopIteration return self def show(self): """ """ pprint.pprint(self.dict) class fnames( object ): """ organize file names of a FACT data run """ def __init__(self, specifier = ['012', '023', '2011', '11', '24'], rpath = '/scratch_nfs/res/bsl/', zipped = True): """ specifier : list of strings defined as: [ 'DRS calibration file', 'Data file', 'YYYY', 'MM', 'DD'] rpath : directory path for the results; YYYYMMDD will be appended to rpath zipped : use zipped (True) or unzipped (Data) """ self.specifier = specifier self.rpath = rpath self.zipped = zipped self.make( self.specifier, self.rpath, self.zipped ) def make( self, specifier, rpath, zipped ): """ create (make) the filenames names : dictionary of filenames, tags { 'data', 'drscal', 'results' } data : name of the data file drscal : name of the drs calibration file results : radikal of file name(s) for results (to be completed by suffixes) """ self.specifier = specifier if zipped: dpath = '/data00/fact-construction/raw/' ext = '.fits.gz' else: dpath = '/data03/fact-construction/raw/' ext = '.fits' year = specifier[2] month = specifier[3] day = specifier[4] yyyymmdd = year + month + day dfile = specifier[1] cfile = specifier[0] rpath = rpath + yyyymmdd + '/' self.rpath = rpath self.names = {} tmp = dpath + year + '/' + month + '/' + day + '/' + yyyymmdd + '_' self.names['data'] = tmp + dfile + ext self.names['drscal'] = tmp + cfile + '.drs' + ext self.names['results'] = rpath + yyyymmdd + '_' + dfile + '_' + cfile self.data = self.names['data'] self.drscal = self.names['drscal'] self.results = self.names['results'] def info( self ): """ print complete filenames """ print 'file names:' print 'data: ', self.names['data'] print 'drs-cal: ', self.names['drscal'] print 'results: ', self.names['results'] # end of class definition: fnames( object ) def _test_SlowData( filename ): print '-'*70 print "opened :", filename, " as 'file'" print print '-'*70 print 'type file.show() to look at its contents' print "type file.register( columnname ) or file.register('all') in order to register columns" print print " due column-registration you declare, that you would like to retrieve the contents of one of the columns" print " after column-registration, the 'file' has new member variables, they are named like the columns" print " PLEASE NOTE: immediatly after registration, the members exist, but they are empty." print " the values are assigned only, when you call file.next() or when you loop over the 'file'" print print "in order to loop over it, just go like this:" print "for row in file:" print " print row.columnname_one, row.columnname_two" print print "" print '-'*70 def _test_iter( nevents ): """ test for function __iter__ """ data_file_name = '/fact/raw/2011/11/24/20111124_117.fits.gz' calib_file_name = '/fact/raw/2011/11/24/20111124_114.drs.fits.gz' print 'the files for this test are:' print 'data file:', data_file_name print 'calib file:', calib_file_name run = RawData( data_file_name, calib_file_name , return_dict=True) for event in run: print 'ev ', event['event_id'], 'data[0,0] = ', event['acal_data'][0,0], 'start_cell[0] = ', event['start_cells'][0], 'trigger type = ', event['trigger_type'] if run.event_id == nevents: break if __name__ == '__main__': """ tests """ f = fits(sys.argv[1]) test_m1 = ROOT.std.map(str,ROOT.fits.Entry)() test_m2 = ROOT.std.map(str,ROOT.fits.Table.Column)() print "len(test_m1)", len(test_m1) print "len(test_m2)", len(test_m2) for k1 in f.GetKeys(): pass print k1 for k2 in f.GetColumns(): pass print k2 sd = SlowData(sys.argv[1])