source: fact/tools/pyscripts/pyfact/factfits.py@ 13354

Last change on this file since 13354 was 13354, checked in by neise, 13 years ago
initial commit of new python wrapper class. usable for reading of slow data files
  • Property svn:executable set to *
File size: 12.5 KB
Line 
1#!/usr/bin/python -itt
2import numpy as np
3import pprint
4import ctypes
5
6from ROOT import gSystem
7gSystem.Load('pyfits_h.so')
8from ROOT import *
9
class FactFits(fits):
    """ -Fact Fits File-
    A Python wrapper for the fits-class implemented in pyfits.h
    provides easy access to the fits file meta data.
     * dictionary of file metadata - self.meta
     * dict of table metadata - self.columns
     * variable table column access, thus possibly increased speed while looping

    Typical use:
        f = FactFits(path)
        f.register('all')          # or f.register('SomeColumn')
        for row in f:
            print(row.SomeColumn)

    After register() the column names exist as instance attributes (and as
    keys of self.dict); their values are refreshed on every call to next().
    """

    # FITS column type code -> ctypes type used as read buffer for columns
    # that hold exactly one element per row.
    # NOTE(review): the scalar buffers are unsigned while the array buffers
    # below are signed, and c_ulong is wider than 4 bytes on LP64 platforms.
    # Kept as in the original; confirm against pyfits.h before changing.
    _SCALAR_CTYPES = {
        'J': ctypes.c_ulong,      # 4 byte int
        'I': ctypes.c_ushort,     # 2 byte int
        'B': ctypes.c_ubyte,      # byte
        'D': ctypes.c_double,     # 8 byte float
        'E': ctypes.c_float,      # 4 byte float
        'A': ctypes.c_char,       # character (ctypes has no 'c_uchar')
        'K': ctypes.c_ulonglong,  # 8 byte int
    }

    # FITS column type code -> numpy dtype used as read buffer for columns
    # that hold an array per row.
    _ARRAY_NPTYPES = {
        'B': np.int8,     # byte
        'A': np.int8,     # char, handled as raw bytes
        'I': np.int16,    # 2 byte int
        'J': np.int32,    # 4 byte int
        'K': np.int64,    # 8 byte int
        'E': np.float32,  # 4 byte float
        'D': np.float64,  # 8 byte float
    }

    def __init__(self, path):
        """ open the file at *path* and create the meta and columns dictionaries

        An IOError raised by the underlying fits class is reported on stdout
        and re-raised.
        """
        self.path = path
        try:
            fits.__init__(self, path)
        except IOError:
            print('problem accessing data file: %s' % (path,))
            raise  # stop ! no data

        self.meta = self._make_meta_dict()
        self.columns = self._make_columns_dict()

        # creates self.ui_meta and self.dict, and turns the remaining
        # header keys into lower-case instance attributes
        self.treat_meta_dict()

        # list of columns, which are already registered
        # see method register()
        self._registered_cols = []
        # dict of the raw read buffers (ctypes scalars or numpy arrays);
        # kept separately so the public attributes can hold plain values
        self._table_cols = {}

        # I need to count the rows, since the normal loop mechanism seems not to work.
        self._current_row = 0

        # column name -> rows accumulated so far, see stack()
        self.stacked_cols = {}

    def _make_meta_dict(self):
        """ build the file metadata dict: key -> (converted value, comment)

        Values are converted according to their meta-type:
        'I' -> int, 'F' -> float, 'B' -> bool ('T'/'F'), 'T' -> left as is.
        Raises TypeError on inconsistent list lengths, unknown meta-types or
        malformed boolean values.
        """
        keys = self.GetPy_KeyKeys()
        values = self.GetPy_KeyValues()
        comments = self.GetPy_KeyComments()
        types = self.GetPy_KeyTypes()

        for label, other in (('values', values),
                             ('types', types),
                             ('comments', comments)):
            if len(keys) != len(other):
                raise TypeError('len(keys) %d != len(%s) %d'
                                % (len(keys), label, len(other)))

        meta_dict = {}
        for key, raw, comment, kind in zip(keys, values, comments, types):
            if kind == 'I':
                value = int(raw)
            elif kind == 'F':
                value = float(raw)
            elif kind == 'B':
                if raw == 'T':
                    value = True
                elif raw == 'F':
                    value = False
                else:
                    raise TypeError("meta-type is 'B', but meta-value is "
                                    "neither 'T' nor 'F'. meta-value: %r"
                                    % (raw,))
            elif kind == 'T':
                value = raw
            else:
                raise TypeError("unknown meta-type: known meta types are: "
                                "I,F,B and T. meta-type: %r" % (kind,))
            meta_dict[key] = (value, comment)
        return meta_dict

    def _make_columns_dict(self):
        """ build the table metadata dict: column name -> (num, size, type, unit) """
        keys = self.GetPy_ColumnKeys()
        # offsets = self.GetPy_ColumnOffsets()  # not needed on python level...
        nums = self.GetPy_ColumnNums()
        sizes = self.GetPy_ColumnSizes()
        types = self.GetPy_ColumnTypes()
        units = self.GetPy_ColumnUnits()

        return dict(zip(keys, zip(nums, sizes, types, units)))

    def stack(self, on=True):
        """ read the next row and start accumulating all registered columns

        After this call every further next() appends the new row to
        self.stacked_cols[colname] (via numpy.vstack).
        The parameter *on* is currently ignored.
        """
        self.next()
        for col in self._registered_cols:
            current = self.dict[col]
            if isinstance(current, np.ndarray):
                # 'current' is the live read buffer, which is overwritten by
                # the next GetNextRow(); keep a snapshot of this row instead.
                self.stacked_cols[col] = current.copy()
            else:
                self.stacked_cols[col] = np.array(current)

    def register(self, input_str):
        """ declare which column(s) should be read while looping

        input_str : a column name, or 'all' (case-insensitive) for every column.
        Raises KeyError, listing the valid names, for an unknown column.
        """
        columns = self.columns
        if input_str.lower() == 'all':
            for col in columns:
                self._register(col)
            return

        if input_str not in columns:
            error_msg = 'colname:' + input_str + ' is not a column in the binary table.\n'
            error_msg += 'possible colnames are\n'
            error_msg += ''.join(key + '\n' for key in columns)
            raise KeyError(error_msg)
        self._register(input_str)

    # 'private' method, do not use
    def _register(self, colname):
        """ allocate a read buffer for *colname* and hand it to the fits class

        Columns holding a single element per row get a ctypes scalar as
        buffer, all others a numpy array. Raises TypeError for a column
        type code without a known buffer type.
        """
        number_of_elements = int(self.columns[colname][0])
        # str() so that the table lookup does not depend on how the wrapper
        # hands out the type code
        ctypecode_of_elements = str(self.columns[colname][2])

        # snippet from the C++ source code, or header file to be precise:
        #case 'L': gLog << "bool(8)"; break;
        #case 'B': gLog << "byte(8)"; break;
        #case 'I': gLog << "short(16)"; break;
        #case 'J': gLog << "int(32)"; break;
        #case 'K': gLog << "int(64)"; break;
        #case 'E': gLog << "float(32)"; break;
        #case 'D': gLog << "double(64)"; break;

        # the fields inside the columns can either contain single numbers,
        # or whole arrays of numbers as well.
        # we treat single elements differently...
        if number_of_elements == 1:
            try:
                local = self._SCALAR_CTYPES[ctypecode_of_elements]()
            except KeyError:
                raise TypeError('unknown ctypecode_of_elements: %r'
                                % (ctypecode_of_elements,))
        else:
            try:
                nptype = self._ARRAY_NPTYPES[ctypecode_of_elements]
            except KeyError:
                raise TypeError('unknown ctypecode_of_elements: %r'
                                % (ctypecode_of_elements,))
            local = np.zeros(number_of_elements, nptype)

        # Set the Pointer Address
        self.SetPtrAddress(colname, local)
        self._table_cols[colname] = local

        if isinstance(local, ctypes._SimpleCData):
            # remove any traces of ctypes:
            public_value = local.value
        else:
            # numpy buffers are exposed directly, they are updated in place
            public_value = local
        self.__dict__[colname] = public_value
        self.dict[colname] = public_value

        if colname not in self._registered_cols:
            self._registered_cols.append(colname)

    def treat_meta_dict(self):
        """make 'interesting' meta information available like normal members.
        non interesting are:
        TFORM, TUNIT, and TTYPE
        since these are available via the columns dict.

        Side effects:
         * sets self.number_of_rows and self.number_of_columns
         * moves the always-present header keys from self.meta to self.ui_meta
         * collects numbered keys (e.g. TTYPE1, TTYPE2, ...) as lists, ordered
           by their number, in self.ui_meta['arraylike'][<lower-case stem>]
         * every other key becomes a lower-case instance attribute holding
           its (value, comment) tuple
         * self.dict becomes a copy of the instance attributes without
           'meta' and 'ui_meta'
        """
        self.number_of_rows = self.meta['NAXIS2'][0]
        self.number_of_columns = self.meta['TFIELDS'][0]

        # there are some information in the meta dict, which are always there:
        # these are regarded as not interesting:
        uninteresting_meta = {'arraylike': {}}
        uninteresting = ['NAXIS', 'NAXIS1', 'NAXIS2',
                         'TFIELDS',
                         'XTENSION', 'EXTNAME', 'EXTREL',
                         'BITPIX', 'PCOUNT', 'GCOUNT',
                         'ORIGIN',
                         'PACKAGE', 'COMPILED', 'CREATOR',
                         'TELESCOP', 'TIMESYS', 'TIMEUNIT', 'VERSION']
        for key in uninteresting:
            if key in self.meta:
                uninteresting_meta[key] = self.meta.pop(key)

        # split the remaining keys into plain ones and numbered ones
        singlelike = []
        arraylike = {}   # stem -> list of (number, entry)
        for key, entry in self.meta.items():
            stem = key.rstrip('1234567890')
            if stem == key:
                singlelike.append(key)
            else:
                number = int(key[len(stem):])
                arraylike.setdefault(stem, []).append((number, entry))

        # use the numbers really present in the header: FITS numbers these
        # keywords starting at 1, so counting from 0 would drop the last one
        for stem, numbered in arraylike.items():
            numbered.sort(key=lambda pair: pair[0])
            uninteresting_meta['arraylike'][stem.lower()] = [
                entry for _, entry in numbered]
        self.ui_meta = uninteresting_meta

        # make the plain keys members of self
        for key in singlelike:
            self.__dict__[key.lower()] = self.meta[key]

        members = self.__dict__.copy()
        members.pop('meta', None)
        members.pop('ui_meta', None)
        self.dict = members

    def __iter__(self):
        """ iterator """
        return self

    def next(self):
        """ used by __iter__

        Reads the next table row into the registered buffers, refreshes the
        plain-value attributes of single-element columns and appends the row
        to every stacked column. Returns self; raises StopIteration when all
        rows were read or GetNextRow() reports failure.
        """
        # Here one might check, if looping makes any sense, and if not
        # one could stop looping or so...
        # like this:
        #
        # if len(self._registered_cols) == 0:
        #     print 'warning: looping without any registered columns'
        if self._current_row >= self.number_of_rows:
            raise StopIteration
        # explicit comparison kept on purpose: only a false-valued return
        # (not e.g. None) ends the loop
        if self.GetNextRow() == False:
            raise StopIteration

        # numpy buffers are filled in place; ctypes scalars have to be
        # unwrapped again after every read
        for col in self._registered_cols:
            buf = self._table_cols[col]
            if isinstance(buf, ctypes._SimpleCData):
                self.__dict__[col] = buf.value
                self.dict[col] = buf.value

        # vstack copies its inputs, so the live buffer may be passed directly
        for col in self.stacked_cols:
            self.stacked_cols[col] = np.vstack(
                (self.stacked_cols[col], np.array(self.dict[col])))

        self._current_row += 1
        return self

    # Python 3 spelling of the iterator protocol
    __next__ = next

    def show(self):
        """ pretty-print self.dict """
        pprint.pprint(self.dict)
301
if __name__ == '__main__':
    # Meant to be run with 'python -i' (see the shebang line): opens the given
    # file as 'file' and prints a short manual for the interactive session.
    import sys
    if len(sys.argv) == 1:
        print('usage: %s fits-file-name' % (sys.argv[0],))
        # without a file name there is nothing to open
        sys.exit(1)

    # the name 'file' is part of the interactive interface advertised below
    file = FactFits(sys.argv[1])
    print('-'*70)
    print("opened : %s  as 'file'" % (sys.argv[1],))
    print('')
    print('-'*70)
    print('type file.show() to look at its contents')
    print("type file.register( columnname ) or file.register('all') in order to register columns")
    print('')
    print(" due column-registration you declare, that you would like to retrieve the contents of one of the columns")
    print(" after column-registration, the 'file' has new member variables, they are named like the columns")
    print(" PLEASE NOTE: immediatly after registration, the members exist, but they are empty.")
    print(" the values are assigned only, when you call file.next() or when you loop over the 'file'")
    print('')
    print("in order to loop over it, just go like this:")
    print("for row in file:")
    print(" print row.columnname_one, row.columnname_two")
    print('')
    print("")
    print('-'*70)
326
Note: See TracBrowser for help on using the repository browser.