# coding: utf-8 import calendar import numpy as np from astropy.io import fits import pandas as pd from sqlalchemy import create_engine night_int = 20150721 def fjd(datetime_inst): """ convert datetime instance to FJD """ return calendar.timegm(datetime_inst.utctimetuple()) / (24.*3600.) def create_DB_connection(): from ConfigParser import SafeConfigParser config = SafeConfigParser() config.optionxform = str # this make the parsing case sensitive config.read('config.ini') factdb = create_engine( "mysql+mysqldb://{user}:{pw}@{host}/{db}".format( user=config.get('database', 'user'), pw=config.get('database', 'password'), host=config.get('database', 'host'), db=config.get('database', 'database'), ) ) return factdb def get_data_runs_from_run_db(night_int, factdb): keys = [ 'fRunID', 'fNight', 'fRunStart', 'fRunStop', ] sql_query = """SELECT {comma_sep_keys} FROM RunInfo WHERE fRunTypeKey=1 AND fNight={night:d}; """ sql_query = sql_query.format( comma_sep_keys=', '.join(keys), night=night_int, ) data_runs = pd.read_sql_query( sql_query, factdb, parse_dates=['fRunStart', 'fRunStop'], ) return data_runs def get_trigger_rates(night_int, base_path='/fact/aux/'): night_string = str(night_int) fits_file = fits.open( base_path+'{y}/{m}/{d}/{n}.FTM_CONTROL_TRIGGER_RATES.fits'.format( ni=night_string, y=night_string[0:4], m=night_string[4:6], d=night_string[6:8], ) ) trigger_rates = fits_file[1].data return trigger_rates def add_ratio_and_more_to_dataframe(data_runs, trigger_rates): # create new columns and pre-assign some hopefully good NULL-like-values data_runs['number_of_measurements'] = 0 data_runs['median_of_board_rates'] = np.nan data_runs['std_dev_of_board_rates'] = np.nan data_runs['fBoardTriggerRateRatioAboveThreshold'] = np.nan for dataframe_index in data_runs.index: this_run = data_runs.ix[dataframe_index] mask = ( (trigger_rates['Time'] > fjd(this_run['fRunStart'])) * (trigger_rates['Time'] < fjd(this_run['fRunStop'])) ) this_board_rates = trigger_rates['BoardRate'][mask] N = this_board_rates.shape[0] data_runs.ix[dataframe_index, 'number_of_measurements'] = N left, med, right = np.percentile( this_board_rates, [50-20, 50, 50+20]) something_like_width = (right - left)/2. # 40% of the area of the normal distrubution # fall between -0.52*sigma and +0.52*sigma. # The estimation of sigma in a distorted CDF # is less affected near the median. (but of course less accurate as well.) std_dev = something_like_width / 0.52 median_board_rates = np.median(this_board_rates, axis=1) over = (median_board_rates > med+3*std_dev).sum() data_runs.ix[dataframe_index, 'median_of_board_rates'] = med data_runs.ix[dataframe_index, 'std_dev_of_board_rates'] = std_dev data_runs.ix[dataframe_index, 'fBoardTriggerRateRatioAboveThreshold'] = float(over)/N def main(night_int): factdb = create_DB_connection() data_runs = get_data_runs_from_run_db(night_int, factdb) trigger_rates = get_trigger_rates(night_int) add_ratio_and_more_to_dataframe(data_runs, trigger_rates) return data_runs if __name__ == '__main__': data_runs = main(night_int) data_runs.to_csv('foo.bar', index=False)