wiki:DatabaseBasedAnalysis/Python

Here is an example how to do a simple database based analysis in Python. This examples illustrates how to fill theta-square plots and are the Python counterparts to the example given on the main page.

The following Python code was kindly provided by Julian Kemp MSc.

Query

Both examples on this page read the following query from an ascii file query.txt:

SELECT
   Images.*,
   Position.X,
   Position.Y
FROM
   Images
LEFT JOIN Position USING (FileId, EvtNumber)
LEFT JOIN RunInfo  USING (FileId)
WHERE
   fSourceKey=5
AND
   fRunTypeKey=1
AND
   FileId BETWEEN 180100000 AND 180200000
AND
   fZenithDistanceMax<35
AND
   fR750Cor>0.9*fR750Ref
AND
   NumUsedPixels>5.5
AND
   NumIslands<3.5
AND
   Leakage1<0.1
AND
   Width*Length*PI() < LOG10(Size)*898-1535; 

One shot example

The first example runs the query and filling the on- and off-histograms in a single python program:

import numpy as np
import matplotlib.pyplot as plt
import mysql.connector

def FillHistogram(bins,n,value,weight=1):
    idx = np.searchsorted(bins,value)
    if not idx==0 and not idx>n.size:
        n[idx-1]+=weight

mm2deg = 0.0117193246260285378

# Create bins for the histograms
bins = np.linspace(0,1,56)
non = np.zeros(55)
noff = np.zeros(55)

# Read the MySQL query from a text file
queryFile = open('query.txt')
query = queryFile.read()
queryFile.close()

# Open a connection to the MySQL database
conn = mysql.connector.connect(user='fact',password='[...]',host='ihp-pc45.ethz.ch',database='factdata',port='3306')
cursor = conn.cursor(dictionary=True)
cursor.execute(query)

# Loop over all events received from the database
for Event in cursor:
    # Intialize all variables needed in the calculations
    Length = Event['Length']
    Width = Event['Width']
    NumIslands = Event['NumIslands']
    NumUsedPixels = Event['NumUsedPixels']
    Leakage1 = Event['Leakage1']
    Size = Event['Size']
    X = Event['X']
    Y = Event['Y']
    CosDelta = Event['CosDelta']
    SinDelta = Event['SinDelta']
    M3Long = Event['M3Long']
    SlopeLong = Event['SlopeLong']
    MeanX = Event['MeanX']
    MeanY = Event['MeanY']

    # First calculate all cuts to speedup the analysis
    area =  np.pi*Width*Length

    # The abberation correction does increase also Width and Length by 1.02
    cutq = (NumIslands<3.5 and NumUsedPixels>5.5 and Leakage1<0.1)
    if not cutq:
        continue

    cut0 = area<(np.log10(Size)*898-1535)
    if not cut0:
        continue

    # Loop over all wobble positions in the camera
    for angle in range(0,360,60):
        # ----------- Source dependent parameter calculation ----------
        cr = np.cos(np.deg2rad(angle))
        sr = np.sin(np.deg2rad(angle))

        px = cr*X-sr*Y
        py = cr*Y+sr*X

        dx = MeanX-px*1.02
        dy = MeanY-py*1.02

        norm = np.sqrt(dx*dx+dy*dy)
        dist = norm*mm2deg

        lx = np.min([np.max([(CosDelta*dy-SinDelta*dx)/norm,-1.]),1.])
        ly = np.min([np.max([(CosDelta*dx+SinDelta*dy)/norm,-1.]),1.])

        alpha = np.arcsin(lx)
        sgn = np.sign(ly)

        # ------------------------ Application ---------------------
        m3l = M3Long*sgn*mm2deg
        slope = SlopeLong*sgn/mm2deg

        # -------------------------- Analysis ----------------------
        xi = 1.39252+0.154247*slope+1.67972*(1-1/(1+4.86232*Leakage1))

        sign1 = m3l+0.07
        sign2 = (dist-0.5)*7.2-slope

        disp = (-xi if (sign1<0 or sign2<0) else xi)*(1-Width/Length)

        thetasq = disp*disp+dist*dist-2*disp*dist*np.sqrt(1-lx*lx)

        if angle==0:
            FillHistogram(bins,non,thetasq)
        else:
            FillHistogram(bins,noff,thetasq,weight=1./5.)

conn.close()

plt.hist(bins[:-1],bins=bins,histtype='step',weights=non)
plt.hist(bins[:-1],bins=bins,histtype='step',weights=noff)
plt.show()

Two step example

This example works similar to rootifysql and a ROOT macro. It first requests the data from a database an stores it into a file and then processed the file with a second program.

Requesting and storing the data

import numpy as np
import pickle
import mysql.connector

# Read the MySQL query from a text file
queryFile = open('query.txt')
query = queryFile.read()
queryFile.close()

# Open a connection to the MySQL database
conn = mysql.connector.connect(user='fact',password='[...]',host='ihp-pc45.ethz.ch',database='factdata',port='3306')
cursor = conn.cursor()
cursor.execute(query)

with open('crab-data-only.p','wb') as outFile:
    description = np.array(cursor.description).T[0]
    pickle.dump(description,outFile,pickle.HIGHEST_PROTOCOL)
    for Event in cursor:
        pickle.dump(Event,outFile,pickle.HIGHEST_PROTOCOL)

conn.close()

Reading and processing the data

import numpy as np
import pickle
import matplotlib.pyplot as plt

def FillHistogram(bins,n,value,weight=1):
    idx = np.searchsorted(bins,value)
    if not idx==0 and not idx>n.size:
        n[idx-1]+=weight

inFile = open('crab-data-only.p','rb')
description = pickle.load(inFile)

# Find indices corresponding to the different variables
Length_idx = np.nonzero(description=='Length')[0][0]
Width_idx = np.nonzero(description=='Width')[0][0]
NumIslands_idx = np.nonzero(description=='NumIslands')[0][0]
NumUsedPixels_idx = np.nonzero(description=='NumUsedPixels')[0][0]
Leakage1_idx = np.nonzero(description=='Leakage1')[0][0]
Size_idx = np.nonzero(description=='Size')[0][0]
X_idx = np.nonzero(description=='X')[0][0]
Y_idx = np.nonzero(description=='Y')[0][0]
CosDelta_idx = np.nonzero(description=='CosDelta')[0][0]
SinDelta_idx = np.nonzero(description=='SinDelta')[0][0]
M3Long_idx = np.nonzero(description=='M3Long')[0][0]
SlopeLong_idx = np.nonzero(description=='SlopeLong')[0][0]
MeanX_idx = np.nonzero(description=='MeanX')[0][0]
MeanY_idx = np.nonzero(description=='MeanY')[0][0]

mm2deg = 0.0117193246260285378

# Create bins for the histogram
bins = np.linspace(0,1,56)
non = np.zeros(55)
noff = np.zeros(55)

while True:
    try:
        Event = pickle.load(inFile)
    except:
        break

    # Intialize all variables needed in the calculations
    Length = Event[Length_idx]
    Width = Event[Width_idx]
    NumIslands = Event[NumIslands_idx]
    NumUsedPixels = Event[NumUsedPixels_idx]
    Leakage1 = Event[Leakage1_idx]
    Size = Event[Size_idx]
    X = Event[X_idx]
    Y = Event[Y_idx]
    CosDelta = Event[CosDelta_idx]
    SinDelta = Event[SinDelta_idx]
    M3Long = Event[M3Long_idx]
    SlopeLong = Event[SlopeLong_idx]
    MeanX = Event[MeanX_idx]
    MeanY = Event[MeanY_idx]

    # First calculate all cuts to speedup the analysis
    area =  np.pi*Width*Length

    # The abberation correction does increase also Width and Length by 1.02
    cutq = (NumIslands<3.5 and NumUsedPixels>5.5 and Leakage1<0.1)
    if not cutq:
        continue

    cut0 = area<(np.log10(Size)*898-1535)
    if not cut0:
        continue

    #print(area,cutq,cut0)

    # Loop over all wobble positions in the camera
    for angle in range(0,360,60):
        # ----------- Source dependent parameter calculation ----------
        cr = np.cos(np.deg2rad(angle))
        sr = np.sin(np.deg2rad(angle))

        px = cr*X-sr*Y
        py = cr*Y+sr*X

        dx = MeanX-px*1.02
        dy = MeanY-py*1.02

        norm = np.sqrt(dx*dx+dy*dy)
        dist = norm*mm2deg

        lx = np.min([np.max([(CosDelta*dy-SinDelta*dx)/norm,-1.]),1.])
        ly = np.min([np.max([(CosDelta*dx+SinDelta*dy)/norm,-1.]),1.])

        alpha = np.arcsin(lx)
        sgn = np.sign(ly)

        # ------------------------ Application ---------------------
        m3l = M3Long*sgn*mm2deg
        slope = SlopeLong*sgn/mm2deg

        # -------------------------- Analysis ----------------------
        xi = 1.39252+0.154247*slope+1.67972*(1-1/(1+4.86232*Leakage1))

        sign1 = m3l+0.07
        sign2 = (dist-0.5)*7.2-slope

        disp = (-xi if (sign1<0 or sign2<0) else xi)*(1-Width/Length)

        thetasq = disp*disp+dist*dist-2*disp*dist*np.sqrt(1-lx*lx)

        if angle==0:
            FillHistogram(bins,non,thetasq)
        else:
            FillHistogram(bins,noff,thetasq,weight=1./5.)

plt.hist(bins[:-1],bins=bins,histtype='step',weights=non)
plt.hist(bins[:-1],bins=bins,histtype='step',weights=noff)
plt.show()
Last modified 6 years ago Last modified on 09/02/18 14:54:56
Note: See TracWiki for help on using the wiki.