wiki:DatabaseBasedAnalysis/Python

Version 1 (modified by tbretz, 6 years ago) ( diff )

--

Here is an example how to do a simple database based analysis in Python. This examples illustrates how to fill theta-square plots and are the Python counterparts to the example given on the main page.

Query

Both examples on this page read the following query from an ascii file query.txt:

SELECT
   Events.*,
   Position.X,
   Position.Y
FROM
   Events
LEFT JOIN Position USING (FileId, EvtNumber)
LEFT JOIN RunInfo  USING (FileId)
WHERE
   fSourceKey=5
AND
   fRunTypeKey=1
AND
   FileId BETWEEN 180100000 AND 180200000
AND
   fZenithDistanceMax<35
AND
   fR750Cor>0.9*fR750Ref
AND
   NumUsedPixels>5.5
AND
   NumIslands<3.5
AND
   Leakage1<0.1
AND
   Width*Length*PI() < LOG10(Size)*898-1535; 

One shot example

The first example runs the query and filling the on- and off-histograms in a single python program:

import numpy as np
import matplotlib.pyplot as plt
import mysql.connector

def FillHistogram(bins,n,value,weight=1):
    idx = np.searchsorted(bins,value)
    if not idx==0 and not idx>n.size:
        n[idx-1]+=weight

mm2deg = 0.0117193246260285378

# Create bins for the histograms
bins = np.linspace(0,1,56)
non = np.zeros(55)
noff = np.zeros(55)

# Read the MySQL query from a text file
queryFile = open('query.txt')
query = queryFile.read()
queryFile.close()

# Open a connection to the MySQL database
conn = mysql.connector.connect(user='fact',password='[...]',host='ihp-pc45.ethz.ch',database='factdata',port='3306')
cursor = conn.cursor(dictionary=True)
cursor.execute(query)

# Loop over all events received from the database
for Event in cursor:
    # Intialize all variables needed in the calculations
    Length = Event['Length']
    Width = Event['Width']
    NumIslands = Event['NumIslands']
    NumUsedPixels = Event['NumUsedPixels']
    Leakage1 = Event['Leakage1']
    Size = Event['Size']
    X = Event['X']
    Y = Event['Y']
    CosDelta = Event['CosDelta']
    SinDelta = Event['SinDelta']
    M3Long = Event['M3Long']
    SlopeLong = Event['SlopeLong']
    MeanX = Event['MeanX']
    MeanY = Event['MeanY']

    # First calculate all cuts to speedup the analysis
    area =  np.pi*Width*Length

    # The abberation correction does increase also Width and Length by 1.02
    cutq = (NumIslands<3.5 and NumUsedPixels>5.5 and Leakage1<0.1)
    if not cutq:
        continue

    cut0 = area<(np.log10(Size)*898-1535)
    if not cut0:
        continue

    # Loop over all wobble positions in the camera
    for angle in range(0,360,60):
        # ----------- Source dependent parameter calculation ----------
        cr = np.cos(np.deg2rad(angle))
        sr = np.sin(np.deg2rad(angle))

        px = cr*X-sr*Y
        py = cr*Y+sr*X

        dx = MeanX-px
        dy = MeanY-py

        norm = np.sqrt(dx*dx+dy*dy)
        dist = norm*mm2deg

        lx = np.min([np.max([(CosDelta*dy-SinDelta*dx)/norm,-1.]),1.])
        ly = np.min([np.max([(CosDelta*dx+SinDelta*dy)/norm,-1.]),1.])

        alpha = np.arcsin(lx)
        sgn = np.sign(ly)

        # ------------------------ Application ---------------------
        m3l = M3Long*sgn*mm2deg
        slope = SlopeLong*sgn/mm2deg

        # -------------------------- Analysis ----------------------
        xi = 1.39252+0.154247*slope+1.67972*(1-1/(1+4.86232*Leakage1))

        sign1 = m3l+0.07
        sign2 = (dist-0.5)*7.2-slope

        disp = (-xi if (sign1<0 or sign2<0) else xi)*(1-Width/Length)

        thetasq = disp*disp+dist*dist-2*disp*dist*np.sqrt(1-lx*lx)

        if angle==0:
            FillHistogram(bins,non,thetasq)
        else:
            FillHistogram(bins,noff,thetasq,weight=1./5.)

conn.close()

plt.hist(bins[:-1],bins=bins,histtype='step',weights=non)
plt.hist(bins[:-1],bins=bins,histtype='step',weights=noff)
plt.show()

Two step example

This example works similar to rootifysql and a ROOT macro. It first requests the data from a database an stores it into a file and then processed the file with a second program.

Requesting and storing the data

import numpy as np
import pickle
import mysql.connector

# Read the MySQL query from a text file
queryFile = open('query.txt')
query = queryFile.read()
queryFile.close()

# Open a connection to the MySQL database
conn = mysql.connector.connect(user='fact',password='[...]',host='ihp-pc45.ethz.ch',database='factdata',port='3306')
cursor = conn.cursor()
cursor.execute(query)

with open('crab-data-only.p','wb') as outFile:
    description = np.array(cursor.description).T[0]
    pickle.dump(description,outFile,pickle.HIGHEST_PROTOCOL)
    for Event in cursor:
        pickle.dump(Event,outFile,pickle.HIGHEST_PROTOCOL)

conn.close()

Reading and processing the data

import numpy as np
import pickle
import matplotlib.pyplot as plt

def FillHistogram(bins,n,value,weight=1):
    idx = np.searchsorted(bins,value)
    if not idx==0 and not idx>n.size:
        n[idx-1]+=weight

inFile = open('crab-data-only.p','rb')
description = pickle.load(inFile)

# Find indices corresponding to the different variables
Length_idx = np.nonzero(description=='Length')[0][0]
Width_idx = np.nonzero(description=='Width')[0][0]
NumIslands_idx = np.nonzero(description=='NumIslands')[0][0]
NumUsedPixels_idx = np.nonzero(description=='NumUsedPixels')[0][0]
Leakage1_idx = np.nonzero(description=='Leakage1')[0][0]
Size_idx = np.nonzero(description=='Size')[0][0]
X_idx = np.nonzero(description=='X')[0][0]
Y_idx = np.nonzero(description=='Y')[0][0]
CosDelta_idx = np.nonzero(description=='CosDelta')[0][0]
SinDelta_idx = np.nonzero(description=='SinDelta')[0][0]
M3Long_idx = np.nonzero(description=='M3Long')[0][0]
SlopeLong_idx = np.nonzero(description=='SlopeLong')[0][0]
MeanX_idx = np.nonzero(description=='MeanX')[0][0]
MeanY_idx = np.nonzero(description=='MeanY')[0][0]

mm2deg = 0.0117193246260285378

# Create bins for the histogram
bins = np.linspace(0,1,56)
non = np.zeros(55)
noff = np.zeros(55)

while True:
    try:
        Event = pickle.load(inFile)
    except:
        break

    # Intialize all variables needed in the calculations
    Length = Event[Length_idx]
    Width = Event[Width_idx]
    NumIslands = Event[NumIslands_idx]
    NumUsedPixels = Event[NumUsedPixels_idx]
    Leakage1 = Event[Leakage1_idx]
    Size = Event[Size_idx]
    X = Event[X_idx]
    Y = Event[Y_idx]
    CosDelta = Event[CosDelta_idx]
    SinDelta = Event[SinDelta_idx]
    M3Long = Event[M3Long_idx]
    SlopeLong = Event[SlopeLong_idx]
    MeanX = Event[MeanX_idx]
    MeanY = Event[MeanY_idx]

    # First calculate all cuts to speedup the analysis
    area =  np.pi*Width*Length

    # The abberation correction does increase also Width and Length by 1.02
    cutq = (NumIslands<3.5 and NumUsedPixels>5.5 and Leakage1<0.1)
    if not cutq:
        continue

    cut0 = area<(np.log10(Size)*898-1535)
    if not cut0:
        continue

    #print(area,cutq,cut0)

    # Loop over all wobble positions in the camera
    for angle in range(0,360,60):
        # ----------- Source dependent parameter calculation ----------
        cr = np.cos(np.deg2rad(angle))
        sr = np.sin(np.deg2rad(angle))

        px = cr*X-sr*Y
        py = cr*Y+sr*X

        dx = MeanX-px
        dy = MeanY-py

        norm = np.sqrt(dx*dx+dy*dy)
        dist = norm*mm2deg

        lx = np.min([np.max([(CosDelta*dy-SinDelta*dx)/norm,-1.]),1.])
        ly = np.min([np.max([(CosDelta*dx+SinDelta*dy)/norm,-1.]),1.])

        alpha = np.arcsin(lx)
        sgn = np.sign(ly)

        # ------------------------ Application ---------------------
        m3l = M3Long*sgn*mm2deg
        slope = SlopeLong*sgn/mm2deg

        # -------------------------- Analysis ----------------------
        xi = 1.39252+0.154247*slope+1.67972*(1-1/(1+4.86232*Leakage1))

        sign1 = m3l+0.07
        sign2 = (dist-0.5)*7.2-slope

        disp = (-xi if (sign1<0 or sign2<0) else xi)*(1-Width/Length)

        thetasq = disp*disp+dist*dist-2*disp*dist*np.sqrt(1-lx*lx)

        if angle==0:
            FillHistogram(bins,non,thetasq)
        else:
            FillHistogram(bins,noff,thetasq,weight=1./5.)

plt.hist(bins[:-1],bins=bins,histtype='step',weights=non)
plt.hist(bins[:-1],bins=bins,histtype='step',weights=noff)
plt.show()
Note: See TracWiki for help on using the wiki.