From a6ce93cd9f5fb1a15c809d68e0db111b61b92800 Mon Sep 17 00:00:00 2001 From: Sebastian Wehling-Benatelli Date: Wed, 10 Aug 2016 16:30:28 +0200 Subject: [PATCH 1/8] [modification] added missing docstring content --- pylot/core/util/utils.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/pylot/core/util/utils.py b/pylot/core/util/utils.py index b24efb4d..db0f75a5 100644 --- a/pylot/core/util/utils.py +++ b/pylot/core/util/utils.py @@ -18,7 +18,6 @@ def _pickle_method(m): return getattr, (m.im_self, m.im_func.func_name) def fit_curve(x, y): - return splev, splrep(x, y) def getindexbounds(f, eta): @@ -180,18 +179,18 @@ def getHash(time): def getLogin(): ''' - - :return: + returns the actual user's login id + :return: User ID ''' return pwd.getpwuid(os.getuid())[0] def getOwner(fn): ''' - - :param fn: - :type fn: - :return: + takes filename and returns the User ID of the owner of the file + :param fn: filename + :type fn: str + :return: User ID ''' return pwd.getpwuid(os.stat(fn).st_uid).pw_name @@ -200,12 +199,11 @@ def getPatternLine(fn, pattern): """ Takes a file name and a pattern string to search for in the file and returns the first line which contains the pattern string otherwise None. - - :param fn: file name + :param fn: filename :type fn: str :param pattern: pattern string to search for :type pattern: str - :return: the complete line containing pattern or None + :return: the complete line containing the pattern string or None >>> getPatternLine('utils.py', 'python') '#!/usr/bin/env python\\n' @@ -223,8 +221,8 @@ def getPatternLine(fn, pattern): def isSorted(iterable): ''' - - :param iterable: + takes a python variable and returns True if the iterable is sorted and False otherwise + :param iterable: a python variable :type iterable: :return: ''' @@ -233,12 +231,12 @@ def isSorted(iterable): def prepTimeAxis(stime, trace): ''' - + takes a starttime 'stime' and a trace object 'trace' and return a vector containing as much time stamps as data values in the trace object are available :param stime: :type stime: :param trace: - :type trace: - :return: + :type trace: + :return: time axis ''' nsamp = trace.stats.npts srate = trace.stats.sampling_rate @@ -294,7 +292,6 @@ def runProgram(cmd, parameter=None): """ run an external program specified by cmd with parameters input returning the stdout output - :param cmd: name of the command to run :type cmd: str :param parameter: filename of parameter file or parameter string From 36e7bc1bb287a9376107943cfb8f8ae6ccc80591 Mon Sep 17 00:00:00 2001 From: sebastianp Date: Thu, 11 Aug 2016 16:13:53 +0200 Subject: [PATCH 2/8] [documentation] adding docstring to all the methods. --- pylot/core/pick/compare.py | 146 +++++++++++++++++++++++++++++++++---- pylot/core/util/pdf.py | 4 +- pylot/core/util/utils.py | 8 +- 3 files changed, 137 insertions(+), 21 deletions(-) diff --git a/pylot/core/pick/compare.py b/pylot/core/pick/compare.py index 2cb128b7..a56e58e8 100644 --- a/pylot/core/pick/compare.py +++ b/pylot/core/pick/compare.py @@ -339,16 +339,30 @@ class PDFDictionary(object): class PDFstatistics(object): - ''' - To do: - plots for std, quantiles, - ''' + """ + This object can be used to get various statistic values from probabillity density functions. + Takes a path as argument. + """ + + def __init__(self, directory): + """Initiates some values needed when dealing with pdfs later""" self.directory = directory self.evtlist = list() self.return_phase = None + def readTheta(self, arname, dir, fnpattern): + """ + Loads an array from file into object instance. + :param arname: Name of Array beeing created. + :type arname: string + :param dir: Directory where file is to be found. + :type dir: string + :param fnpattern: file name pattern for reading multiple files into one array. + :type fnpattern: string + :return: a list with all args* from the files. + """ exec('self.' + arname +' = []') filelist = glob.glob1(dir, fnpattern) for file in filelist: @@ -359,8 +373,14 @@ class PDFstatistics(object): exec('self.' + arname + ' += list') fid.close() + def makeFileList(self, fn_pattern='*.xml'): - evtlist = list() + """ + Takes a file pattern and searches for that recursively in the set path for the object. + :param fn_pattern: A pattern that can identify all datafiles. Default Value = '*.xml' + :type fn_pattern: string + :return: creates a list of events saved in the PDFstatistics object. + """ evtlist = glob.glob1((os.path.join(self.directory)), fn_pattern) if not evtlist: for root, _, files in os.walk(self.directory): @@ -369,7 +389,9 @@ class PDFstatistics(object): evtlist.append(os.path.join(root, file)) self.evtlist = evtlist + def __iter__(self): + """Iterating over the PDFstatistics object yields every single pdf from the list of events""" assert isinstance(self.return_phase, str), 'phase has to be set before being able to iterate over items...' for evt in self.evtlist: self.getPDFDict(self.directory, evt) @@ -379,13 +401,30 @@ class PDFstatistics(object): except KeyError: continue + def set_return_phase(self, type): + """ + Sets the phase typ of event data that is returned on iteration over the object. + :param type: can be either p (p-phase) or s (s-phase). + :type type: string + :return: - + """ if type.upper() not in 'PS': raise ValueError("phase type must be either 'P' or 'S'!") else: self.return_phase = type.upper() + def getQD(self,value): + """ + Takes a probability value and and returns the distance + between two complementary quantiles. + For example: getQD(0.3) yields Quantile(1-0.3) - Quantile(0.3) + :param value: 0 < value < 0.5 + :type value: float + :return: returns a list of all quantile distances for all pdfs in + the list of events. + """ QDlist = [] for pdf in self: QD = pdf.quantile_distance(value) @@ -394,6 +433,16 @@ class PDFstatistics(object): def getQDQ(self,value): + """ + Takes a probability value and and returns the fraction of + two quantile distances. + For example: + getQDQ(x) = getQD(0.5-x)/getQD(x) + (Quantile(1-0.5-x) - Quantile(x)) / (Quantile(1-x) - Quantile(x)) + :param value: 0 < value < 0.25 + :return: returns a list of all quantile fractions for all pdfs in + the list of events. + """ QDQlist = [] for pdf in self: QDQ = pdf.qtile_dist_quot(value) @@ -402,6 +451,12 @@ class PDFstatistics(object): def getSTD(self): + """ + Iterates over PDFstatistics object and returns the standard + deviation of all pdfs in the list of events. + :return: saves an instance of self.p_stdarray or + self.s_stdarray, depending on set phase. + """ std = [] for pdf in self: try: @@ -413,6 +468,10 @@ class PDFstatistics(object): def set_stdarray(self, array): + """ + Helper function for self.getSTD(). This function + should not be called directly. + """ if self.return_phase == 'P': self.p_stdarray = array elif self.return_phase == 'S': @@ -423,6 +482,22 @@ class PDFstatistics(object): def getBinList(self,l_boundary,u_boundary,nbins = 100): + """ + Helper function for self.histplot(). Takes in two boundaries and + a number of bins and creates a list of bins which can be passed + to self.histplot(). + :param l_boundary: Any number. + :type l_boundary: float + :param u_boundary: Any number that is greater than l_boundary. + :type u_boundary: float + :param nbins: Any positive integer. + :type nbins: int + :return: A list of equidistant bins. + """ + if u_boundary <= l_boundary: + raise ValueError('Upper boundary must be greather than lower!') + elif nbins <= 0: + raise ValueError('Number of bins is not valid.') binlist = [] for i in range(nbins): binlist.append(l_boundary + i*(u_boundary-l_boundary)/nbins) @@ -430,28 +505,57 @@ class PDFstatistics(object): def histplot(self, array, binlist, xlab = 'Values', - ylab = 'Frequency', title = None, label=None, - fnout = None): + ylab = 'Frequency', title = None, fnout = None): + """ + Method to quickly show some distribution of data. Takes array like data, + and a list of bins. Editing detail and inserting a legend is not possible. + :param array: List of values. + :type array: Array like + :param binlist: List of bins. + :type binlist: list + :param xlab: A label for the x-axes. + :type xlab: str + :param ylab: A label for the y-axes. + :type ylab: str + :param title: A title for the Plot. + :type title: str + :param fnout: A path to save the plot instead of showing. + Has to contain filename and type. Like: 'path/to/file.png' + :type fnout. str + :return: - + """ import matplotlib.pyplot as plt plt.hist(array,bins = binlist) - plt.xlabel('Values') - plt.ylabel('Frequency') + plt.xlabel(xlab) + plt.ylabel(ylab) if title: - title_str = 'Quantile distance quotient distribution' - if label: - title_str += ' (' + label + ')' - plt.title(title_str) + plt.title(title) if fnout: - plt.savefig(fnout+'histplot.png') + plt.savefig(fnout) else: plt.show() def getPDFDict(self, month, evt): + """ + Helper function for __iter__(). Should not be called directly. + """ self.pdfdict = PDFDictionary.from_quakeml(os.path.join(self.directory,month,evt)) def getStatistics(self): + """ + On call function will get mean, median and standard deviation values + from self.p_stdarray and self.s_stdarray. Both must be + instances before calling this function. + :return: Creates instances of self.p_mean, self.p_std_std and self.p_median + for both phases (simultaneously) for the PDFstatistics object. + """ + if not self.p_stdarray or not self.s_stdarray: + raise NotImplementedError('Arrays are not properly set yet!') + elif type(self.p_stdarray) != type(np.zeros(1)) or type(self.s_stdarray) != type(np.zeros(1)): + raise TypeError('Array is not a proper numpy array.') + self.p_mean = self.p_stdarray.mean() self.p_std_std = self.p_stdarray.std() self.p_median = np.median(self.p_stdarray) @@ -460,12 +564,22 @@ class PDFstatistics(object): self.s_median = np.median(self.s_stdarray) - def writeThetaToFile(self,array,out_dir,filename = None): - fid = open(os.path.join(out_dir,filename), 'w') + def writeThetaToFile(self,array,out_dir): + """ + Method to write array like data to file. Useful since acquiring can take + serious amount of time when dealing with large databases. + :param array: List of values. + :type array: list + :param out_dir: Path to save file to including file name. + :type out_dir: str + :return: Saves a file at given output directory. + """ + fid = open(os.path.join(out_dir), 'w') for val in array: fid.write(str(val)+'\n') fid.close() + def main(): root_dir ='/home/sebastianp/Codetesting/xmls/' Insheim = PDFstatistics(root_dir) diff --git a/pylot/core/util/pdf.py b/pylot/core/util/pdf.py index 94c847ce..814b7112 100644 --- a/pylot/core/util/pdf.py +++ b/pylot/core/util/pdf.py @@ -363,13 +363,15 @@ class ProbabilityDensityFunction(object): return m def quantile_distance(self, prob_value): + if 0 >= prob_value or prob_value >= 0.5: + raise ValueError('Value out of range.') ql = self.quantile(prob_value) qu = self.quantile(1 - prob_value) return qu - ql def qtile_dist_quot(self,x): - if x <= 0 or x >= 0.5: + if x <= 0 or x >= 0.25: raise ValueError('Value out of range.') return self.quantile_distance(0.5-x)/self.quantile_distance(x) diff --git a/pylot/core/util/utils.py b/pylot/core/util/utils.py index b24efb4d..1149ae4f 100644 --- a/pylot/core/util/utils.py +++ b/pylot/core/util/utils.py @@ -223,10 +223,10 @@ def getPatternLine(fn, pattern): def isSorted(iterable): ''' - - :param iterable: - :type iterable: - :return: + Takes an iterable and checks if args* are in order. + :param iterable: any with defined __ls__() and __gs__() + :type iterable: list + :return: Boolean ''' return sorted(iterable) == iterable From 49727b25df5e33d108aa75bcef1cfd4684ca3600 Mon Sep 17 00:00:00 2001 From: Sebastianw Wehling-Benatelli Date: Fri, 12 Aug 2016 09:50:36 +0200 Subject: [PATCH 3/8] [change] improved docstrings and testing of utils.py --- pylot/core/util/utils.py | 132 ++++++++++++++++++++++++++++----------- 1 file changed, 96 insertions(+), 36 deletions(-) diff --git a/pylot/core/util/utils.py b/pylot/core/util/utils.py index 1149ae4f..f49dd67d 100644 --- a/pylot/core/util/utils.py +++ b/pylot/core/util/utils.py @@ -83,8 +83,8 @@ def clims(lim1, lim2): def demeanTrace(trace, window): """ - returns the DATA where each trace is demean by the average value within - WINDOW + takes a trace object and returns the same trace object but with data + demeaned within a certain time window :param trace: waveform trace object :type trace: `~obspy.core.stream.Trace` :param window: @@ -101,9 +101,9 @@ def findComboBoxIndex(combo_box, val): Function findComboBoxIndex takes a QComboBox object and a string and returns either 0 or the index throughout all QComboBox items. :param combo_box: Combo box object. - :type combo_box: QComboBox + :type combo_box: `~QComboBox` :param val: Name of a combo box to search for. - :type val: + :type val: basestring :return: index value of item with name val or 0 """ return combo_box.findText(val) if combo_box.findText(val) is not -1 else 0 @@ -111,21 +111,35 @@ def findComboBoxIndex(combo_box, val): def find_nearest(array, value): ''' - Function find_nearest takes an array and a value and returns the - index of the nearest value found in the array. - :param array: - :param value: - :return: + function find_nearest takes an array and a value and returns the + index of the nearest value found in the array + :param array: array containing values + :type array: `~numpy.ndarray` + :param value: number searched for + :return: index of the array item being nearest to the value + + >>> a = np.array([ 1.80339578, -0.72546654, 0.95769195, -0.98320759, 0.85922623]) + >>> find_nearest(a, 1.3) + 2 + >>> find_nearest(a, 0) + 1 + >>> find_nearest(a, 2) + 0 + >>> find_nearest(a, -1) + 3 + >>> a = np.array([ 1.1, -0.7, 0.9, -0.9, 0.8]) + >>> find_nearest(a, 0.849) + 4 ''' return (np.abs(array - value)).argmin() def fnConstructor(s): ''' - - :param s: - :type s: - :return: + takes a string and returns a valid filename (especially on windows machines) + :param s: desired filename + :type s: str + :return: valid filename ''' if type(s) is str: s = s.split(':')[-1] @@ -143,7 +157,21 @@ def fnConstructor(s): def four_digits(year): - if year + 2000 < UTCDateTime.utcnow().year: + """ + takes a two digit year integer and returns the correct four digit equivalent + from the last 100 years + :param year: two digit year + :type year: int + :return: four digit year correspondant + + >>> four_digits(20) + 1920 + >>> four_digits(16) + 2016 + >>> four_digits(00) + 2000 + """ + if year + 2000 <= UTCDateTime.utcnow().year: year += 2000 else: year += 1900 @@ -152,10 +180,11 @@ def four_digits(year): def getGlobalTimes(stream): ''' - - :param stream: - :type stream - :return: + takes a stream object and returns the latest end and the earliest start + time of all contained trace objects + :param stream: seismological data stream + :type stream: `~obspy.core.stream.Stream` + :return: minimum start time and maximum end time ''' min_start = UTCDateTime() max_end = None @@ -169,6 +198,8 @@ def getGlobalTimes(stream): def getHash(time): ''' + takes a time object and returns the corresponding SHA1 hash of the + formatted date string :param time: time object for which a hash should be calculated :type time: :class: `~obspy.core.utcdatetime.UTCDateTime` object :return: str @@ -180,27 +211,26 @@ def getHash(time): def getLogin(): ''' - - :return: + returns the actual user's login ID + :return: login ID ''' return pwd.getpwuid(os.getuid())[0] def getOwner(fn): ''' - - :param fn: - :type fn: - :return: + takes a filename and return the login ID of the actual owner of the file + :param fn: filename of the file tested + :type fn: str + :return: login ID of the file's owner ''' return pwd.getpwuid(os.stat(fn).st_uid).pw_name def getPatternLine(fn, pattern): """ - Takes a file name and a pattern string to search for in the file and - returns the first line which contains the pattern string otherwise None. - + takes a file name and a pattern string to search for in the file and + returns the first line which contains the pattern string otherwise 'None' :param fn: file name :type fn: str :param pattern: pattern string to search for @@ -223,22 +253,52 @@ def getPatternLine(fn, pattern): def isSorted(iterable): ''' - Takes an iterable and checks if args* are in order. - :param iterable: any with defined __ls__() and __gs__() - :type iterable: list + takes an iterable and returns 'True' if the items are in order otherwise + 'False' + :param iterable: an iterable object + :type iterable: :return: Boolean + + >>> isSorted(1) + Traceback (most recent call last): + ... + AssertionError: object is not iterable; object: 1 + >>> isSorted([1,2,3,4]) + True + >>> isSorted('abcd') + True + >>> isSorted('bcad') + False + >>> isSorted([2,3,1,4]) + False ''' + assert isIterable(iterable), 'object is not iterable; object: {' \ + '0}'.format(iterable) + if type(iterable) is str: + iterable = [s for s in iterable] return sorted(iterable) == iterable +def isIterable(obj): + """ + takes a python object and returns 'True' is the object is iterable and + 'False' otherwise + :param obj: a python object + :return: True of False + """ + try: + iterator = iter(obj) + except TypeError as te: + return False + return True + def prepTimeAxis(stime, trace): ''' - - :param stime: - :type stime: - :param trace: - :type trace: - :return: + takes a starttime and a trace object and returns a valid time axis for + plotting + :param stime: start time of the actual seismogram as UTCDateTime + :param trace: seismic trace object + :return: valid numpy array with time stamps for plotting ''' nsamp = trace.stats.npts srate = trace.stats.sampling_rate From 08749840bae23d3b30c45c6235ba1293032a2656 Mon Sep 17 00:00:00 2001 From: sebastianp Date: Tue, 16 Aug 2016 09:14:56 +0200 Subject: [PATCH 4/8] resetting oversampling to 1 "one" --- pylot/core/util/pdf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pylot/core/util/pdf.py b/pylot/core/util/pdf.py index 814b7112..da9020c5 100644 --- a/pylot/core/util/pdf.py +++ b/pylot/core/util/pdf.py @@ -326,7 +326,7 @@ class ProbabilityDensityFunction(object): raise ValueError('value out of bounds: {0}'.format(value)) return self.prob_limits((value, self.axis[-1])) - def prob_limits(self, limits, oversampling=10.): + def prob_limits(self, limits, oversampling=1.): sampling = self.incr / oversampling lim = np.arange(limits[0], limits[1], sampling) data = self.data(lim) From 71f59b3c64446fea02c682c27a0cba6ebd4eeef6 Mon Sep 17 00:00:00 2001 From: Sebastianw Wehling-Benatelli Date: Thu, 18 Aug 2016 10:22:30 +0200 Subject: [PATCH 5/8] [change] extracted plotting and started new module for plotting; improved docstring quality --- pylot/core/pick/compare.py | 157 +++++++++++------------------------- pylot/core/util/pdf.py | 27 ++++++- pylot/core/util/plotting.py | 57 +++++++++++++ 3 files changed, 132 insertions(+), 109 deletions(-) create mode 100644 pylot/core/util/plotting.py diff --git a/pylot/core/pick/compare.py b/pylot/core/pick/compare.py index a56e58e8..15f604f6 100644 --- a/pylot/core/pick/compare.py +++ b/pylot/core/pick/compare.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import copy +import operator import os import numpy as np import glob @@ -151,7 +152,6 @@ class Comparison(object): return rlist def get_array(self, phase, method_name): - import operator method = operator.methodcaller(method_name) pdf_list = self.get_all(phase) rarray = map(method, pdf_list) @@ -258,6 +258,15 @@ class PDFDictionary(object): 'time is not implemented yet! Sorry!') return PDFDictionary(picksdict_from_picks(cat[0])) + def get_all(self, phase): + rlist = list() + for phases in self.pdf_data.values(): + try: + rlist.append(phases[phase]) + except KeyError: + continue + return rlist + def generate_pdf_data(self, type='exp'): """ Returns probabiliy density function dictionary containing the @@ -350,31 +359,9 @@ class PDFstatistics(object): self.directory = directory self.evtlist = list() self.return_phase = None + self.make_fnlist() - - def readTheta(self, arname, dir, fnpattern): - """ - Loads an array from file into object instance. - :param arname: Name of Array beeing created. - :type arname: string - :param dir: Directory where file is to be found. - :type dir: string - :param fnpattern: file name pattern for reading multiple files into one array. - :type fnpattern: string - :return: a list with all args* from the files. - """ - exec('self.' + arname +' = []') - filelist = glob.glob1(dir, fnpattern) - for file in filelist: - fid = open(os.path.join(dir,file), 'r') - list = [] - for line in fid.readlines(): - list.append(eval(line)) - exec('self.' + arname + ' += list') - fid.close() - - - def makeFileList(self, fn_pattern='*.xml'): + def make_fnlist(self, fn_pattern='*.xml'): """ Takes a file pattern and searches for that recursively in the set path for the object. :param fn_pattern: A pattern that can identify all datafiles. Default Value = '*.xml' @@ -389,7 +376,6 @@ class PDFstatistics(object): evtlist.append(os.path.join(root, file)) self.evtlist = evtlist - def __iter__(self): """Iterating over the PDFstatistics object yields every single pdf from the list of events""" assert isinstance(self.return_phase, str), 'phase has to be set before being able to iterate over items...' @@ -401,7 +387,6 @@ class PDFstatistics(object): except KeyError: continue - def set_return_phase(self, type): """ Sets the phase typ of event data that is returned on iteration over the object. @@ -414,40 +399,45 @@ class PDFstatistics(object): else: self.return_phase = type.upper() - - def getQD(self,value): + def quantile_distances(self, value): """ - Takes a probability value and and returns the distance - between two complementary quantiles. - For example: getQD(0.3) yields Quantile(1-0.3) - Quantile(0.3) - :param value: 0 < value < 0.5 + takes a probability value and and returns the distance + between two complementary quantiles + + .. math:: + + QA_\alpha = Q(1 - \alpha) - Q(\alpha) + + :param value: probability value :math:\alpha :type value: float - :return: returns a list of all quantile distances for all pdfs in + :return: list of all quantile distances for all pdfs in the list of events. """ - QDlist = [] + rlist = [] for pdf in self: - QD = pdf.quantile_distance(value) - QDlist.append(QD) - return QDlist + rval = pdf.quantile_distance(value) + rlist.append(rval) + return rlist - def getQDQ(self,value): + def quantile_distance_fractions(self, value): """ - Takes a probability value and and returns the fraction of - two quantile distances. - For example: - getQDQ(x) = getQD(0.5-x)/getQD(x) - (Quantile(1-0.5-x) - Quantile(x)) / (Quantile(1-x) - Quantile(x)) - :param value: 0 < value < 0.25 + takes a probability value and returns the fraction of two + corresponding quantile distances + + .. math:: + + Q\Theta_\alpha = \frac{QA(0.5 - \alpha)}{QA(\alpha)} + + :param value: probability value :math:\alpha :return: returns a list of all quantile fractions for all pdfs in the list of events. """ - QDQlist = [] + rlist = list() for pdf in self: - QDQ = pdf.qtile_dist_quot(value) - QDQlist.append(QDQ) - return QDQlist + rval = pdf.quantile_dist_frac(value) + rlist.append(rval) + return rlist def getSTD(self): @@ -481,61 +471,6 @@ class PDFstatistics(object): 'Actual phase type: {0}'.format(self.return_phase)) - def getBinList(self,l_boundary,u_boundary,nbins = 100): - """ - Helper function for self.histplot(). Takes in two boundaries and - a number of bins and creates a list of bins which can be passed - to self.histplot(). - :param l_boundary: Any number. - :type l_boundary: float - :param u_boundary: Any number that is greater than l_boundary. - :type u_boundary: float - :param nbins: Any positive integer. - :type nbins: int - :return: A list of equidistant bins. - """ - if u_boundary <= l_boundary: - raise ValueError('Upper boundary must be greather than lower!') - elif nbins <= 0: - raise ValueError('Number of bins is not valid.') - binlist = [] - for i in range(nbins): - binlist.append(l_boundary + i*(u_boundary-l_boundary)/nbins) - return binlist - - - def histplot(self, array, binlist, xlab = 'Values', - ylab = 'Frequency', title = None, fnout = None): - """ - Method to quickly show some distribution of data. Takes array like data, - and a list of bins. Editing detail and inserting a legend is not possible. - :param array: List of values. - :type array: Array like - :param binlist: List of bins. - :type binlist: list - :param xlab: A label for the x-axes. - :type xlab: str - :param ylab: A label for the y-axes. - :type ylab: str - :param title: A title for the Plot. - :type title: str - :param fnout: A path to save the plot instead of showing. - Has to contain filename and type. Like: 'path/to/file.png' - :type fnout. str - :return: - - """ - import matplotlib.pyplot as plt - plt.hist(array,bins = binlist) - plt.xlabel(xlab) - plt.ylabel(ylab) - if title: - plt.title(title) - if fnout: - plt.savefig(fnout) - else: - plt.show() - - def getPDFDict(self, month, evt): """ Helper function for __iter__(). Should not be called directly. @@ -583,13 +518,19 @@ class PDFstatistics(object): def main(): root_dir ='/home/sebastianp/Codetesting/xmls/' Insheim = PDFstatistics(root_dir) - Insheim.makeFileList() + Insheim.make_fnlist() Insheim.set_return_phase('p') Insheim.getSTD() - qdlist = Insheim.getQDQ(0.3) - binlist = Insheim.getBinList(0.,3.) + qdlist = Insheim.quantile_distance_fractions(0.2) print qdlist if __name__ == "__main__": - main() \ No newline at end of file + import cProfile + + pr = cProfile.Profile() + pr.enable() + main() + pr.disable() + # after your program ends + pr.print_stats(sort="calls") \ No newline at end of file diff --git a/pylot/core/util/pdf.py b/pylot/core/util/pdf.py index 814b7112..dc0d78f7 100644 --- a/pylot/core/util/pdf.py +++ b/pylot/core/util/pdf.py @@ -363,6 +363,18 @@ class ProbabilityDensityFunction(object): return m def quantile_distance(self, prob_value): + """ + takes a probability value and and returns the distance + between two complementary quantiles + + .. math:: + + QA_\alpha = Q(1 - \alpha) - Q(\alpha) + + :param value: probability value :math:\alpha + :type value: float + :return: quantile distance + """ if 0 >= prob_value or prob_value >= 0.5: raise ValueError('Value out of range.') ql = self.quantile(prob_value) @@ -370,7 +382,20 @@ class ProbabilityDensityFunction(object): return qu - ql - def qtile_dist_quot(self,x): + def quantile_dist_frac(self, x): + """ + takes a probability value and returns the fraction of two + corresponding quantile distances ( + :func:`pylot.core.util.pdf.ProbabilityDensityFunction + #quantile_distance`) + + .. math:: + + Q\Theta_\alpha = \frac{QA(0.5 - \alpha)}{QA(\alpha)} + + :param value: probability value :math:\alpha + :return: quantile distance fraction + """ if x <= 0 or x >= 0.25: raise ValueError('Value out of range.') return self.quantile_distance(0.5-x)/self.quantile_distance(x) diff --git a/pylot/core/util/plotting.py b/pylot/core/util/plotting.py new file mode 100644 index 00000000..3efc3d86 --- /dev/null +++ b/pylot/core/util/plotting.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import matplotlib.pyplot as plt + +def create_bin_list(l_boundary, u_boundary, nbins=100): + """ + takes two boundaries and a number of bins and creates a list of bins for + histogram plotting + :param l_boundary: Any number. + :type l_boundary: float + :param u_boundary: Any number that is greater than l_boundary. + :type u_boundary: float + :param nbins: Any positive integer. + :type nbins: int + :return: A list of equidistant bins. + """ + if u_boundary <= l_boundary: + raise ValueError('Upper boundary must be greather than lower!') + elif nbins <= 0: + raise ValueError('Number of bins is not valid.') + binlist = [] + for i in range(nbins): + binlist.append(l_boundary + i * (u_boundary - l_boundary) / nbins) + return binlist + + +def histplot(array, binlist, xlab='Values', + ylab='Frequency', title=None, fnout=None): + """ + function to quickly show some distribution of data. Takes array like data, + and a list of bins. Editing detail and inserting a legend is not possible. + :param array: List of values. + :type array: Array like + :param binlist: List of bins. + :type binlist: list + :param xlab: A label for the x-axes. + :type xlab: str + :param ylab: A label for the y-axes. + :type ylab: str + :param title: A title for the Plot. + :type title: str + :param fnout: A path to save the plot instead of showing. + Has to contain filename and type. Like: 'path/to/file.png' + :type fnout. str + :return: - + """ + + plt.hist(array, bins=binlist) + plt.xlabel(xlab) + plt.ylabel(ylab) + if title: + plt.title(title) + if fnout: + plt.savefig(fnout) + else: + plt.show() \ No newline at end of file From f9d40439a98c0575db30454f0fbf7ad0296b9a16 Mon Sep 17 00:00:00 2001 From: Sebastianw Wehling-Benatelli Date: Fri, 19 Aug 2016 16:08:18 +0200 Subject: [PATCH 6/8] [change] oversampling reduced to avoid computational overhead --- pylot/core/util/pdf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pylot/core/util/pdf.py b/pylot/core/util/pdf.py index dc0d78f7..39174b38 100644 --- a/pylot/core/util/pdf.py +++ b/pylot/core/util/pdf.py @@ -326,7 +326,7 @@ class ProbabilityDensityFunction(object): raise ValueError('value out of bounds: {0}'.format(value)) return self.prob_limits((value, self.axis[-1])) - def prob_limits(self, limits, oversampling=10.): + def prob_limits(self, limits, oversampling=1.): sampling = self.incr / oversampling lim = np.arange(limits[0], limits[1], sampling) data = self.data(lim) From c500f1b8bb8e0c167acbb0a5bc5e19a4edff70da Mon Sep 17 00:00:00 2001 From: Sebastianw Wehling-Benatelli Date: Fri, 19 Aug 2016 16:09:29 +0200 Subject: [PATCH 7/8] [add] added new utility function to find a substring in a list of strings --- pylot/core/util/utils.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/pylot/core/util/utils.py b/pylot/core/util/utils.py index e86132d2..9f7697dc 100644 --- a/pylot/core/util/utils.py +++ b/pylot/core/util/utils.py @@ -107,6 +107,34 @@ def findComboBoxIndex(combo_box, val): """ return combo_box.findText(val) if combo_box.findText(val) is not -1 else 0 +def find_in_list(list, str): + """ + takes a list of strings and a string and returns the first list item + matching the string pattern + :param list: list to search in + :param str: pattern to search for + :return: first list item containing pattern + + .. example:: + + >>> l = ['/dir/e1234.123.12', '/dir/e2345.123.12', 'abc123', 'def456'] + >>> find_in_list(l, 'dir') + '/dir/e1234.123.12' + >>> find_in_list(l, 'e1234') + '/dir/e1234.123.12' + >>> find_in_list(l, 'e2') + '/dir/e2345.123.12' + >>> find_in_list(l, 'ABC') + 'abc123' + >>> find_in_list(l, 'f456') + 'def456' + >>> find_in_list(l, 'gurke') + + """ + rlist = [s for s in list if str.lower() in s.lower()] + if rlist: + return rlist[0] + return None def find_nearest(array, value): ''' From f428a72e86c8dd494ec27e1899fee49273588638 Mon Sep 17 00:00:00 2001 From: Sebastianw Wehling-Benatelli Date: Fri, 19 Aug 2016 16:32:31 +0200 Subject: [PATCH 8/8] [enhancement] improved functionality and flexibility of PDFstatistics class --- pylot/core/pick/compare.py | 190 +++++++++++++++---------------------- 1 file changed, 75 insertions(+), 115 deletions(-) diff --git a/pylot/core/pick/compare.py b/pylot/core/pick/compare.py index 15f604f6..3150d157 100644 --- a/pylot/core/pick/compare.py +++ b/pylot/core/pick/compare.py @@ -12,6 +12,7 @@ from obspy import read_events from pylot.core.io.phases import picksdict_from_picks from pylot.core.util.pdf import ProbabilityDensityFunction +from pylot.core.util.utils import find_in_list from pylot.core.util.version import get_git_version as _getVersionString __version__ = _getVersionString() @@ -356,9 +357,9 @@ class PDFstatistics(object): def __init__(self, directory): """Initiates some values needed when dealing with pdfs later""" - self.directory = directory - self.evtlist = list() - self.return_phase = None + self._rootdir = directory + self._evtlist = list() + self._rphase = None self.make_fnlist() def make_fnlist(self, fn_pattern='*.xml'): @@ -368,136 +369,97 @@ class PDFstatistics(object): :type fn_pattern: string :return: creates a list of events saved in the PDFstatistics object. """ - evtlist = glob.glob1((os.path.join(self.directory)), fn_pattern) - if not evtlist: - for root, _, files in os.walk(self.directory): - for file in files: - if file.endswith(fn_pattern[1:]): - evtlist.append(os.path.join(root, file)) - self.evtlist = evtlist + evtlist = list() + for root, _, files in os.walk(self.root): + for file in files: + if file.endswith(fn_pattern[1:]): + evtlist.append(os.path.join(root, file)) + self._evtlist = evtlist def __iter__(self): - """Iterating over the PDFstatistics object yields every single pdf from the list of events""" - assert isinstance(self.return_phase, str), 'phase has to be set before being able to iterate over items...' - for evt in self.evtlist: - self.getPDFDict(self.directory, evt) - for station, pdfs in self.pdfdict.pdf_data.items(): - try: - yield pdfs[self.return_phase] - except KeyError: - continue + for evt in self._evtlist: + yield PDFDictionary.from_quakeml(evt) - def set_return_phase(self, type): + def __getitem__(self, item): + evt = find_in_list(self._evtlist, item) + if evt: + return PDFDictionary.from_quakeml(evt) + return None + + @property + def root(self): + return self._rootdir + + @root.setter + def root(self, value): + if os.path.exists(value): + self._rootdir = value + else: + raise ValueError("path doesn't exist: %s" % value) + + @property + def curphase(self): """ - Sets the phase typ of event data that is returned on iteration over the object. - :param type: can be either p (p-phase) or s (s-phase). - :type type: string + return the current phase type of interest + :return: current phase + """ + return self._rphase + + @curphase.setter + def curphase(self, type): + """ + setter method for property curphase + :param type: specify the phase type of interest + :type type: string ('p' or 's') :return: - """ if type.upper() not in 'PS': raise ValueError("phase type must be either 'P' or 'S'!") else: - self.return_phase = type.upper() + self._rphase = type.upper() - def quantile_distances(self, value): + def get(self, property='std', value=None): """ - takes a probability value and and returns the distance - between two complementary quantiles - - .. math:: - - QA_\alpha = Q(1 - \alpha) - Q(\alpha) + takes a property str and a probability value and returns all + property's values for the current phase of interest + :func:`self.curphase` + :param property: property name (default: 'std') + :type property: str :param value: probability value :math:\alpha :type value: float - :return: list of all quantile distances for all pdfs in - the list of events. + :return: list containing all property's values """ + assert isinstance(self.curphase, + str), 'phase has to be set before being ' \ + 'able to iterate over items...' rlist = [] - for pdf in self: - rval = pdf.quantile_distance(value) - rlist.append(rval) - return rlist + method_options = dict(STD='standard_deviation', + Q='quantile', + QD='quantile_distance', + QDF='quantile_dist_frac') - - def quantile_distance_fractions(self, value): - """ - takes a probability value and returns the fraction of two - corresponding quantile distances - - .. math:: - - Q\Theta_\alpha = \frac{QA(0.5 - \alpha)}{QA(\alpha)} - - :param value: probability value :math:\alpha - :return: returns a list of all quantile fractions for all pdfs in - the list of events. - """ - rlist = list() - for pdf in self: - rval = pdf.quantile_dist_frac(value) - rlist.append(rval) - return rlist - - - def getSTD(self): - """ - Iterates over PDFstatistics object and returns the standard - deviation of all pdfs in the list of events. - :return: saves an instance of self.p_stdarray or - self.s_stdarray, depending on set phase. - """ - std = [] - for pdf in self: + # create method caller for easy mapping + if property.upper() == 'STD': + method = operator.methodcaller(method_options[property.upper()]) + elif value is not None: try: - std.append(pdf.standard_deviation()) + method = operator.methodcaller(method_options[property.upper()], + value) except KeyError: - continue - std = np.array(std) - self.set_stdarray(std) - - - def set_stdarray(self, array): - """ - Helper function for self.getSTD(). This function - should not be called directly. - """ - if self.return_phase == 'P': - self.p_stdarray = array - elif self.return_phase == 'S': - self.s_stdarray = array + raise KeyError('unknwon property: {0}'.format(property.upper())) else: - raise ValueError('phase type not set properly...\n' - 'Actual phase type: {0}'.format(self.return_phase)) + raise ValueError("for call to method {0} value has to be " + "defined but is 'None' ".format(method_options[ + property.upper()])) + for pdf_dict in self: + # create worklist + wlist = pdf_dict.get_all(self.curphase) + # map method calls to object in worklist + rlist += map(method, wlist) - def getPDFDict(self, month, evt): - """ - Helper function for __iter__(). Should not be called directly. - """ - self.pdfdict = PDFDictionary.from_quakeml(os.path.join(self.directory,month,evt)) - - - def getStatistics(self): - """ - On call function will get mean, median and standard deviation values - from self.p_stdarray and self.s_stdarray. Both must be - instances before calling this function. - :return: Creates instances of self.p_mean, self.p_std_std and self.p_median - for both phases (simultaneously) for the PDFstatistics object. - """ - if not self.p_stdarray or not self.s_stdarray: - raise NotImplementedError('Arrays are not properly set yet!') - elif type(self.p_stdarray) != type(np.zeros(1)) or type(self.s_stdarray) != type(np.zeros(1)): - raise TypeError('Array is not a proper numpy array.') - - self.p_mean = self.p_stdarray.mean() - self.p_std_std = self.p_stdarray.std() - self.p_median = np.median(self.p_stdarray) - self.s_mean = self.s_stdarray.mean() - self.s_std_std = self.s_stdarray.std() - self.s_median = np.median(self.s_stdarray) - + return rlist def writeThetaToFile(self,array,out_dir): """ @@ -518,10 +480,8 @@ class PDFstatistics(object): def main(): root_dir ='/home/sebastianp/Codetesting/xmls/' Insheim = PDFstatistics(root_dir) - Insheim.make_fnlist() - Insheim.set_return_phase('p') - Insheim.getSTD() - qdlist = Insheim.quantile_distance_fractions(0.2) + Insheim.curphase = 'p' + qdlist = Insheim.get('qdf', 0.2) print qdlist