diff --git a/pylot/core/pick/compare.py b/pylot/core/pick/compare.py index 2cb128b7..3150d157 100644 --- a/pylot/core/pick/compare.py +++ b/pylot/core/pick/compare.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import copy +import operator import os import numpy as np import glob @@ -11,6 +12,7 @@ from obspy import read_events from pylot.core.io.phases import picksdict_from_picks from pylot.core.util.pdf import ProbabilityDensityFunction +from pylot.core.util.utils import find_in_list from pylot.core.util.version import get_git_version as _getVersionString __version__ = _getVersionString() @@ -151,7 +153,6 @@ class Comparison(object): return rlist def get_array(self, phase, method_name): - import operator method = operator.methodcaller(method_name) pdf_list = self.get_all(phase) rarray = map(method, pdf_list) @@ -258,6 +259,15 @@ class PDFDictionary(object): 'time is not implemented yet! Sorry!') return PDFDictionary(picksdict_from_picks(cat[0])) + def get_all(self, phase): + rlist = list() + for phases in self.pdf_data.values(): + try: + rlist.append(phases[phase]) + except KeyError: + continue + return rlist + def generate_pdf_data(self, type='exp'): """ Returns probabiliy density function dictionary containing the @@ -339,143 +349,148 @@ class PDFDictionary(object): class PDFstatistics(object): - ''' - To do: - plots for std, quantiles, - ''' + """ + This object can be used to get various statistic values from probabillity density functions. + Takes a path as argument. + """ + + def __init__(self, directory): - self.directory = directory - self.evtlist = list() - self.return_phase = None + """Initiates some values needed when dealing with pdfs later""" + self._rootdir = directory + self._evtlist = list() + self._rphase = None + self.make_fnlist() - def readTheta(self, arname, dir, fnpattern): - exec('self.' + arname +' = []') - filelist = glob.glob1(dir, fnpattern) - for file in filelist: - fid = open(os.path.join(dir,file), 'r') - list = [] - for line in fid.readlines(): - list.append(eval(line)) - exec('self.' + arname + ' += list') - fid.close() - - def makeFileList(self, fn_pattern='*.xml'): + def make_fnlist(self, fn_pattern='*.xml'): + """ + Takes a file pattern and searches for that recursively in the set path for the object. + :param fn_pattern: A pattern that can identify all datafiles. Default Value = '*.xml' + :type fn_pattern: string + :return: creates a list of events saved in the PDFstatistics object. + """ evtlist = list() - evtlist = glob.glob1((os.path.join(self.directory)), fn_pattern) - if not evtlist: - for root, _, files in os.walk(self.directory): - for file in files: - if file.endswith(fn_pattern[1:]): - evtlist.append(os.path.join(root, file)) - self.evtlist = evtlist + for root, _, files in os.walk(self.root): + for file in files: + if file.endswith(fn_pattern[1:]): + evtlist.append(os.path.join(root, file)) + self._evtlist = evtlist def __iter__(self): - assert isinstance(self.return_phase, str), 'phase has to be set before being able to iterate over items...' - for evt in self.evtlist: - self.getPDFDict(self.directory, evt) - for station, pdfs in self.pdfdict.pdf_data.items(): - try: - yield pdfs[self.return_phase] - except KeyError: - continue + for evt in self._evtlist: + yield PDFDictionary.from_quakeml(evt) - def set_return_phase(self, type): + def __getitem__(self, item): + evt = find_in_list(self._evtlist, item) + if evt: + return PDFDictionary.from_quakeml(evt) + return None + + @property + def root(self): + return self._rootdir + + @root.setter + def root(self, value): + if os.path.exists(value): + self._rootdir = value + else: + raise ValueError("path doesn't exist: %s" % value) + + @property + def curphase(self): + """ + return the current phase type of interest + :return: current phase + """ + return self._rphase + + @curphase.setter + def curphase(self, type): + """ + setter method for property curphase + :param type: specify the phase type of interest + :type type: string ('p' or 's') + :return: - + """ if type.upper() not in 'PS': raise ValueError("phase type must be either 'P' or 'S'!") else: - self.return_phase = type.upper() + self._rphase = type.upper() - def getQD(self,value): - QDlist = [] - for pdf in self: - QD = pdf.quantile_distance(value) - QDlist.append(QD) - return QDlist + def get(self, property='std', value=None): + """ + takes a property str and a probability value and returns all + property's values for the current phase of interest + :func:`self.curphase` + :param property: property name (default: 'std') + :type property: str + :param value: probability value :math:\alpha + :type value: float + :return: list containing all property's values + """ + assert isinstance(self.curphase, + str), 'phase has to be set before being ' \ + 'able to iterate over items...' + rlist = [] + method_options = dict(STD='standard_deviation', + Q='quantile', + QD='quantile_distance', + QDF='quantile_dist_frac') - def getQDQ(self,value): - QDQlist = [] - for pdf in self: - QDQ = pdf.qtile_dist_quot(value) - QDQlist.append(QDQ) - return QDQlist - - - def getSTD(self): - std = [] - for pdf in self: + # create method caller for easy mapping + if property.upper() == 'STD': + method = operator.methodcaller(method_options[property.upper()]) + elif value is not None: try: - std.append(pdf.standard_deviation()) + method = operator.methodcaller(method_options[property.upper()], + value) except KeyError: - continue - std = np.array(std) - self.set_stdarray(std) - - - def set_stdarray(self, array): - if self.return_phase == 'P': - self.p_stdarray = array - elif self.return_phase == 'S': - self.s_stdarray = array + raise KeyError('unknwon property: {0}'.format(property.upper())) else: - raise ValueError('phase type not set properly...\n' - 'Actual phase type: {0}'.format(self.return_phase)) + raise ValueError("for call to method {0} value has to be " + "defined but is 'None' ".format(method_options[ + property.upper()])) + for pdf_dict in self: + # create worklist + wlist = pdf_dict.get_all(self.curphase) + # map method calls to object in worklist + rlist += map(method, wlist) - def getBinList(self,l_boundary,u_boundary,nbins = 100): - binlist = [] - for i in range(nbins): - binlist.append(l_boundary + i*(u_boundary-l_boundary)/nbins) - return binlist + return rlist - - def histplot(self, array, binlist, xlab = 'Values', - ylab = 'Frequency', title = None, label=None, - fnout = None): - import matplotlib.pyplot as plt - plt.hist(array,bins = binlist) - plt.xlabel('Values') - plt.ylabel('Frequency') - if title: - title_str = 'Quantile distance quotient distribution' - if label: - title_str += ' (' + label + ')' - plt.title(title_str) - if fnout: - plt.savefig(fnout+'histplot.png') - else: - plt.show() - - - def getPDFDict(self, month, evt): - self.pdfdict = PDFDictionary.from_quakeml(os.path.join(self.directory,month,evt)) - - - def getStatistics(self): - self.p_mean = self.p_stdarray.mean() - self.p_std_std = self.p_stdarray.std() - self.p_median = np.median(self.p_stdarray) - self.s_mean = self.s_stdarray.mean() - self.s_std_std = self.s_stdarray.std() - self.s_median = np.median(self.s_stdarray) - - - def writeThetaToFile(self,array,out_dir,filename = None): - fid = open(os.path.join(out_dir,filename), 'w') + def writeThetaToFile(self,array,out_dir): + """ + Method to write array like data to file. Useful since acquiring can take + serious amount of time when dealing with large databases. + :param array: List of values. + :type array: list + :param out_dir: Path to save file to including file name. + :type out_dir: str + :return: Saves a file at given output directory. + """ + fid = open(os.path.join(out_dir), 'w') for val in array: fid.write(str(val)+'\n') fid.close() + def main(): root_dir ='/home/sebastianp/Codetesting/xmls/' Insheim = PDFstatistics(root_dir) - Insheim.makeFileList() - Insheim.set_return_phase('p') - Insheim.getSTD() - qdlist = Insheim.getQDQ(0.3) - binlist = Insheim.getBinList(0.,3.) + Insheim.curphase = 'p' + qdlist = Insheim.get('qdf', 0.2) print qdlist if __name__ == "__main__": - main() \ No newline at end of file + import cProfile + + pr = cProfile.Profile() + pr.enable() + main() + pr.disable() + # after your program ends + pr.print_stats(sort="calls") \ No newline at end of file diff --git a/pylot/core/util/pdf.py b/pylot/core/util/pdf.py index 94c847ce..39174b38 100644 --- a/pylot/core/util/pdf.py +++ b/pylot/core/util/pdf.py @@ -326,7 +326,7 @@ class ProbabilityDensityFunction(object): raise ValueError('value out of bounds: {0}'.format(value)) return self.prob_limits((value, self.axis[-1])) - def prob_limits(self, limits, oversampling=10.): + def prob_limits(self, limits, oversampling=1.): sampling = self.incr / oversampling lim = np.arange(limits[0], limits[1], sampling) data = self.data(lim) @@ -363,13 +363,40 @@ class ProbabilityDensityFunction(object): return m def quantile_distance(self, prob_value): + """ + takes a probability value and and returns the distance + between two complementary quantiles + + .. math:: + + QA_\alpha = Q(1 - \alpha) - Q(\alpha) + + :param value: probability value :math:\alpha + :type value: float + :return: quantile distance + """ + if 0 >= prob_value or prob_value >= 0.5: + raise ValueError('Value out of range.') ql = self.quantile(prob_value) qu = self.quantile(1 - prob_value) return qu - ql - def qtile_dist_quot(self,x): - if x <= 0 or x >= 0.5: + def quantile_dist_frac(self, x): + """ + takes a probability value and returns the fraction of two + corresponding quantile distances ( + :func:`pylot.core.util.pdf.ProbabilityDensityFunction + #quantile_distance`) + + .. math:: + + Q\Theta_\alpha = \frac{QA(0.5 - \alpha)}{QA(\alpha)} + + :param value: probability value :math:\alpha + :return: quantile distance fraction + """ + if x <= 0 or x >= 0.25: raise ValueError('Value out of range.') return self.quantile_distance(0.5-x)/self.quantile_distance(x) diff --git a/pylot/core/util/plotting.py b/pylot/core/util/plotting.py new file mode 100644 index 00000000..3efc3d86 --- /dev/null +++ b/pylot/core/util/plotting.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import matplotlib.pyplot as plt + +def create_bin_list(l_boundary, u_boundary, nbins=100): + """ + takes two boundaries and a number of bins and creates a list of bins for + histogram plotting + :param l_boundary: Any number. + :type l_boundary: float + :param u_boundary: Any number that is greater than l_boundary. + :type u_boundary: float + :param nbins: Any positive integer. + :type nbins: int + :return: A list of equidistant bins. + """ + if u_boundary <= l_boundary: + raise ValueError('Upper boundary must be greather than lower!') + elif nbins <= 0: + raise ValueError('Number of bins is not valid.') + binlist = [] + for i in range(nbins): + binlist.append(l_boundary + i * (u_boundary - l_boundary) / nbins) + return binlist + + +def histplot(array, binlist, xlab='Values', + ylab='Frequency', title=None, fnout=None): + """ + function to quickly show some distribution of data. Takes array like data, + and a list of bins. Editing detail and inserting a legend is not possible. + :param array: List of values. + :type array: Array like + :param binlist: List of bins. + :type binlist: list + :param xlab: A label for the x-axes. + :type xlab: str + :param ylab: A label for the y-axes. + :type ylab: str + :param title: A title for the Plot. + :type title: str + :param fnout: A path to save the plot instead of showing. + Has to contain filename and type. Like: 'path/to/file.png' + :type fnout. str + :return: - + """ + + plt.hist(array, bins=binlist) + plt.xlabel(xlab) + plt.ylabel(ylab) + if title: + plt.title(title) + if fnout: + plt.savefig(fnout) + else: + plt.show() \ No newline at end of file diff --git a/pylot/core/util/utils.py b/pylot/core/util/utils.py index b24efb4d..9f7697dc 100644 --- a/pylot/core/util/utils.py +++ b/pylot/core/util/utils.py @@ -18,7 +18,6 @@ def _pickle_method(m): return getattr, (m.im_self, m.im_func.func_name) def fit_curve(x, y): - return splev, splrep(x, y) def getindexbounds(f, eta): @@ -83,8 +82,8 @@ def clims(lim1, lim2): def demeanTrace(trace, window): """ - returns the DATA where each trace is demean by the average value within - WINDOW + takes a trace object and returns the same trace object but with data + demeaned within a certain time window :param trace: waveform trace object :type trace: `~obspy.core.stream.Trace` :param window: @@ -101,31 +100,73 @@ def findComboBoxIndex(combo_box, val): Function findComboBoxIndex takes a QComboBox object and a string and returns either 0 or the index throughout all QComboBox items. :param combo_box: Combo box object. - :type combo_box: QComboBox + :type combo_box: `~QComboBox` :param val: Name of a combo box to search for. - :type val: + :type val: basestring :return: index value of item with name val or 0 """ return combo_box.findText(val) if combo_box.findText(val) is not -1 else 0 +def find_in_list(list, str): + """ + takes a list of strings and a string and returns the first list item + matching the string pattern + :param list: list to search in + :param str: pattern to search for + :return: first list item containing pattern + + .. example:: + + >>> l = ['/dir/e1234.123.12', '/dir/e2345.123.12', 'abc123', 'def456'] + >>> find_in_list(l, 'dir') + '/dir/e1234.123.12' + >>> find_in_list(l, 'e1234') + '/dir/e1234.123.12' + >>> find_in_list(l, 'e2') + '/dir/e2345.123.12' + >>> find_in_list(l, 'ABC') + 'abc123' + >>> find_in_list(l, 'f456') + 'def456' + >>> find_in_list(l, 'gurke') + + """ + rlist = [s for s in list if str.lower() in s.lower()] + if rlist: + return rlist[0] + return None def find_nearest(array, value): ''' - Function find_nearest takes an array and a value and returns the - index of the nearest value found in the array. - :param array: - :param value: - :return: + function find_nearest takes an array and a value and returns the + index of the nearest value found in the array + :param array: array containing values + :type array: `~numpy.ndarray` + :param value: number searched for + :return: index of the array item being nearest to the value + + >>> a = np.array([ 1.80339578, -0.72546654, 0.95769195, -0.98320759, 0.85922623]) + >>> find_nearest(a, 1.3) + 2 + >>> find_nearest(a, 0) + 1 + >>> find_nearest(a, 2) + 0 + >>> find_nearest(a, -1) + 3 + >>> a = np.array([ 1.1, -0.7, 0.9, -0.9, 0.8]) + >>> find_nearest(a, 0.849) + 4 ''' return (np.abs(array - value)).argmin() def fnConstructor(s): ''' - - :param s: - :type s: - :return: + takes a string and returns a valid filename (especially on windows machines) + :param s: desired filename + :type s: str + :return: valid filename ''' if type(s) is str: s = s.split(':')[-1] @@ -143,7 +184,21 @@ def fnConstructor(s): def four_digits(year): - if year + 2000 < UTCDateTime.utcnow().year: + """ + takes a two digit year integer and returns the correct four digit equivalent + from the last 100 years + :param year: two digit year + :type year: int + :return: four digit year correspondant + + >>> four_digits(20) + 1920 + >>> four_digits(16) + 2016 + >>> four_digits(00) + 2000 + """ + if year + 2000 <= UTCDateTime.utcnow().year: year += 2000 else: year += 1900 @@ -152,10 +207,11 @@ def four_digits(year): def getGlobalTimes(stream): ''' - - :param stream: - :type stream - :return: + takes a stream object and returns the latest end and the earliest start + time of all contained trace objects + :param stream: seismological data stream + :type stream: `~obspy.core.stream.Stream` + :return: minimum start time and maximum end time ''' min_start = UTCDateTime() max_end = None @@ -169,6 +225,8 @@ def getGlobalTimes(stream): def getHash(time): ''' + takes a time object and returns the corresponding SHA1 hash of the + formatted date string :param time: time object for which a hash should be calculated :type time: :class: `~obspy.core.utcdatetime.UTCDateTime` object :return: str @@ -180,32 +238,31 @@ def getHash(time): def getLogin(): ''' - - :return: + returns the actual user's login ID + :return: login ID ''' return pwd.getpwuid(os.getuid())[0] def getOwner(fn): ''' - - :param fn: - :type fn: - :return: + takes a filename and return the login ID of the actual owner of the file + :param fn: filename of the file tested + :type fn: str + :return: login ID of the file's owner ''' return pwd.getpwuid(os.stat(fn).st_uid).pw_name def getPatternLine(fn, pattern): """ - Takes a file name and a pattern string to search for in the file and - returns the first line which contains the pattern string otherwise None. - + takes a file name and a pattern string to search for in the file and + returns the first line which contains the pattern string otherwise 'None' :param fn: file name :type fn: str :param pattern: pattern string to search for :type pattern: str - :return: the complete line containing pattern or None + :return: the complete line containing the pattern string or None >>> getPatternLine('utils.py', 'python') '#!/usr/bin/env python\\n' @@ -223,22 +280,52 @@ def getPatternLine(fn, pattern): def isSorted(iterable): ''' - - :param iterable: + takes an iterable and returns 'True' if the items are in order otherwise + 'False' + :param iterable: an iterable object :type iterable: - :return: + :return: Boolean + + >>> isSorted(1) + Traceback (most recent call last): + ... + AssertionError: object is not iterable; object: 1 + >>> isSorted([1,2,3,4]) + True + >>> isSorted('abcd') + True + >>> isSorted('bcad') + False + >>> isSorted([2,3,1,4]) + False ''' + assert isIterable(iterable), 'object is not iterable; object: {' \ + '0}'.format(iterable) + if type(iterable) is str: + iterable = [s for s in iterable] return sorted(iterable) == iterable +def isIterable(obj): + """ + takes a python object and returns 'True' is the object is iterable and + 'False' otherwise + :param obj: a python object + :return: True of False + """ + try: + iterator = iter(obj) + except TypeError as te: + return False + return True + def prepTimeAxis(stime, trace): ''' - - :param stime: - :type stime: - :param trace: - :type trace: - :return: + takes a starttime and a trace object and returns a valid time axis for + plotting + :param stime: start time of the actual seismogram as UTCDateTime + :param trace: seismic trace object + :return: valid numpy array with time stamps for plotting ''' nsamp = trace.stats.npts srate = trace.stats.sampling_rate @@ -294,7 +381,6 @@ def runProgram(cmd, parameter=None): """ run an external program specified by cmd with parameters input returning the stdout output - :param cmd: name of the command to run :type cmd: str :param parameter: filename of parameter file or parameter string