Merge branch 'develop' of ariadne.geophysik.ruhr-uni-bochum.de:/data/git/pylot into develop

2016-08-24 13:13:41 +02:00 · 2016-08-24 13:13:41 +02:00 · e51704f2b7
commit e51704f2b7
parent 9c3770ea9f 74d31afa3d
4 changed files with 339 additions and 154 deletions
--- a/pylot/core/pick/compare.py
+++ b/pylot/core/pick/compare.py
@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-

 import copy
+import operator
 import os
 import numpy as np
 import glob
@ -11,6 +12,7 @@ from obspy import read_events

 from pylot.core.io.phases import picksdict_from_picks
 from pylot.core.util.pdf import ProbabilityDensityFunction
+from pylot.core.util.utils import find_in_list
 from pylot.core.util.version import get_git_version as _getVersionString

 __version__ = _getVersionString()
@ -151,7 +153,6 @@ class Comparison(object):
        return rlist

    def get_array(self, phase, method_name):
-        import operator
        method = operator.methodcaller(method_name)
        pdf_list = self.get_all(phase)
        rarray = map(method, pdf_list)
@ -258,6 +259,15 @@ class PDFDictionary(object):
                                      'time is not implemented yet! Sorry!')
        return PDFDictionary(picksdict_from_picks(cat[0]))

+    def get_all(self, phase):
+        rlist = list()
+        for phases in self.pdf_data.values():
+            try:
+                rlist.append(phases[phase])
+            except KeyError:
+                continue
+        return rlist
+
    def generate_pdf_data(self, type='exp'):
        """
        Returns probabiliy density function dictionary containing the
@ -339,143 +349,148 @@ class PDFDictionary(object):


 class PDFstatistics(object):
-    '''
-    To do:
-    plots for std, quantiles,
-    '''
+    """
+    This object can be used to get various statistic values from probabillity density functions.
+    Takes a path as argument.
+    """
+
+
    def __init__(self, directory):
-        self.directory = directory
-        self.evtlist = list()
-        self.return_phase = None
+        """Initiates some values needed when dealing with pdfs later"""
+        self._rootdir = directory
+        self._evtlist = list()
+        self._rphase = None
+        self.make_fnlist()

-    def readTheta(self, arname, dir, fnpattern):
-        exec('self.' + arname +' = []')
-        filelist = glob.glob1(dir, fnpattern)
-        for file in filelist:
-            fid = open(os.path.join(dir,file), 'r')
-            list = []
-            for line in fid.readlines():
-                list.append(eval(line))
-            exec('self.' + arname + ' += list')
-            fid.close()
-
-    def makeFileList(self, fn_pattern='*.xml'):
+    def make_fnlist(self, fn_pattern='*.xml'):
+        """
+        Takes a file pattern and searches for that recursively in the set path for the object.
+        :param fn_pattern: A pattern that can identify all datafiles. Default Value = '*.xml'
+        :type  fn_pattern: string
+        :return: creates a list of events saved in the PDFstatistics object.
+        """
        evtlist = list()
-        evtlist = glob.glob1((os.path.join(self.directory)), fn_pattern)
-        if not evtlist:
-             for root, _, files in os.walk(self.directory):
-                 for file in files:
-                     if file.endswith(fn_pattern[1:]):
-                         evtlist.append(os.path.join(root, file))
-        self.evtlist = evtlist
+        for root, _, files in os.walk(self.root):
+            for file in files:
+                if file.endswith(fn_pattern[1:]):
+                    evtlist.append(os.path.join(root, file))
+        self._evtlist = evtlist

    def __iter__(self):
-        assert isinstance(self.return_phase, str), 'phase has to be set before being able to iterate over items...'
-        for evt in self.evtlist:
-            self.getPDFDict(self.directory, evt)
-            for station, pdfs in self.pdfdict.pdf_data.items():
-                try:
-                    yield pdfs[self.return_phase]
-                except KeyError:
-                    continue
+        for evt in self._evtlist:
+            yield PDFDictionary.from_quakeml(evt)

-    def set_return_phase(self, type):
+    def __getitem__(self, item):
+        evt = find_in_list(self._evtlist, item)
+        if evt:
+            return PDFDictionary.from_quakeml(evt)
+        return None
+
+    @property
+    def root(self):
+        return self._rootdir
+
+    @root.setter
+    def root(self, value):
+        if os.path.exists(value):
+            self._rootdir = value
+        else:
+            raise ValueError("path doesn't exist: %s" % value)
+
+    @property
+    def curphase(self):
+        """
+        return the current phase type of interest
+        :return: current phase
+        """
+        return self._rphase
+
+    @curphase.setter
+    def curphase(self, type):
+        """
+        setter method for property curphase
+        :param type: specify the phase type of interest
+        :type  type: string ('p' or 's')
+        :return: -
+        """
        if type.upper() not in 'PS':
            raise ValueError("phase type must be either 'P' or 'S'!")
        else:
-            self.return_phase = type.upper()
+            self._rphase = type.upper()

-    def getQD(self,value):
-        QDlist = []
-        for pdf in self:
-            QD = pdf.quantile_distance(value)
-            QDlist.append(QD)
-        return QDlist
+    def get(self, property='std', value=None):
+        """
+        takes a property str and a probability value and returns all
+        property's values for the current phase of interest
+        :func:`self.curphase`

+        :param property: property name (default: 'std')
+        :type property: str
+        :param value: probability value :math:\alpha
+        :type  value: float
+        :return: list containing all property's values
+        """
+        assert isinstance(self.curphase,
+                          str), 'phase has to be set before being ' \
+                                'able to iterate over items...'
+        rlist = []
+        method_options = dict(STD='standard_deviation',
+                              Q='quantile',
+                              QD='quantile_distance',
+                              QDF='quantile_dist_frac')

-    def getQDQ(self,value):
-        QDQlist = []
-        for pdf in self:
-            QDQ = pdf.qtile_dist_quot(value)
-            QDQlist.append(QDQ)
-        return QDQlist
-
-
-    def getSTD(self):
-        std = []
-        for pdf in self:
+        # create method caller for easy mapping
+        if property.upper() == 'STD':
+            method = operator.methodcaller(method_options[property.upper()])
+        elif value is not None:
            try:
-                std.append(pdf.standard_deviation())
+                method = operator.methodcaller(method_options[property.upper()],
+                                               value)
            except KeyError:
-                continue
-        std = np.array(std)
-        self.set_stdarray(std)
-
-
-    def set_stdarray(self, array):
-        if self.return_phase == 'P':
-            self.p_stdarray = array
-        elif self.return_phase == 'S':
-            self.s_stdarray = array
+                raise KeyError('unknwon property: {0}'.format(property.upper()))
        else:
-            raise ValueError('phase type not set properly...\n'
-                             'Actual phase type: {0}'.format(self.return_phase))
+            raise ValueError("for call to method {0} value has to be "
+                             "defined but is 'None' ".format(method_options[
+                                                             property.upper()]))

+        for pdf_dict in self:
+            # create worklist
+            wlist = pdf_dict.get_all(self.curphase)
+            # map method calls to object in worklist
+            rlist += map(method, wlist)

-    def getBinList(self,l_boundary,u_boundary,nbins = 100):
-        binlist = []
-        for i in range(nbins):
-            binlist.append(l_boundary + i*(u_boundary-l_boundary)/nbins)
-        return  binlist
+        return rlist

-
-    def histplot(self, array, binlist, xlab = 'Values',
-                 ylab = 'Frequency', title = None, label=None,
-                 fnout = None):
-        import matplotlib.pyplot as plt
-        plt.hist(array,bins = binlist)
-        plt.xlabel('Values')
-        plt.ylabel('Frequency')
-        if title:
-            title_str = 'Quantile distance quotient distribution'
-            if label:
-                title_str += ' (' + label + ')'
-            plt.title(title_str)
-        if fnout:
-            plt.savefig(fnout+'histplot.png')
-        else:
-            plt.show()
-
-
-    def getPDFDict(self, month, evt):
-        self.pdfdict = PDFDictionary.from_quakeml(os.path.join(self.directory,month,evt))
-
-
-    def getStatistics(self):
-        self.p_mean = self.p_stdarray.mean()
-        self.p_std_std = self.p_stdarray.std()
-        self.p_median = np.median(self.p_stdarray)
-        self.s_mean = self.s_stdarray.mean()
-        self.s_std_std = self.s_stdarray.std()
-        self.s_median = np.median(self.s_stdarray)
-
-
-    def writeThetaToFile(self,array,out_dir,filename = None):
-        fid = open(os.path.join(out_dir,filename), 'w')
+    def writeThetaToFile(self,array,out_dir):
+        """
+        Method to write array like data to file. Useful since acquiring can take
+        serious amount of time when dealing with large databases.
+        :param array: List of values.
+        :type  array: list
+        :param out_dir: Path to save file to including file name.
+        :type  out_dir: str
+        :return: Saves a file at given output directory.
+        """
+        fid = open(os.path.join(out_dir), 'w')
        for val in array:
            fid.write(str(val)+'\n')
        fid.close()

+
 def main():
    root_dir ='/home/sebastianp/Codetesting/xmls/'
    Insheim = PDFstatistics(root_dir)
-    Insheim.makeFileList()
-    Insheim.set_return_phase('p')
-    Insheim.getSTD()
-    qdlist = Insheim.getQDQ(0.3)
-    binlist = Insheim.getBinList(0.,3.)
+    Insheim.curphase = 'p'
+    qdlist = Insheim.get('qdf', 0.2)
    print qdlist


 if __name__ == "__main__":
-    main()
+    import cProfile
+
+    pr = cProfile.Profile()
+    pr.enable()
+    main()
+    pr.disable()
+    # after your program ends
+    pr.print_stats(sort="calls")
--- a/pylot/core/util/pdf.py
+++ b/pylot/core/util/pdf.py
@ -326,7 +326,7 @@ class ProbabilityDensityFunction(object):
            raise ValueError('value out of bounds: {0}'.format(value))
        return self.prob_limits((value, self.axis[-1]))

-    def prob_limits(self, limits, oversampling=10.):
+    def prob_limits(self, limits, oversampling=1.):
        sampling = self.incr / oversampling
        lim = np.arange(limits[0], limits[1], sampling)
        data = self.data(lim)
@ -363,13 +363,40 @@ class ProbabilityDensityFunction(object):
        return m

    def quantile_distance(self, prob_value):
+        """
+        takes a probability value and and returns the distance
+        between two complementary quantiles
+
+        .. math::
+
+            QA_\alpha = Q(1 - \alpha) - Q(\alpha)
+
+        :param value: probability value :math:\alpha
+        :type  value: float
+        :return: quantile distance
+        """
+        if 0 >= prob_value or prob_value >= 0.5:
+            raise ValueError('Value out of range.')
        ql = self.quantile(prob_value)
        qu = self.quantile(1 - prob_value)
        return qu - ql


-    def qtile_dist_quot(self,x):
-        if x <= 0 or x >= 0.5:
+    def quantile_dist_frac(self, x):
+        """
+        takes a probability value and returns the fraction of two
+        corresponding quantile distances (
+        :func:`pylot.core.util.pdf.ProbabilityDensityFunction
+        #quantile_distance`)
+
+        .. math::
+
+            Q\Theta_\alpha = \frac{QA(0.5 - \alpha)}{QA(\alpha)}
+
+        :param value: probability value :math:\alpha
+        :return: quantile distance fraction
+        """
+        if x <= 0 or x >= 0.25:
            raise ValueError('Value out of range.')
        return self.quantile_distance(0.5-x)/self.quantile_distance(x)

--- a/pylot/core/util/plotting.py
+++ b/pylot/core/util/plotting.py
@ -0,0 +1,57 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import matplotlib.pyplot as plt
+
+def create_bin_list(l_boundary, u_boundary, nbins=100):
+    """
+    takes two boundaries and a number of bins and creates a list of bins for
+    histogram plotting
+    :param l_boundary: Any number.
+    :type  l_boundary: float
+    :param u_boundary: Any number that is greater than l_boundary.
+    :type  u_boundary: float
+    :param nbins: Any positive integer.
+    :type  nbins: int
+    :return: A list of equidistant bins.
+    """
+    if u_boundary <= l_boundary:
+        raise ValueError('Upper boundary must be greather than lower!')
+    elif nbins <= 0:
+        raise ValueError('Number of bins is not valid.')
+    binlist = []
+    for i in range(nbins):
+        binlist.append(l_boundary + i * (u_boundary - l_boundary) / nbins)
+    return binlist
+
+
+def histplot(array, binlist, xlab='Values',
+             ylab='Frequency', title=None, fnout=None):
+    """
+    function to quickly show some distribution of data. Takes array like data,
+    and a list of bins. Editing detail and inserting a legend is not possible.
+    :param array: List of values.
+    :type  array: Array like
+    :param binlist: List of bins.
+    :type  binlist: list
+    :param xlab: A label for the x-axes.
+    :type  xlab: str
+    :param ylab: A label for the y-axes.
+    :type  ylab: str
+    :param title: A title for the Plot.
+    :type  title: str
+    :param fnout: A path to save the plot instead of showing.
+                  Has to contain filename and type. Like: 'path/to/file.png'
+    :type  fnout. str
+    :return: -
+    """
+
+    plt.hist(array, bins=binlist)
+    plt.xlabel(xlab)
+    plt.ylabel(ylab)
+    if title:
+        plt.title(title)
+    if fnout:
+        plt.savefig(fnout)
+    else:
+        plt.show()
--- a/pylot/core/util/utils.py
+++ b/pylot/core/util/utils.py
@ -18,7 +18,6 @@ def _pickle_method(m):
        return getattr, (m.im_self, m.im_func.func_name)

 def fit_curve(x, y):
-
    return splev, splrep(x, y)

 def getindexbounds(f, eta):
@ -83,8 +82,8 @@ def clims(lim1, lim2):

 def demeanTrace(trace, window):
    """
-    returns the DATA where each trace is demean by the average value within
-    WINDOW
+    takes a trace object and returns the same trace object but with data
+    demeaned within a certain time window
    :param trace: waveform trace object
    :type trace: `~obspy.core.stream.Trace`
    :param window:
@ -101,31 +100,73 @@ def findComboBoxIndex(combo_box, val):
    Function findComboBoxIndex takes a QComboBox object and a string and
    returns either 0 or the index throughout all QComboBox items.
    :param combo_box: Combo box object.
-    :type combo_box: QComboBox
+    :type combo_box: `~QComboBox`
    :param val: Name of a combo box to search for.
-    :type val:
+    :type val: basestring
    :return: index value of item with name val or 0
    """
    return combo_box.findText(val) if combo_box.findText(val) is not -1 else 0

+def find_in_list(list, str):
+    """
+    takes a list of strings and a string and returns the first list item
+    matching the string pattern
+    :param list: list to search in
+    :param str: pattern to search for
+    :return: first list item containing pattern
+
+    .. example::
+
+    >>> l = ['/dir/e1234.123.12', '/dir/e2345.123.12', 'abc123', 'def456']
+    >>> find_in_list(l, 'dir')
+    '/dir/e1234.123.12'
+    >>> find_in_list(l, 'e1234')
+    '/dir/e1234.123.12'
+    >>> find_in_list(l, 'e2')
+    '/dir/e2345.123.12'
+    >>> find_in_list(l, 'ABC')
+    'abc123'
+    >>> find_in_list(l, 'f456')
+    'def456'
+    >>> find_in_list(l, 'gurke')
+
+    """
+    rlist = [s for s in list if str.lower() in s.lower()]
+    if rlist:
+        return rlist[0]
+    return None

 def find_nearest(array, value):
    '''
-    Function find_nearest takes an array and a value and returns the
-    index of the nearest value found in the array.
-    :param array:
-    :param value:
-    :return:
+    function find_nearest takes an array and a value and returns the
+    index of the nearest value found in the array
+    :param array: array containing values
+    :type array: `~numpy.ndarray`
+    :param value: number searched for
+    :return: index of the array item being nearest to the value
+
+    >>> a = np.array([ 1.80339578, -0.72546654,  0.95769195, -0.98320759, 0.85922623])
+    >>> find_nearest(a, 1.3)
+    2
+    >>> find_nearest(a, 0)
+    1
+    >>> find_nearest(a, 2)
+    0
+    >>> find_nearest(a, -1)
+    3
+    >>> a = np.array([ 1.1, -0.7,  0.9, -0.9, 0.8])
+    >>> find_nearest(a, 0.849)
+    4
    '''
    return (np.abs(array - value)).argmin()


 def fnConstructor(s):
    '''
-
-    :param s:
-    :type s:
-    :return:
+    takes a string and returns a valid filename (especially on windows machines)
+    :param s: desired filename
+    :type s: str
+    :return: valid filename
    '''
    if type(s) is str:
        s = s.split(':')[-1]
@ -143,7 +184,21 @@ def fnConstructor(s):


 def four_digits(year):
-    if year + 2000 < UTCDateTime.utcnow().year:
+    """
+    takes a two digit year integer and returns the correct four digit equivalent
+    from the last 100 years
+    :param year: two digit year
+    :type year: int
+    :return: four digit year correspondant
+
+    >>> four_digits(20)
+    1920
+    >>> four_digits(16)
+    2016
+    >>> four_digits(00)
+    2000
+    """
+    if year + 2000 <= UTCDateTime.utcnow().year:
        year += 2000
    else:
        year += 1900
@ -152,10 +207,11 @@ def four_digits(year):

 def getGlobalTimes(stream):
    '''
-
-    :param stream:
-    :type stream
-    :return:
+    takes a stream object and returns the latest end and the earliest start
+    time of all contained trace objects
+    :param stream: seismological data stream
+    :type stream: `~obspy.core.stream.Stream`
+    :return: minimum start time and maximum end time
    '''
    min_start = UTCDateTime()
    max_end = None
@ -169,6 +225,8 @@ def getGlobalTimes(stream):

 def getHash(time):
    '''
+    takes a time object and returns the corresponding SHA1 hash of the
+    formatted date string
    :param time: time object for which a hash should be calculated
    :type time: :class: `~obspy.core.utcdatetime.UTCDateTime` object
    :return: str
@ -180,32 +238,31 @@ def getHash(time):

 def getLogin():
    '''
-
-    :return:
+    returns the actual user's login ID
+    :return: login ID
    '''
    return pwd.getpwuid(os.getuid())[0]


 def getOwner(fn):
    '''
-
-    :param fn:
-    :type fn:
-    :return:
+    takes a filename and return the login ID of the actual owner of the file
+    :param fn: filename of the file tested
+    :type fn: str
+    :return: login ID of the file's owner
    '''
    return pwd.getpwuid(os.stat(fn).st_uid).pw_name


 def getPatternLine(fn, pattern):
    """
-    Takes a file name and a pattern string to search for in the file and
-    returns the first line which contains the pattern string otherwise None.
-
+    takes a file name and a pattern string to search for in the file and
+    returns the first line which contains the pattern string otherwise 'None'
    :param fn: file name
    :type fn: str
    :param pattern: pattern string to search for
    :type pattern: str
-    :return: the complete line containing pattern or None
+    :return: the complete line containing the pattern string or None

    >>> getPatternLine('utils.py', 'python')
    '#!/usr/bin/env python\\n'
@ -223,22 +280,52 @@ def getPatternLine(fn, pattern):

 def isSorted(iterable):
    '''
-
-    :param iterable:
+    takes an iterable and returns 'True' if the items are in order otherwise
+    'False'
+    :param iterable: an iterable object
    :type iterable:
-    :return:
+    :return: Boolean
+
+    >>> isSorted(1)
+    Traceback (most recent call last):
+    ...
+    AssertionError: object is not iterable; object: 1
+    >>> isSorted([1,2,3,4])
+    True
+    >>> isSorted('abcd')
+    True
+    >>> isSorted('bcad')
+    False
+    >>> isSorted([2,3,1,4])
+    False
    '''
+    assert isIterable(iterable), 'object is not iterable; object: {' \
+                                 '0}'.format(iterable)
+    if type(iterable) is str:
+        iterable = [s for s in iterable]
    return sorted(iterable) == iterable


+def isIterable(obj):
+    """
+    takes a python object and returns 'True' is the object is iterable and
+    'False' otherwise
+    :param obj: a python object
+    :return: True of False
+    """
+    try:
+        iterator = iter(obj)
+    except TypeError as te:
+        return False
+    return True
+
 def prepTimeAxis(stime, trace):
    '''
-
-    :param stime:
-    :type stime:
-    :param trace:
-     :type trace:
-    :return:
+    takes a starttime and a trace object and returns a valid time axis for
+    plotting
+    :param stime: start time of the actual seismogram as UTCDateTime
+    :param trace: seismic trace object
+    :return: valid numpy array with time stamps for plotting
    '''
    nsamp = trace.stats.npts
    srate = trace.stats.sampling_rate
@ -294,7 +381,6 @@ def runProgram(cmd, parameter=None):
    """
    run an external program specified by cmd with parameters input returning the
    stdout output
-
    :param cmd: name of the command to run
    :type cmd: str
    :param parameter: filename of parameter file  or parameter string