Merge branch 'develop' of ariadne.geophysik.ruhr-uni-bochum.de:/data/git/pylot into develop

2016-08-24 13:13:41 +02:00 · 2016-08-24 13:13:41 +02:00 · e51704f2b7
commit e51704f2b7
parent 9c3770ea9f 74d31afa3d
4 changed files with 339 additions and 154 deletions
--- a/pylot/core/pick/compare.py
+++ b/pylot/core/pick/compare.py
@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 import copy
 import operator
 import os
 import numpy as np
 import glob
@ -11,6 +12,7 @@ from obspy import read_events
 from pylot.core.io.phases import picksdict_from_picks
 from pylot.core.util.pdf import ProbabilityDensityFunction
 from pylot.core.util.utils import find_in_list
 from pylot.core.util.version import get_git_version as _getVersionString
 __version__ = _getVersionString()
@ -151,7 +153,6 @@ class Comparison(object):
        return rlist
    def get_array(self, phase, method_name):
        import operator
        method = operator.methodcaller(method_name)
        pdf_list = self.get_all(phase)
        rarray = map(method, pdf_list)
@ -258,6 +259,15 @@ class PDFDictionary(object):
                                      'time is not implemented yet! Sorry!')
        return PDFDictionary(picksdict_from_picks(cat[0]))
    def get_all(self, phase):
        rlist = list()
        for phases in self.pdf_data.values():
            try:
                rlist.append(phases[phase])
            except KeyError:
                continue
        return rlist
    def generate_pdf_data(self, type='exp'):
        """
        Returns probabiliy density function dictionary containing the
@ -339,143 +349,148 @@ class PDFDictionary(object):
 class PDFstatistics(object):
-    '''
+    """
-    To do:
+    This object can be used to get various statistic values from probabillity density functions.
-    plots for std, quantiles,
+    Takes a path as argument.
-    '''
+    """
    def __init__(self, directory):
-        self.directory = directory
+        """Initiates some values needed when dealing with pdfs later"""
-        self.evtlist = list()
+        self._rootdir = directory
-        self.return_phase = None
+        self._evtlist = list()
        self._rphase = None
        self.make_fnlist()
-    def readTheta(self, arname, dir, fnpattern):
+    def make_fnlist(self, fn_pattern='*.xml'):
-        exec('self.' + arname +' = []')
+        """
-        filelist = glob.glob1(dir, fnpattern)
+        Takes a file pattern and searches for that recursively in the set path for the object.
-        for file in filelist:
+        :param fn_pattern: A pattern that can identify all datafiles. Default Value = '*.xml'
-            fid = open(os.path.join(dir,file), 'r')
+        :type  fn_pattern: string
-            list = []
+        :return: creates a list of events saved in the PDFstatistics object.
-            for line in fid.readlines():
+        """
                list.append(eval(line))
            exec('self.' + arname + ' += list')
            fid.close()
    def makeFileList(self, fn_pattern='*.xml'):
        evtlist = list()
-        evtlist = glob.glob1((os.path.join(self.directory)), fn_pattern)
+        for root, _, files in os.walk(self.root):
-        if not evtlist:
+            for file in files:
-             for root, _, files in os.walk(self.directory):
+                if file.endswith(fn_pattern[1:]):
-                 for file in files:
+                    evtlist.append(os.path.join(root, file))
-                     if file.endswith(fn_pattern[1:]):
+        self._evtlist = evtlist
                         evtlist.append(os.path.join(root, file))
        self.evtlist = evtlist
    def __iter__(self):
-        assert isinstance(self.return_phase, str), 'phase has to be set before being able to iterate over items...'
+        for evt in self._evtlist:
-        for evt in self.evtlist:
+            yield PDFDictionary.from_quakeml(evt)
            self.getPDFDict(self.directory, evt)
            for station, pdfs in self.pdfdict.pdf_data.items():
                try:
                    yield pdfs[self.return_phase]
                except KeyError:
                    continue
-    def set_return_phase(self, type):
+    def __getitem__(self, item):
        evt = find_in_list(self._evtlist, item)
        if evt:
            return PDFDictionary.from_quakeml(evt)
        return None
    @property
    def root(self):
        return self._rootdir
    @root.setter
    def root(self, value):
        if os.path.exists(value):
            self._rootdir = value
        else:
            raise ValueError("path doesn't exist: %s" % value)
    @property
    def curphase(self):
        """
        return the current phase type of interest
        :return: current phase
        """
        return self._rphase
    @curphase.setter
    def curphase(self, type):
        """
        setter method for property curphase
        :param type: specify the phase type of interest
        :type  type: string ('p' or 's')
        :return: -
        """
        if type.upper() not in 'PS':
            raise ValueError("phase type must be either 'P' or 'S'!")
        else:
-            self.return_phase = type.upper()
+            self._rphase = type.upper()
-    def getQD(self,value):
+    def get(self, property='std', value=None):
-        QDlist = []
+        """
-        for pdf in self:
+        takes a property str and a probability value and returns all
-            QD = pdf.quantile_distance(value)
+        property's values for the current phase of interest
-            QDlist.append(QD)
+        :func:`self.curphase`
        return QDlist
        :param property: property name (default: 'std')
        :type property: str
        :param value: probability value :math:\alpha
        :type  value: float
        :return: list containing all property's values
        """
        assert isinstance(self.curphase,
                          str), 'phase has to be set before being ' \
                                'able to iterate over items...'
        rlist = []
        method_options = dict(STD='standard_deviation',
                              Q='quantile',
                              QD='quantile_distance',
                              QDF='quantile_dist_frac')
-    def getQDQ(self,value):
+        # create method caller for easy mapping
-        QDQlist = []
+        if property.upper() == 'STD':
-        for pdf in self:
+            method = operator.methodcaller(method_options[property.upper()])
-            QDQ = pdf.qtile_dist_quot(value)
+        elif value is not None:
            QDQlist.append(QDQ)
        return QDQlist
    def getSTD(self):
        std = []
        for pdf in self:
            try:
-                std.append(pdf.standard_deviation())
+                method = operator.methodcaller(method_options[property.upper()],
                                               value)
            except KeyError:
-                continue
+                raise KeyError('unknwon property: {0}'.format(property.upper()))
        std = np.array(std)
        self.set_stdarray(std)
    def set_stdarray(self, array):
        if self.return_phase == 'P':
            self.p_stdarray = array
        elif self.return_phase == 'S':
            self.s_stdarray = array
        else:
-            raise ValueError('phase type not set properly...\n'
+            raise ValueError("for call to method {0} value has to be "
-                             'Actual phase type: {0}'.format(self.return_phase))
+                             "defined but is 'None' ".format(method_options[
                                                             property.upper()]))
        for pdf_dict in self:
            # create worklist
            wlist = pdf_dict.get_all(self.curphase)
            # map method calls to object in worklist
            rlist += map(method, wlist)
-    def getBinList(self,l_boundary,u_boundary,nbins = 100):
+        return rlist
        binlist = []
        for i in range(nbins):
            binlist.append(l_boundary + i*(u_boundary-l_boundary)/nbins)
        return  binlist
-
+    def writeThetaToFile(self,array,out_dir):
-    def histplot(self, array, binlist, xlab = 'Values',
+        """
-                 ylab = 'Frequency', title = None, label=None,
+        Method to write array like data to file. Useful since acquiring can take
-                 fnout = None):
+        serious amount of time when dealing with large databases.
-        import matplotlib.pyplot as plt
+        :param array: List of values.
-        plt.hist(array,bins = binlist)
+        :type  array: list
-        plt.xlabel('Values')
+        :param out_dir: Path to save file to including file name.
-        plt.ylabel('Frequency')
+        :type  out_dir: str
-        if title:
+        :return: Saves a file at given output directory.
-            title_str = 'Quantile distance quotient distribution'
+        """
-            if label:
+        fid = open(os.path.join(out_dir), 'w')
                title_str += ' (' + label + ')'
            plt.title(title_str)
        if fnout:
            plt.savefig(fnout+'histplot.png')
        else:
            plt.show()
    def getPDFDict(self, month, evt):
        self.pdfdict = PDFDictionary.from_quakeml(os.path.join(self.directory,month,evt))
    def getStatistics(self):
        self.p_mean = self.p_stdarray.mean()
        self.p_std_std = self.p_stdarray.std()
        self.p_median = np.median(self.p_stdarray)
        self.s_mean = self.s_stdarray.mean()
        self.s_std_std = self.s_stdarray.std()
        self.s_median = np.median(self.s_stdarray)
    def writeThetaToFile(self,array,out_dir,filename = None):
        fid = open(os.path.join(out_dir,filename), 'w')
        for val in array:
            fid.write(str(val)+'\n')
        fid.close()
 def main():
    root_dir ='/home/sebastianp/Codetesting/xmls/'
    Insheim = PDFstatistics(root_dir)
-    Insheim.makeFileList()
+    Insheim.curphase = 'p'
-    Insheim.set_return_phase('p')
+    qdlist = Insheim.get('qdf', 0.2)
    Insheim.getSTD()
    qdlist = Insheim.getQDQ(0.3)
    binlist = Insheim.getBinList(0.,3.)
    print qdlist
 if __name__ == "__main__":
-    main()
+    import cProfile
    pr = cProfile.Profile()
    pr.enable()
    main()
    pr.disable()
    # after your program ends
    pr.print_stats(sort="calls")
--- a/pylot/core/util/pdf.py
+++ b/pylot/core/util/pdf.py
@ -326,7 +326,7 @@ class ProbabilityDensityFunction(object):
            raise ValueError('value out of bounds: {0}'.format(value))
        return self.prob_limits((value, self.axis[-1]))
-    def prob_limits(self, limits, oversampling=10.):
+    def prob_limits(self, limits, oversampling=1.):
        sampling = self.incr / oversampling
        lim = np.arange(limits[0], limits[1], sampling)
        data = self.data(lim)
@ -363,13 +363,40 @@ class ProbabilityDensityFunction(object):
        return m
    def quantile_distance(self, prob_value):
        """
        takes a probability value and and returns the distance
        between two complementary quantiles
        .. math::
            QA_\alpha = Q(1 - \alpha) - Q(\alpha)
        :param value: probability value :math:\alpha
        :type  value: float
        :return: quantile distance
        """
        if 0 >= prob_value or prob_value >= 0.5:
            raise ValueError('Value out of range.')
        ql = self.quantile(prob_value)
        qu = self.quantile(1 - prob_value)
        return qu - ql
-    def qtile_dist_quot(self,x):
+    def quantile_dist_frac(self, x):
-        if x <= 0 or x >= 0.5:
+        """
        takes a probability value and returns the fraction of two
        corresponding quantile distances (
        :func:`pylot.core.util.pdf.ProbabilityDensityFunction
        #quantile_distance`)
        .. math::
            Q\Theta_\alpha = \frac{QA(0.5 - \alpha)}{QA(\alpha)}
        :param value: probability value :math:\alpha
        :return: quantile distance fraction
        """
        if x <= 0 or x >= 0.25:
            raise ValueError('Value out of range.')
        return self.quantile_distance(0.5-x)/self.quantile_distance(x)
--- a/pylot/core/util/plotting.py
+++ b/pylot/core/util/plotting.py
@ -0,0 +1,57 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import matplotlib.pyplot as plt
 def create_bin_list(l_boundary, u_boundary, nbins=100):
    """
    takes two boundaries and a number of bins and creates a list of bins for
    histogram plotting
    :param l_boundary: Any number.
    :type  l_boundary: float
    :param u_boundary: Any number that is greater than l_boundary.
    :type  u_boundary: float
    :param nbins: Any positive integer.
    :type  nbins: int
    :return: A list of equidistant bins.
    """
    if u_boundary <= l_boundary:
        raise ValueError('Upper boundary must be greather than lower!')
    elif nbins <= 0:
        raise ValueError('Number of bins is not valid.')
    binlist = []
    for i in range(nbins):
        binlist.append(l_boundary + i * (u_boundary - l_boundary) / nbins)
    return binlist
 def histplot(array, binlist, xlab='Values',
             ylab='Frequency', title=None, fnout=None):
    """
    function to quickly show some distribution of data. Takes array like data,
    and a list of bins. Editing detail and inserting a legend is not possible.
    :param array: List of values.
    :type  array: Array like
    :param binlist: List of bins.
    :type  binlist: list
    :param xlab: A label for the x-axes.
    :type  xlab: str
    :param ylab: A label for the y-axes.
    :type  ylab: str
    :param title: A title for the Plot.
    :type  title: str
    :param fnout: A path to save the plot instead of showing.
                  Has to contain filename and type. Like: 'path/to/file.png'
    :type  fnout. str
    :return: -
    """
    plt.hist(array, bins=binlist)
    plt.xlabel(xlab)
    plt.ylabel(ylab)
    if title:
        plt.title(title)
    if fnout:
        plt.savefig(fnout)
    else:
        plt.show()
--- a/pylot/core/util/utils.py
+++ b/pylot/core/util/utils.py
@ -18,7 +18,6 @@ def _pickle_method(m):
        return getattr, (m.im_self, m.im_func.func_name)
 def fit_curve(x, y):
    return splev, splrep(x, y)
 def getindexbounds(f, eta):
@ -83,8 +82,8 @@ def clims(lim1, lim2):
 def demeanTrace(trace, window):
    """
-    returns the DATA where each trace is demean by the average value within
+    takes a trace object and returns the same trace object but with data
-    WINDOW
+    demeaned within a certain time window
    :param trace: waveform trace object
    :type trace: `~obspy.core.stream.Trace`
    :param window:
@ -101,31 +100,73 @@ def findComboBoxIndex(combo_box, val):
    Function findComboBoxIndex takes a QComboBox object and a string and
    returns either 0 or the index throughout all QComboBox items.
    :param combo_box: Combo box object.
-    :type combo_box: QComboBox
+    :type combo_box: `~QComboBox`
    :param val: Name of a combo box to search for.
-    :type val:
+    :type val: basestring
    :return: index value of item with name val or 0
    """
    return combo_box.findText(val) if combo_box.findText(val) is not -1 else 0
 def find_in_list(list, str):
    """
    takes a list of strings and a string and returns the first list item
    matching the string pattern
    :param list: list to search in
    :param str: pattern to search for
    :return: first list item containing pattern
    .. example::
    >>> l = ['/dir/e1234.123.12', '/dir/e2345.123.12', 'abc123', 'def456']
    >>> find_in_list(l, 'dir')
    '/dir/e1234.123.12'
    >>> find_in_list(l, 'e1234')
    '/dir/e1234.123.12'
    >>> find_in_list(l, 'e2')
    '/dir/e2345.123.12'
    >>> find_in_list(l, 'ABC')
    'abc123'
    >>> find_in_list(l, 'f456')
    'def456'
    >>> find_in_list(l, 'gurke')
    """
    rlist = [s for s in list if str.lower() in s.lower()]
    if rlist:
        return rlist[0]
    return None
 def find_nearest(array, value):
    '''
-    Function find_nearest takes an array and a value and returns the
+    function find_nearest takes an array and a value and returns the
-    index of the nearest value found in the array.
+    index of the nearest value found in the array
-    :param array:
+    :param array: array containing values
-    :param value:
+    :type array: `~numpy.ndarray`
-    :return:
+    :param value: number searched for
    :return: index of the array item being nearest to the value
    >>> a = np.array([ 1.80339578, -0.72546654,  0.95769195, -0.98320759, 0.85922623])
    >>> find_nearest(a, 1.3)
    2
    >>> find_nearest(a, 0)
    1
    >>> find_nearest(a, 2)
    0
    >>> find_nearest(a, -1)
    3
    >>> a = np.array([ 1.1, -0.7,  0.9, -0.9, 0.8])
    >>> find_nearest(a, 0.849)
    4
    '''
    return (np.abs(array - value)).argmin()
 def fnConstructor(s):
    '''
-
+    takes a string and returns a valid filename (especially on windows machines)
-    :param s:
+    :param s: desired filename
-    :type s:
+    :type s: str
-    :return:
+    :return: valid filename
    '''
    if type(s) is str:
        s = s.split(':')[-1]
@ -143,7 +184,21 @@ def fnConstructor(s):
 def four_digits(year):
-    if year + 2000 < UTCDateTime.utcnow().year:
+    """
    takes a two digit year integer and returns the correct four digit equivalent
    from the last 100 years
    :param year: two digit year
    :type year: int
    :return: four digit year correspondant
    >>> four_digits(20)
    1920
    >>> four_digits(16)
    2016
    >>> four_digits(00)
    2000
    """
    if year + 2000 <= UTCDateTime.utcnow().year:
        year += 2000
    else:
        year += 1900
@ -152,10 +207,11 @@ def four_digits(year):
 def getGlobalTimes(stream):
    '''
-
+    takes a stream object and returns the latest end and the earliest start
-    :param stream:
+    time of all contained trace objects
-    :type stream
+    :param stream: seismological data stream
-    :return:
+    :type stream: `~obspy.core.stream.Stream`
    :return: minimum start time and maximum end time
    '''
    min_start = UTCDateTime()
    max_end = None
@ -169,6 +225,8 @@ def getGlobalTimes(stream):
 def getHash(time):
    '''
    takes a time object and returns the corresponding SHA1 hash of the
    formatted date string
    :param time: time object for which a hash should be calculated
    :type time: :class: `~obspy.core.utcdatetime.UTCDateTime` object
    :return: str
@ -180,32 +238,31 @@ def getHash(time):
 def getLogin():
    '''
-
+    returns the actual user's login ID
-    :return:
+    :return: login ID
    '''
    return pwd.getpwuid(os.getuid())[0]
 def getOwner(fn):
    '''
-
+    takes a filename and return the login ID of the actual owner of the file
-    :param fn:
+    :param fn: filename of the file tested
-    :type fn:
+    :type fn: str
-    :return:
+    :return: login ID of the file's owner
    '''
    return pwd.getpwuid(os.stat(fn).st_uid).pw_name
 def getPatternLine(fn, pattern):
    """
-    Takes a file name and a pattern string to search for in the file and
+    takes a file name and a pattern string to search for in the file and
-    returns the first line which contains the pattern string otherwise None.
+    returns the first line which contains the pattern string otherwise 'None'
    :param fn: file name
    :type fn: str
    :param pattern: pattern string to search for
    :type pattern: str
-    :return: the complete line containing pattern or None
+    :return: the complete line containing the pattern string or None
    >>> getPatternLine('utils.py', 'python')
    '#!/usr/bin/env python\\n'
@ -223,22 +280,52 @@ def getPatternLine(fn, pattern):
 def isSorted(iterable):
    '''
-
+    takes an iterable and returns 'True' if the items are in order otherwise
-    :param iterable:
+    'False'
    :param iterable: an iterable object
    :type iterable:
-    :return:
+    :return: Boolean
    >>> isSorted(1)
    Traceback (most recent call last):
    ...
    AssertionError: object is not iterable; object: 1
    >>> isSorted([1,2,3,4])
    True
    >>> isSorted('abcd')
    True
    >>> isSorted('bcad')
    False
    >>> isSorted([2,3,1,4])
    False
    '''
    assert isIterable(iterable), 'object is not iterable; object: {' \
                                 '0}'.format(iterable)
    if type(iterable) is str:
        iterable = [s for s in iterable]
    return sorted(iterable) == iterable
 def isIterable(obj):
    """
    takes a python object and returns 'True' is the object is iterable and
    'False' otherwise
    :param obj: a python object
    :return: True of False
    """
    try:
        iterator = iter(obj)
    except TypeError as te:
        return False
    return True
 def prepTimeAxis(stime, trace):
    '''
-
+    takes a starttime and a trace object and returns a valid time axis for
-    :param stime:
+    plotting
-    :type stime:
+    :param stime: start time of the actual seismogram as UTCDateTime
-    :param trace:
+    :param trace: seismic trace object
-     :type trace:
+    :return: valid numpy array with time stamps for plotting
    :return:
    '''
    nsamp = trace.stats.npts
    srate = trace.stats.sampling_rate
@ -294,7 +381,6 @@ def runProgram(cmd, parameter=None):
    """
    run an external program specified by cmd with parameters input returning the
    stdout output
    :param cmd: name of the command to run
    :type cmd: str
    :param parameter: filename of parameter file  or parameter string