Merge branch 'develop' of ariadne.geophysik.ruhr-uni-bochum.de:/data/git/pylot into develop

This commit is contained in:
Marcel Paffrath 2016-08-24 13:13:41 +02:00
commit e51704f2b7
4 changed files with 339 additions and 154 deletions

View File

@ -2,6 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import copy import copy
import operator
import os import os
import numpy as np import numpy as np
import glob import glob
@ -11,6 +12,7 @@ from obspy import read_events
from pylot.core.io.phases import picksdict_from_picks from pylot.core.io.phases import picksdict_from_picks
from pylot.core.util.pdf import ProbabilityDensityFunction from pylot.core.util.pdf import ProbabilityDensityFunction
from pylot.core.util.utils import find_in_list
from pylot.core.util.version import get_git_version as _getVersionString from pylot.core.util.version import get_git_version as _getVersionString
__version__ = _getVersionString() __version__ = _getVersionString()
@ -151,7 +153,6 @@ class Comparison(object):
return rlist return rlist
def get_array(self, phase, method_name): def get_array(self, phase, method_name):
import operator
method = operator.methodcaller(method_name) method = operator.methodcaller(method_name)
pdf_list = self.get_all(phase) pdf_list = self.get_all(phase)
rarray = map(method, pdf_list) rarray = map(method, pdf_list)
@ -258,6 +259,15 @@ class PDFDictionary(object):
'time is not implemented yet! Sorry!') 'time is not implemented yet! Sorry!')
return PDFDictionary(picksdict_from_picks(cat[0])) return PDFDictionary(picksdict_from_picks(cat[0]))
def get_all(self, phase):
rlist = list()
for phases in self.pdf_data.values():
try:
rlist.append(phases[phase])
except KeyError:
continue
return rlist
def generate_pdf_data(self, type='exp'): def generate_pdf_data(self, type='exp'):
""" """
Returns probabiliy density function dictionary containing the Returns probabiliy density function dictionary containing the
@ -339,143 +349,148 @@ class PDFDictionary(object):
class PDFstatistics(object): class PDFstatistics(object):
''' """
To do: This object can be used to get various statistic values from probabillity density functions.
plots for std, quantiles, Takes a path as argument.
''' """
def __init__(self, directory): def __init__(self, directory):
self.directory = directory """Initiates some values needed when dealing with pdfs later"""
self.evtlist = list() self._rootdir = directory
self.return_phase = None self._evtlist = list()
self._rphase = None
self.make_fnlist()
def readTheta(self, arname, dir, fnpattern): def make_fnlist(self, fn_pattern='*.xml'):
exec('self.' + arname +' = []') """
filelist = glob.glob1(dir, fnpattern) Takes a file pattern and searches for that recursively in the set path for the object.
for file in filelist: :param fn_pattern: A pattern that can identify all datafiles. Default Value = '*.xml'
fid = open(os.path.join(dir,file), 'r') :type fn_pattern: string
list = [] :return: creates a list of events saved in the PDFstatistics object.
for line in fid.readlines(): """
list.append(eval(line))
exec('self.' + arname + ' += list')
fid.close()
def makeFileList(self, fn_pattern='*.xml'):
evtlist = list() evtlist = list()
evtlist = glob.glob1((os.path.join(self.directory)), fn_pattern) for root, _, files in os.walk(self.root):
if not evtlist: for file in files:
for root, _, files in os.walk(self.directory): if file.endswith(fn_pattern[1:]):
for file in files: evtlist.append(os.path.join(root, file))
if file.endswith(fn_pattern[1:]): self._evtlist = evtlist
evtlist.append(os.path.join(root, file))
self.evtlist = evtlist
def __iter__(self): def __iter__(self):
assert isinstance(self.return_phase, str), 'phase has to be set before being able to iterate over items...' for evt in self._evtlist:
for evt in self.evtlist: yield PDFDictionary.from_quakeml(evt)
self.getPDFDict(self.directory, evt)
for station, pdfs in self.pdfdict.pdf_data.items():
try:
yield pdfs[self.return_phase]
except KeyError:
continue
def set_return_phase(self, type): def __getitem__(self, item):
evt = find_in_list(self._evtlist, item)
if evt:
return PDFDictionary.from_quakeml(evt)
return None
@property
def root(self):
return self._rootdir
@root.setter
def root(self, value):
if os.path.exists(value):
self._rootdir = value
else:
raise ValueError("path doesn't exist: %s" % value)
@property
def curphase(self):
"""
return the current phase type of interest
:return: current phase
"""
return self._rphase
@curphase.setter
def curphase(self, type):
"""
setter method for property curphase
:param type: specify the phase type of interest
:type type: string ('p' or 's')
:return: -
"""
if type.upper() not in 'PS': if type.upper() not in 'PS':
raise ValueError("phase type must be either 'P' or 'S'!") raise ValueError("phase type must be either 'P' or 'S'!")
else: else:
self.return_phase = type.upper() self._rphase = type.upper()
def getQD(self,value): def get(self, property='std', value=None):
QDlist = [] """
for pdf in self: takes a property str and a probability value and returns all
QD = pdf.quantile_distance(value) property's values for the current phase of interest
QDlist.append(QD) :func:`self.curphase`
return QDlist
:param property: property name (default: 'std')
:type property: str
:param value: probability value :math:\alpha
:type value: float
:return: list containing all property's values
"""
assert isinstance(self.curphase,
str), 'phase has to be set before being ' \
'able to iterate over items...'
rlist = []
method_options = dict(STD='standard_deviation',
Q='quantile',
QD='quantile_distance',
QDF='quantile_dist_frac')
def getQDQ(self,value): # create method caller for easy mapping
QDQlist = [] if property.upper() == 'STD':
for pdf in self: method = operator.methodcaller(method_options[property.upper()])
QDQ = pdf.qtile_dist_quot(value) elif value is not None:
QDQlist.append(QDQ)
return QDQlist
def getSTD(self):
std = []
for pdf in self:
try: try:
std.append(pdf.standard_deviation()) method = operator.methodcaller(method_options[property.upper()],
value)
except KeyError: except KeyError:
continue raise KeyError('unknwon property: {0}'.format(property.upper()))
std = np.array(std)
self.set_stdarray(std)
def set_stdarray(self, array):
if self.return_phase == 'P':
self.p_stdarray = array
elif self.return_phase == 'S':
self.s_stdarray = array
else: else:
raise ValueError('phase type not set properly...\n' raise ValueError("for call to method {0} value has to be "
'Actual phase type: {0}'.format(self.return_phase)) "defined but is 'None' ".format(method_options[
property.upper()]))
for pdf_dict in self:
# create worklist
wlist = pdf_dict.get_all(self.curphase)
# map method calls to object in worklist
rlist += map(method, wlist)
def getBinList(self,l_boundary,u_boundary,nbins = 100): return rlist
binlist = []
for i in range(nbins):
binlist.append(l_boundary + i*(u_boundary-l_boundary)/nbins)
return binlist
def writeThetaToFile(self,array,out_dir):
def histplot(self, array, binlist, xlab = 'Values', """
ylab = 'Frequency', title = None, label=None, Method to write array like data to file. Useful since acquiring can take
fnout = None): serious amount of time when dealing with large databases.
import matplotlib.pyplot as plt :param array: List of values.
plt.hist(array,bins = binlist) :type array: list
plt.xlabel('Values') :param out_dir: Path to save file to including file name.
plt.ylabel('Frequency') :type out_dir: str
if title: :return: Saves a file at given output directory.
title_str = 'Quantile distance quotient distribution' """
if label: fid = open(os.path.join(out_dir), 'w')
title_str += ' (' + label + ')'
plt.title(title_str)
if fnout:
plt.savefig(fnout+'histplot.png')
else:
plt.show()
def getPDFDict(self, month, evt):
self.pdfdict = PDFDictionary.from_quakeml(os.path.join(self.directory,month,evt))
def getStatistics(self):
self.p_mean = self.p_stdarray.mean()
self.p_std_std = self.p_stdarray.std()
self.p_median = np.median(self.p_stdarray)
self.s_mean = self.s_stdarray.mean()
self.s_std_std = self.s_stdarray.std()
self.s_median = np.median(self.s_stdarray)
def writeThetaToFile(self,array,out_dir,filename = None):
fid = open(os.path.join(out_dir,filename), 'w')
for val in array: for val in array:
fid.write(str(val)+'\n') fid.write(str(val)+'\n')
fid.close() fid.close()
def main(): def main():
root_dir ='/home/sebastianp/Codetesting/xmls/' root_dir ='/home/sebastianp/Codetesting/xmls/'
Insheim = PDFstatistics(root_dir) Insheim = PDFstatistics(root_dir)
Insheim.makeFileList() Insheim.curphase = 'p'
Insheim.set_return_phase('p') qdlist = Insheim.get('qdf', 0.2)
Insheim.getSTD()
qdlist = Insheim.getQDQ(0.3)
binlist = Insheim.getBinList(0.,3.)
print qdlist print qdlist
if __name__ == "__main__": if __name__ == "__main__":
import cProfile
pr = cProfile.Profile()
pr.enable()
main() main()
pr.disable()
# after your program ends
pr.print_stats(sort="calls")

View File

@ -326,7 +326,7 @@ class ProbabilityDensityFunction(object):
raise ValueError('value out of bounds: {0}'.format(value)) raise ValueError('value out of bounds: {0}'.format(value))
return self.prob_limits((value, self.axis[-1])) return self.prob_limits((value, self.axis[-1]))
def prob_limits(self, limits, oversampling=10.): def prob_limits(self, limits, oversampling=1.):
sampling = self.incr / oversampling sampling = self.incr / oversampling
lim = np.arange(limits[0], limits[1], sampling) lim = np.arange(limits[0], limits[1], sampling)
data = self.data(lim) data = self.data(lim)
@ -363,13 +363,40 @@ class ProbabilityDensityFunction(object):
return m return m
def quantile_distance(self, prob_value): def quantile_distance(self, prob_value):
"""
takes a probability value and and returns the distance
between two complementary quantiles
.. math::
QA_\alpha = Q(1 - \alpha) - Q(\alpha)
:param value: probability value :math:\alpha
:type value: float
:return: quantile distance
"""
if 0 >= prob_value or prob_value >= 0.5:
raise ValueError('Value out of range.')
ql = self.quantile(prob_value) ql = self.quantile(prob_value)
qu = self.quantile(1 - prob_value) qu = self.quantile(1 - prob_value)
return qu - ql return qu - ql
def qtile_dist_quot(self,x): def quantile_dist_frac(self, x):
if x <= 0 or x >= 0.5: """
takes a probability value and returns the fraction of two
corresponding quantile distances (
:func:`pylot.core.util.pdf.ProbabilityDensityFunction
#quantile_distance`)
.. math::
Q\Theta_\alpha = \frac{QA(0.5 - \alpha)}{QA(\alpha)}
:param value: probability value :math:\alpha
:return: quantile distance fraction
"""
if x <= 0 or x >= 0.25:
raise ValueError('Value out of range.') raise ValueError('Value out of range.')
return self.quantile_distance(0.5-x)/self.quantile_distance(x) return self.quantile_distance(0.5-x)/self.quantile_distance(x)

View File

@ -0,0 +1,57 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
def create_bin_list(l_boundary, u_boundary, nbins=100):
"""
takes two boundaries and a number of bins and creates a list of bins for
histogram plotting
:param l_boundary: Any number.
:type l_boundary: float
:param u_boundary: Any number that is greater than l_boundary.
:type u_boundary: float
:param nbins: Any positive integer.
:type nbins: int
:return: A list of equidistant bins.
"""
if u_boundary <= l_boundary:
raise ValueError('Upper boundary must be greather than lower!')
elif nbins <= 0:
raise ValueError('Number of bins is not valid.')
binlist = []
for i in range(nbins):
binlist.append(l_boundary + i * (u_boundary - l_boundary) / nbins)
return binlist
def histplot(array, binlist, xlab='Values',
ylab='Frequency', title=None, fnout=None):
"""
function to quickly show some distribution of data. Takes array like data,
and a list of bins. Editing detail and inserting a legend is not possible.
:param array: List of values.
:type array: Array like
:param binlist: List of bins.
:type binlist: list
:param xlab: A label for the x-axes.
:type xlab: str
:param ylab: A label for the y-axes.
:type ylab: str
:param title: A title for the Plot.
:type title: str
:param fnout: A path to save the plot instead of showing.
Has to contain filename and type. Like: 'path/to/file.png'
:type fnout. str
:return: -
"""
plt.hist(array, bins=binlist)
plt.xlabel(xlab)
plt.ylabel(ylab)
if title:
plt.title(title)
if fnout:
plt.savefig(fnout)
else:
plt.show()

View File

@ -18,7 +18,6 @@ def _pickle_method(m):
return getattr, (m.im_self, m.im_func.func_name) return getattr, (m.im_self, m.im_func.func_name)
def fit_curve(x, y): def fit_curve(x, y):
return splev, splrep(x, y) return splev, splrep(x, y)
def getindexbounds(f, eta): def getindexbounds(f, eta):
@ -83,8 +82,8 @@ def clims(lim1, lim2):
def demeanTrace(trace, window): def demeanTrace(trace, window):
""" """
returns the DATA where each trace is demean by the average value within takes a trace object and returns the same trace object but with data
WINDOW demeaned within a certain time window
:param trace: waveform trace object :param trace: waveform trace object
:type trace: `~obspy.core.stream.Trace` :type trace: `~obspy.core.stream.Trace`
:param window: :param window:
@ -101,31 +100,73 @@ def findComboBoxIndex(combo_box, val):
Function findComboBoxIndex takes a QComboBox object and a string and Function findComboBoxIndex takes a QComboBox object and a string and
returns either 0 or the index throughout all QComboBox items. returns either 0 or the index throughout all QComboBox items.
:param combo_box: Combo box object. :param combo_box: Combo box object.
:type combo_box: QComboBox :type combo_box: `~QComboBox`
:param val: Name of a combo box to search for. :param val: Name of a combo box to search for.
:type val: :type val: basestring
:return: index value of item with name val or 0 :return: index value of item with name val or 0
""" """
return combo_box.findText(val) if combo_box.findText(val) is not -1 else 0 return combo_box.findText(val) if combo_box.findText(val) is not -1 else 0
def find_in_list(list, str):
"""
takes a list of strings and a string and returns the first list item
matching the string pattern
:param list: list to search in
:param str: pattern to search for
:return: first list item containing pattern
.. example::
>>> l = ['/dir/e1234.123.12', '/dir/e2345.123.12', 'abc123', 'def456']
>>> find_in_list(l, 'dir')
'/dir/e1234.123.12'
>>> find_in_list(l, 'e1234')
'/dir/e1234.123.12'
>>> find_in_list(l, 'e2')
'/dir/e2345.123.12'
>>> find_in_list(l, 'ABC')
'abc123'
>>> find_in_list(l, 'f456')
'def456'
>>> find_in_list(l, 'gurke')
"""
rlist = [s for s in list if str.lower() in s.lower()]
if rlist:
return rlist[0]
return None
def find_nearest(array, value): def find_nearest(array, value):
''' '''
Function find_nearest takes an array and a value and returns the function find_nearest takes an array and a value and returns the
index of the nearest value found in the array. index of the nearest value found in the array
:param array: :param array: array containing values
:param value: :type array: `~numpy.ndarray`
:return: :param value: number searched for
:return: index of the array item being nearest to the value
>>> a = np.array([ 1.80339578, -0.72546654, 0.95769195, -0.98320759, 0.85922623])
>>> find_nearest(a, 1.3)
2
>>> find_nearest(a, 0)
1
>>> find_nearest(a, 2)
0
>>> find_nearest(a, -1)
3
>>> a = np.array([ 1.1, -0.7, 0.9, -0.9, 0.8])
>>> find_nearest(a, 0.849)
4
''' '''
return (np.abs(array - value)).argmin() return (np.abs(array - value)).argmin()
def fnConstructor(s): def fnConstructor(s):
''' '''
takes a string and returns a valid filename (especially on windows machines)
:param s: :param s: desired filename
:type s: :type s: str
:return: :return: valid filename
''' '''
if type(s) is str: if type(s) is str:
s = s.split(':')[-1] s = s.split(':')[-1]
@ -143,7 +184,21 @@ def fnConstructor(s):
def four_digits(year): def four_digits(year):
if year + 2000 < UTCDateTime.utcnow().year: """
takes a two digit year integer and returns the correct four digit equivalent
from the last 100 years
:param year: two digit year
:type year: int
:return: four digit year correspondant
>>> four_digits(20)
1920
>>> four_digits(16)
2016
>>> four_digits(00)
2000
"""
if year + 2000 <= UTCDateTime.utcnow().year:
year += 2000 year += 2000
else: else:
year += 1900 year += 1900
@ -152,10 +207,11 @@ def four_digits(year):
def getGlobalTimes(stream): def getGlobalTimes(stream):
''' '''
takes a stream object and returns the latest end and the earliest start
:param stream: time of all contained trace objects
:type stream :param stream: seismological data stream
:return: :type stream: `~obspy.core.stream.Stream`
:return: minimum start time and maximum end time
''' '''
min_start = UTCDateTime() min_start = UTCDateTime()
max_end = None max_end = None
@ -169,6 +225,8 @@ def getGlobalTimes(stream):
def getHash(time): def getHash(time):
''' '''
takes a time object and returns the corresponding SHA1 hash of the
formatted date string
:param time: time object for which a hash should be calculated :param time: time object for which a hash should be calculated
:type time: :class: `~obspy.core.utcdatetime.UTCDateTime` object :type time: :class: `~obspy.core.utcdatetime.UTCDateTime` object
:return: str :return: str
@ -180,32 +238,31 @@ def getHash(time):
def getLogin(): def getLogin():
''' '''
returns the actual user's login ID
:return: :return: login ID
''' '''
return pwd.getpwuid(os.getuid())[0] return pwd.getpwuid(os.getuid())[0]
def getOwner(fn): def getOwner(fn):
''' '''
takes a filename and return the login ID of the actual owner of the file
:param fn: :param fn: filename of the file tested
:type fn: :type fn: str
:return: :return: login ID of the file's owner
''' '''
return pwd.getpwuid(os.stat(fn).st_uid).pw_name return pwd.getpwuid(os.stat(fn).st_uid).pw_name
def getPatternLine(fn, pattern): def getPatternLine(fn, pattern):
""" """
Takes a file name and a pattern string to search for in the file and takes a file name and a pattern string to search for in the file and
returns the first line which contains the pattern string otherwise None. returns the first line which contains the pattern string otherwise 'None'
:param fn: file name :param fn: file name
:type fn: str :type fn: str
:param pattern: pattern string to search for :param pattern: pattern string to search for
:type pattern: str :type pattern: str
:return: the complete line containing pattern or None :return: the complete line containing the pattern string or None
>>> getPatternLine('utils.py', 'python') >>> getPatternLine('utils.py', 'python')
'#!/usr/bin/env python\\n' '#!/usr/bin/env python\\n'
@ -223,22 +280,52 @@ def getPatternLine(fn, pattern):
def isSorted(iterable): def isSorted(iterable):
''' '''
takes an iterable and returns 'True' if the items are in order otherwise
:param iterable: 'False'
:param iterable: an iterable object
:type iterable: :type iterable:
:return: :return: Boolean
>>> isSorted(1)
Traceback (most recent call last):
...
AssertionError: object is not iterable; object: 1
>>> isSorted([1,2,3,4])
True
>>> isSorted('abcd')
True
>>> isSorted('bcad')
False
>>> isSorted([2,3,1,4])
False
''' '''
assert isIterable(iterable), 'object is not iterable; object: {' \
'0}'.format(iterable)
if type(iterable) is str:
iterable = [s for s in iterable]
return sorted(iterable) == iterable return sorted(iterable) == iterable
def isIterable(obj):
"""
takes a python object and returns 'True' is the object is iterable and
'False' otherwise
:param obj: a python object
:return: True of False
"""
try:
iterator = iter(obj)
except TypeError as te:
return False
return True
def prepTimeAxis(stime, trace): def prepTimeAxis(stime, trace):
''' '''
takes a starttime and a trace object and returns a valid time axis for
:param stime: plotting
:type stime: :param stime: start time of the actual seismogram as UTCDateTime
:param trace: :param trace: seismic trace object
:type trace: :return: valid numpy array with time stamps for plotting
:return:
''' '''
nsamp = trace.stats.npts nsamp = trace.stats.npts
srate = trace.stats.sampling_rate srate = trace.stats.sampling_rate
@ -294,7 +381,6 @@ def runProgram(cmd, parameter=None):
""" """
run an external program specified by cmd with parameters input returning the run an external program specified by cmd with parameters input returning the
stdout output stdout output
:param cmd: name of the command to run :param cmd: name of the command to run
:type cmd: str :type cmd: str
:param parameter: filename of parameter file or parameter string :param parameter: filename of parameter file or parameter string