Merge branch 'develop' of ariadne.geophysik.ruhr-uni-bochum.de:/data/git/pylot into develop

This commit is contained in:
Marcel Paffrath 2016-08-24 13:13:41 +02:00
commit e51704f2b7
4 changed files with 339 additions and 154 deletions

View File

@ -2,6 +2,7 @@
# -*- coding: utf-8 -*-
import copy
import operator
import os
import numpy as np
import glob
@ -11,6 +12,7 @@ from obspy import read_events
from pylot.core.io.phases import picksdict_from_picks
from pylot.core.util.pdf import ProbabilityDensityFunction
from pylot.core.util.utils import find_in_list
from pylot.core.util.version import get_git_version as _getVersionString
__version__ = _getVersionString()
@ -151,7 +153,6 @@ class Comparison(object):
return rlist
def get_array(self, phase, method_name):
import operator
method = operator.methodcaller(method_name)
pdf_list = self.get_all(phase)
rarray = map(method, pdf_list)
@ -258,6 +259,15 @@ class PDFDictionary(object):
'time is not implemented yet! Sorry!')
return PDFDictionary(picksdict_from_picks(cat[0]))
def get_all(self, phase):
rlist = list()
for phases in self.pdf_data.values():
try:
rlist.append(phases[phase])
except KeyError:
continue
return rlist
def generate_pdf_data(self, type='exp'):
"""
Returns probabiliy density function dictionary containing the
@ -339,143 +349,148 @@ class PDFDictionary(object):
class PDFstatistics(object):
'''
To do:
plots for std, quantiles,
'''
"""
This object can be used to get various statistic values from probabillity density functions.
Takes a path as argument.
"""
def __init__(self, directory):
self.directory = directory
self.evtlist = list()
self.return_phase = None
"""Initiates some values needed when dealing with pdfs later"""
self._rootdir = directory
self._evtlist = list()
self._rphase = None
self.make_fnlist()
def readTheta(self, arname, dir, fnpattern):
exec('self.' + arname +' = []')
filelist = glob.glob1(dir, fnpattern)
for file in filelist:
fid = open(os.path.join(dir,file), 'r')
list = []
for line in fid.readlines():
list.append(eval(line))
exec('self.' + arname + ' += list')
fid.close()
def makeFileList(self, fn_pattern='*.xml'):
def make_fnlist(self, fn_pattern='*.xml'):
"""
Takes a file pattern and searches for that recursively in the set path for the object.
:param fn_pattern: A pattern that can identify all datafiles. Default Value = '*.xml'
:type fn_pattern: string
:return: creates a list of events saved in the PDFstatistics object.
"""
evtlist = list()
evtlist = glob.glob1((os.path.join(self.directory)), fn_pattern)
if not evtlist:
for root, _, files in os.walk(self.directory):
for file in files:
if file.endswith(fn_pattern[1:]):
evtlist.append(os.path.join(root, file))
self.evtlist = evtlist
for root, _, files in os.walk(self.root):
for file in files:
if file.endswith(fn_pattern[1:]):
evtlist.append(os.path.join(root, file))
self._evtlist = evtlist
def __iter__(self):
assert isinstance(self.return_phase, str), 'phase has to be set before being able to iterate over items...'
for evt in self.evtlist:
self.getPDFDict(self.directory, evt)
for station, pdfs in self.pdfdict.pdf_data.items():
try:
yield pdfs[self.return_phase]
except KeyError:
continue
for evt in self._evtlist:
yield PDFDictionary.from_quakeml(evt)
def set_return_phase(self, type):
def __getitem__(self, item):
evt = find_in_list(self._evtlist, item)
if evt:
return PDFDictionary.from_quakeml(evt)
return None
@property
def root(self):
return self._rootdir
@root.setter
def root(self, value):
if os.path.exists(value):
self._rootdir = value
else:
raise ValueError("path doesn't exist: %s" % value)
@property
def curphase(self):
"""
return the current phase type of interest
:return: current phase
"""
return self._rphase
@curphase.setter
def curphase(self, type):
"""
setter method for property curphase
:param type: specify the phase type of interest
:type type: string ('p' or 's')
:return: -
"""
if type.upper() not in 'PS':
raise ValueError("phase type must be either 'P' or 'S'!")
else:
self.return_phase = type.upper()
self._rphase = type.upper()
def getQD(self,value):
QDlist = []
for pdf in self:
QD = pdf.quantile_distance(value)
QDlist.append(QD)
return QDlist
def get(self, property='std', value=None):
"""
takes a property str and a probability value and returns all
property's values for the current phase of interest
:func:`self.curphase`
:param property: property name (default: 'std')
:type property: str
:param value: probability value :math:\alpha
:type value: float
:return: list containing all property's values
"""
assert isinstance(self.curphase,
str), 'phase has to be set before being ' \
'able to iterate over items...'
rlist = []
method_options = dict(STD='standard_deviation',
Q='quantile',
QD='quantile_distance',
QDF='quantile_dist_frac')
def getQDQ(self,value):
QDQlist = []
for pdf in self:
QDQ = pdf.qtile_dist_quot(value)
QDQlist.append(QDQ)
return QDQlist
def getSTD(self):
std = []
for pdf in self:
# create method caller for easy mapping
if property.upper() == 'STD':
method = operator.methodcaller(method_options[property.upper()])
elif value is not None:
try:
std.append(pdf.standard_deviation())
method = operator.methodcaller(method_options[property.upper()],
value)
except KeyError:
continue
std = np.array(std)
self.set_stdarray(std)
def set_stdarray(self, array):
if self.return_phase == 'P':
self.p_stdarray = array
elif self.return_phase == 'S':
self.s_stdarray = array
raise KeyError('unknwon property: {0}'.format(property.upper()))
else:
raise ValueError('phase type not set properly...\n'
'Actual phase type: {0}'.format(self.return_phase))
raise ValueError("for call to method {0} value has to be "
"defined but is 'None' ".format(method_options[
property.upper()]))
for pdf_dict in self:
# create worklist
wlist = pdf_dict.get_all(self.curphase)
# map method calls to object in worklist
rlist += map(method, wlist)
def getBinList(self,l_boundary,u_boundary,nbins = 100):
binlist = []
for i in range(nbins):
binlist.append(l_boundary + i*(u_boundary-l_boundary)/nbins)
return binlist
return rlist
def histplot(self, array, binlist, xlab = 'Values',
ylab = 'Frequency', title = None, label=None,
fnout = None):
import matplotlib.pyplot as plt
plt.hist(array,bins = binlist)
plt.xlabel('Values')
plt.ylabel('Frequency')
if title:
title_str = 'Quantile distance quotient distribution'
if label:
title_str += ' (' + label + ')'
plt.title(title_str)
if fnout:
plt.savefig(fnout+'histplot.png')
else:
plt.show()
def getPDFDict(self, month, evt):
self.pdfdict = PDFDictionary.from_quakeml(os.path.join(self.directory,month,evt))
def getStatistics(self):
self.p_mean = self.p_stdarray.mean()
self.p_std_std = self.p_stdarray.std()
self.p_median = np.median(self.p_stdarray)
self.s_mean = self.s_stdarray.mean()
self.s_std_std = self.s_stdarray.std()
self.s_median = np.median(self.s_stdarray)
def writeThetaToFile(self,array,out_dir,filename = None):
fid = open(os.path.join(out_dir,filename), 'w')
def writeThetaToFile(self,array,out_dir):
"""
Method to write array like data to file. Useful since acquiring can take
serious amount of time when dealing with large databases.
:param array: List of values.
:type array: list
:param out_dir: Path to save file to including file name.
:type out_dir: str
:return: Saves a file at given output directory.
"""
fid = open(os.path.join(out_dir), 'w')
for val in array:
fid.write(str(val)+'\n')
fid.close()
def main():
root_dir ='/home/sebastianp/Codetesting/xmls/'
Insheim = PDFstatistics(root_dir)
Insheim.makeFileList()
Insheim.set_return_phase('p')
Insheim.getSTD()
qdlist = Insheim.getQDQ(0.3)
binlist = Insheim.getBinList(0.,3.)
Insheim.curphase = 'p'
qdlist = Insheim.get('qdf', 0.2)
print qdlist
if __name__ == "__main__":
main()
import cProfile
pr = cProfile.Profile()
pr.enable()
main()
pr.disable()
# after your program ends
pr.print_stats(sort="calls")

View File

@ -326,7 +326,7 @@ class ProbabilityDensityFunction(object):
raise ValueError('value out of bounds: {0}'.format(value))
return self.prob_limits((value, self.axis[-1]))
def prob_limits(self, limits, oversampling=10.):
def prob_limits(self, limits, oversampling=1.):
sampling = self.incr / oversampling
lim = np.arange(limits[0], limits[1], sampling)
data = self.data(lim)
@ -363,13 +363,40 @@ class ProbabilityDensityFunction(object):
return m
def quantile_distance(self, prob_value):
"""
takes a probability value and and returns the distance
between two complementary quantiles
.. math::
QA_\alpha = Q(1 - \alpha) - Q(\alpha)
:param value: probability value :math:\alpha
:type value: float
:return: quantile distance
"""
if 0 >= prob_value or prob_value >= 0.5:
raise ValueError('Value out of range.')
ql = self.quantile(prob_value)
qu = self.quantile(1 - prob_value)
return qu - ql
def qtile_dist_quot(self,x):
if x <= 0 or x >= 0.5:
def quantile_dist_frac(self, x):
"""
takes a probability value and returns the fraction of two
corresponding quantile distances (
:func:`pylot.core.util.pdf.ProbabilityDensityFunction
#quantile_distance`)
.. math::
Q\Theta_\alpha = \frac{QA(0.5 - \alpha)}{QA(\alpha)}
:param value: probability value :math:\alpha
:return: quantile distance fraction
"""
if x <= 0 or x >= 0.25:
raise ValueError('Value out of range.')
return self.quantile_distance(0.5-x)/self.quantile_distance(x)

View File

@ -0,0 +1,57 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
def create_bin_list(l_boundary, u_boundary, nbins=100):
"""
takes two boundaries and a number of bins and creates a list of bins for
histogram plotting
:param l_boundary: Any number.
:type l_boundary: float
:param u_boundary: Any number that is greater than l_boundary.
:type u_boundary: float
:param nbins: Any positive integer.
:type nbins: int
:return: A list of equidistant bins.
"""
if u_boundary <= l_boundary:
raise ValueError('Upper boundary must be greather than lower!')
elif nbins <= 0:
raise ValueError('Number of bins is not valid.')
binlist = []
for i in range(nbins):
binlist.append(l_boundary + i * (u_boundary - l_boundary) / nbins)
return binlist
def histplot(array, binlist, xlab='Values',
ylab='Frequency', title=None, fnout=None):
"""
function to quickly show some distribution of data. Takes array like data,
and a list of bins. Editing detail and inserting a legend is not possible.
:param array: List of values.
:type array: Array like
:param binlist: List of bins.
:type binlist: list
:param xlab: A label for the x-axes.
:type xlab: str
:param ylab: A label for the y-axes.
:type ylab: str
:param title: A title for the Plot.
:type title: str
:param fnout: A path to save the plot instead of showing.
Has to contain filename and type. Like: 'path/to/file.png'
:type fnout. str
:return: -
"""
plt.hist(array, bins=binlist)
plt.xlabel(xlab)
plt.ylabel(ylab)
if title:
plt.title(title)
if fnout:
plt.savefig(fnout)
else:
plt.show()

View File

@ -18,7 +18,6 @@ def _pickle_method(m):
return getattr, (m.im_self, m.im_func.func_name)
def fit_curve(x, y):
return splev, splrep(x, y)
def getindexbounds(f, eta):
@ -83,8 +82,8 @@ def clims(lim1, lim2):
def demeanTrace(trace, window):
"""
returns the DATA where each trace is demean by the average value within
WINDOW
takes a trace object and returns the same trace object but with data
demeaned within a certain time window
:param trace: waveform trace object
:type trace: `~obspy.core.stream.Trace`
:param window:
@ -101,31 +100,73 @@ def findComboBoxIndex(combo_box, val):
Function findComboBoxIndex takes a QComboBox object and a string and
returns either 0 or the index throughout all QComboBox items.
:param combo_box: Combo box object.
:type combo_box: QComboBox
:type combo_box: `~QComboBox`
:param val: Name of a combo box to search for.
:type val:
:type val: basestring
:return: index value of item with name val or 0
"""
return combo_box.findText(val) if combo_box.findText(val) is not -1 else 0
def find_in_list(list, str):
"""
takes a list of strings and a string and returns the first list item
matching the string pattern
:param list: list to search in
:param str: pattern to search for
:return: first list item containing pattern
.. example::
>>> l = ['/dir/e1234.123.12', '/dir/e2345.123.12', 'abc123', 'def456']
>>> find_in_list(l, 'dir')
'/dir/e1234.123.12'
>>> find_in_list(l, 'e1234')
'/dir/e1234.123.12'
>>> find_in_list(l, 'e2')
'/dir/e2345.123.12'
>>> find_in_list(l, 'ABC')
'abc123'
>>> find_in_list(l, 'f456')
'def456'
>>> find_in_list(l, 'gurke')
"""
rlist = [s for s in list if str.lower() in s.lower()]
if rlist:
return rlist[0]
return None
def find_nearest(array, value):
'''
Function find_nearest takes an array and a value and returns the
index of the nearest value found in the array.
:param array:
:param value:
:return:
function find_nearest takes an array and a value and returns the
index of the nearest value found in the array
:param array: array containing values
:type array: `~numpy.ndarray`
:param value: number searched for
:return: index of the array item being nearest to the value
>>> a = np.array([ 1.80339578, -0.72546654, 0.95769195, -0.98320759, 0.85922623])
>>> find_nearest(a, 1.3)
2
>>> find_nearest(a, 0)
1
>>> find_nearest(a, 2)
0
>>> find_nearest(a, -1)
3
>>> a = np.array([ 1.1, -0.7, 0.9, -0.9, 0.8])
>>> find_nearest(a, 0.849)
4
'''
return (np.abs(array - value)).argmin()
def fnConstructor(s):
'''
:param s:
:type s:
:return:
takes a string and returns a valid filename (especially on windows machines)
:param s: desired filename
:type s: str
:return: valid filename
'''
if type(s) is str:
s = s.split(':')[-1]
@ -143,7 +184,21 @@ def fnConstructor(s):
def four_digits(year):
if year + 2000 < UTCDateTime.utcnow().year:
"""
takes a two digit year integer and returns the correct four digit equivalent
from the last 100 years
:param year: two digit year
:type year: int
:return: four digit year correspondant
>>> four_digits(20)
1920
>>> four_digits(16)
2016
>>> four_digits(00)
2000
"""
if year + 2000 <= UTCDateTime.utcnow().year:
year += 2000
else:
year += 1900
@ -152,10 +207,11 @@ def four_digits(year):
def getGlobalTimes(stream):
'''
:param stream:
:type stream
:return:
takes a stream object and returns the latest end and the earliest start
time of all contained trace objects
:param stream: seismological data stream
:type stream: `~obspy.core.stream.Stream`
:return: minimum start time and maximum end time
'''
min_start = UTCDateTime()
max_end = None
@ -169,6 +225,8 @@ def getGlobalTimes(stream):
def getHash(time):
'''
takes a time object and returns the corresponding SHA1 hash of the
formatted date string
:param time: time object for which a hash should be calculated
:type time: :class: `~obspy.core.utcdatetime.UTCDateTime` object
:return: str
@ -180,32 +238,31 @@ def getHash(time):
def getLogin():
'''
:return:
returns the actual user's login ID
:return: login ID
'''
return pwd.getpwuid(os.getuid())[0]
def getOwner(fn):
'''
:param fn:
:type fn:
:return:
takes a filename and return the login ID of the actual owner of the file
:param fn: filename of the file tested
:type fn: str
:return: login ID of the file's owner
'''
return pwd.getpwuid(os.stat(fn).st_uid).pw_name
def getPatternLine(fn, pattern):
"""
Takes a file name and a pattern string to search for in the file and
returns the first line which contains the pattern string otherwise None.
takes a file name and a pattern string to search for in the file and
returns the first line which contains the pattern string otherwise 'None'
:param fn: file name
:type fn: str
:param pattern: pattern string to search for
:type pattern: str
:return: the complete line containing pattern or None
:return: the complete line containing the pattern string or None
>>> getPatternLine('utils.py', 'python')
'#!/usr/bin/env python\\n'
@ -223,22 +280,52 @@ def getPatternLine(fn, pattern):
def isSorted(iterable):
'''
:param iterable:
takes an iterable and returns 'True' if the items are in order otherwise
'False'
:param iterable: an iterable object
:type iterable:
:return:
:return: Boolean
>>> isSorted(1)
Traceback (most recent call last):
...
AssertionError: object is not iterable; object: 1
>>> isSorted([1,2,3,4])
True
>>> isSorted('abcd')
True
>>> isSorted('bcad')
False
>>> isSorted([2,3,1,4])
False
'''
assert isIterable(iterable), 'object is not iterable; object: {' \
'0}'.format(iterable)
if type(iterable) is str:
iterable = [s for s in iterable]
return sorted(iterable) == iterable
def isIterable(obj):
"""
takes a python object and returns 'True' is the object is iterable and
'False' otherwise
:param obj: a python object
:return: True of False
"""
try:
iterator = iter(obj)
except TypeError as te:
return False
return True
def prepTimeAxis(stime, trace):
'''
:param stime:
:type stime:
:param trace:
:type trace:
:return:
takes a starttime and a trace object and returns a valid time axis for
plotting
:param stime: start time of the actual seismogram as UTCDateTime
:param trace: seismic trace object
:return: valid numpy array with time stamps for plotting
'''
nsamp = trace.stats.npts
srate = trace.stats.sampling_rate
@ -294,7 +381,6 @@ def runProgram(cmd, parameter=None):
"""
run an external program specified by cmd with parameters input returning the
stdout output
:param cmd: name of the command to run
:type cmd: str
:param parameter: filename of parameter file or parameter string