[new] first use of Metadata class in autoPyLoT, largely increasing read performance using obspyDMT (single DATALESS-Seed files)

This commit is contained in:
Marcel Paffrath 2018-07-10 11:35:13 +02:00
parent b4516fb2cb
commit 3bc3d07793
4 changed files with 83 additions and 35 deletions

View File

@ -22,7 +22,7 @@ from pylot.core.analysis.magnitude import MomentMagnitude, LocalMagnitude
from pylot.core.io.data import Data
from pylot.core.io.inputs import PylotParameter
from pylot.core.pick.autopick import autopickevent, iteratepicker
from pylot.core.util.dataprocessing import restitute_data, read_metadata
from pylot.core.util.dataprocessing import restitute_data, read_metadata, Metadata
from pylot.core.util.defaults import SEPARATOR
from pylot.core.util.event import Event
from pylot.core.util.structure import DATASTRUCTURE
@ -276,7 +276,11 @@ def autoPyLoT(input_dict=None, parameter=None, inputfile=None, fnames=None, even
wfdat = check4gaps(wfdat)
wfdat = check4doubled(wfdat)
wfdat = trim_station_components(wfdat, trim_start=True, trim_end=False)
metadata = read_metadata(parameter.get('invdir'))
if not wfpath_extension:
metadata = Metadata(parameter.get('invdir'))
else:
metadata = Metadata(os.path.join(eventpath, 'resp'))
# metadata = read_metadata(parameter.get('invdir'))
# TODO: (idea) read metadata from obspy_dmt database
# if not wfpath_extension:
# metadata = read_metadata(parameter.get('invdir'))
@ -285,10 +289,10 @@ def autoPyLoT(input_dict=None, parameter=None, inputfile=None, fnames=None, even
corr_dat = None
if metadata:
# rotate stations to ZNE
wfdat = check4rotated(wfdat, metadata)
#wfdat = check4rotated(wfdat, metadata) # MP MP TEMPORARILY DISABLED !!!!!!!!!!!
if locflag:
print("Restitute data ...")
corr_dat = restitute_data(wfdat.copy(), *metadata, ncores=ncores)
corr_dat = restitute_data(wfdat.copy(), metadata, ncores=ncores)
if not corr_dat and locflag:
locflag = 2
print('Working on event %s. Stations: %s' % (eventpath, station))
@ -363,7 +367,8 @@ def autoPyLoT(input_dict=None, parameter=None, inputfile=None, fnames=None, even
WAscaling[2]))
evt = local_mag.updated_event(magscaling)
net_ml = local_mag.net_magnitude(magscaling)
print("Network local magnitude: %4.1f" % net_ml.mag)
if net_ml:
print("Network local magnitude: %4.1f" % net_ml.mag)
if magscaling == None:
scaling = False
elif magscaling[0] != 0 and magscaling[1] != 0:
@ -447,7 +452,8 @@ def autoPyLoT(input_dict=None, parameter=None, inputfile=None, fnames=None, even
WAscaling[2]))
evt = local_mag.updated_event(magscaling)
net_ml = local_mag.net_magnitude(magscaling)
print("Network local magnitude: %4.1f" % net_ml.mag)
if net_ml:
print("Network local magnitude: %4.1f" % net_ml.mag)
if magscaling == None:
scaling = False
elif magscaling[0] != 0 and magscaling[1] != 0:

View File

@ -88,9 +88,9 @@ def autopickevent(data, param, iplot=0, fig_dict=None, fig_dict_wadatijack=None,
print('Autopickstation: Distribute autopicking for {} '
'stations on {} cores.'.format(len(input_tuples), ncores_str))
pool = gen_Pool(ncores)
result = pool.map(call_autopickstation, input_tuples)
pool.close()
result = parallel_picking(input_tuples, ncores)
#result = serial_picking(input_tuples)
for pick in result:
if pick:
@ -106,6 +106,20 @@ def autopickevent(data, param, iplot=0, fig_dict=None, fig_dict_wadatijack=None,
return wadationsets
def serial_picking(input_tuples):
result = []
for input_tuple in input_tuples:
result.append(call_autopickstation(input_tuple))
return result
def parallel_picking(input_tuples, ncores):
pool = gen_Pool(ncores)
result = pool.imap_unordered(call_autopickstation, input_tuples)
pool.close()
return result
def call_autopickstation(input_tuple):
"""
helper function used for multiprocessing
@ -286,13 +300,10 @@ def autopickstation(wfstream, pickparam, verbose=False,
Lc = np.inf
print('autopickstation: use_taup flag active.')
if not metadata:
metadata = [None, None]
if not metadata[1]:
print('Warning: Could not use TauPy to estimate onsets as there are no metadata given.')
else:
station_id = wfstream[0].get_id()
parser = metadata[1]
station_coords = get_source_coords(parser, station_id)
station_coords = metadata.get_coordinates(station_id)
if station_coords and origin:
source_origin = origin[0]
model = TauPyModel(taup_model)

View File

@ -191,9 +191,24 @@ class AICPicker(AutoPicker):
# remove offset in AIC function
offset = abs(min(aic) - min(aicsmooth))
aicsmooth = aicsmooth - offset
cf = self.Data[0].data
# get maximum of HOS/AR-CF as startimg point for searching
# minimum in AIC function
icfmax = np.argmax(self.Data[0].data)
icfmax = np.argmax(cf)
# MP MP testing threshold
thresh_hit = False
thresh_factor = 0.6
thresh = thresh_factor * cf[icfmax]
for index, sample in enumerate(cf):
if sample >= thresh:
thresh_hit = True
# go on searching for the following maximum
if index > 0 and thresh_hit:
if sample <= cf[index - 1]:
icfmax = index - 1
break
# MP MP ---
# find minimum in AIC-CF front of maximum of HOS/AR-CF
lpickwindow = int(round(self.PickWindow / self.dt))
@ -233,14 +248,14 @@ class AICPicker(AutoPicker):
ii = min([isignal[len(isignal) - 1], len(self.Tcf)])
isignal = isignal[0:ii]
try:
self.Data[0].data[isignal]
cf[isignal]
except IndexError as e:
msg = "Time series out of bounds! {}".format(e)
print(msg)
return
# calculate SNR from CF
self.SNR = max(abs(self.Data[0].data[isignal])) / \
abs(np.mean(self.Data[0].data[inoise]))
self.SNR = max(abs(cf[isignal])) / \
abs(np.mean(cf[inoise]))
# calculate slope from CF after initial pick
# get slope window
tslope = self.TSNR[3] # slope determination window
@ -253,7 +268,7 @@ class AICPicker(AutoPicker):
# find maximum within slope determination window
# 'cause slope should be calculated up to first local minimum only!
try:
dataslope = self.Data[0].data[islope[0][0:-1]]
dataslope = cf[islope[0][0:-1]]
except IndexError:
print("Slope Calculation: empty array islope, check signal window")
return
@ -282,8 +297,8 @@ class AICPicker(AutoPicker):
else:
fig = self.fig
ax = fig.add_subplot(111)
x = self.Data[0].data
ax.plot(self.Tcf, x / max(x), color=self._linecolor, linewidth=0.7, label='(HOS-/AR-) Data')
cf = cf
ax.plot(self.Tcf, cf / max(cf), color=self._linecolor, linewidth=0.7, label='(HOS-/AR-) Data')
ax.plot(self.Tcf, aicsmooth / max(aicsmooth), 'r', label='Smoothed AIC-CF')
ax.legend(loc=1)
ax.set_xlabel('Time [s] since %s' % self.Data[0].stats.starttime)
@ -296,7 +311,16 @@ class AICPicker(AutoPicker):
plt.close(fig)
return
iislope = islope[0][0:imax+1]
dataslope = self.Data[0].data[iislope]
# MP MP change slope calculation
# get all maxima of aicsmooth
iaicmaxima = argrelmax(aicsmooth)[0]
# get first index of maximum after pickindex (indices saved in iaicmaxima)
aicmax = iaicmaxima[np.where(iaicmaxima > pickindex)[0]]
if len(aicmax) > 0:
iaicmax = aicmax[0]
else:
iaicmax = -1
dataslope = aicsmooth[pickindex : iaicmax]
# calculate slope as polynomal fit of order 1
xslope = np.arange(0, len(dataslope), 1)
P = np.polyfit(xslope, dataslope, 1)
@ -306,7 +330,7 @@ class AICPicker(AutoPicker):
else:
self.slope = 1 / (len(dataslope) * self.Data[0].stats.delta) * (datafit[-1] - datafit[0])
# normalize slope to maximum of cf to make it unit independent
self.slope /= self.Data[0].data[icfmax]
self.slope /= aicsmooth[iaicmax]
else:
self.SNR = None
@ -320,10 +344,9 @@ class AICPicker(AutoPicker):
fig = self.fig
fig._tight = True
ax1 = fig.add_subplot(211)
x = self.Data[0].data
if len(self.Tcf) > len(self.Data[0].data): # why? LK
if len(self.Tcf) > len(cf): # why? LK
self.Tcf = self.Tcf[0:len(self.Tcf)-1]
ax1.plot(self.Tcf, x / max(x), color=self._linecolor, linewidth=0.7, label='(HOS-/AR-) Data')
ax1.plot(self.Tcf, cf / max(cf), color=self._linecolor, linewidth=0.7, label='(HOS-/AR-) Data')
ax1.plot(self.Tcf, aicsmooth / max(aicsmooth), 'r', label='Smoothed AIC-CF')
if self.Pick is not None:
ax1.plot([self.Pick, self.Pick], [-0.1, 0.5], 'b', linewidth=2, label='AIC-Pick')
@ -333,7 +356,7 @@ class AICPicker(AutoPicker):
if self.Pick is not None:
ax2 = fig.add_subplot(2, 1, 2, sharex=ax1)
ax2.plot(self.Tcf, x, color=self._linecolor, linewidth=0.7, label='Data')
ax2.plot(self.Tcf, aicsmooth, color='r', linewidth=0.7, label='Data')
ax1.axvspan(self.Tcf[inoise[0]], self.Tcf[inoise[-1]], color='y', alpha=0.2, lw=0, label='Noise Window')
ax1.axvspan(self.Tcf[isignal[0]], self.Tcf[isignal[-1]], color='b', alpha=0.2, lw=0,
label='Signal Window')
@ -345,7 +368,7 @@ class AICPicker(AutoPicker):
label='Signal Window')
ax2.axvspan(self.Tcf[iislope[0]], self.Tcf[iislope[-1]], color='g', alpha=0.2, lw=0,
label='Slope Window')
ax2.plot(self.Tcf[iislope], datafit, 'g', linewidth=2, label='Slope')
ax2.plot(self.Tcf[pickindex : iaicmax], datafit, 'g', linewidth=2, label='Slope') # MP MP changed temporarily!
if self.slope is not None:
ax1.set_title('Station %s, SNR=%7.2f, Slope= %12.2f counts/s' % (self.Data[0].stats.station,

View File

@ -100,12 +100,13 @@ class Metadata(object):
if not seed_id in self.seed_ids.keys():
print('No data found for seed id {}. Trying to find it in all known inventories...'.format(seed_id))
self.read_all()
for inv_fname, metadata in self.inventory_files.items():
for inv_fname, metadata_dict in self.inventory_files.items():
# use get_coordinates to check for seed_id
try:
metadata['data'].get_coordinates(seed_id)
metadata_dict['data'].get_coordinates(seed_id)
self.seed_ids[seed_id] = inv_fname
return metadata
print('Found metadata for station {}!'.format(seed_id))
return metadata_dict
except Exception as e:
continue
print('Could not find metadata for station {}'.format(seed_id))
@ -127,6 +128,7 @@ class Metadata(object):
def read_single_file(self, inv_fname):
# try to read a single file as Parser/Inventory if it was not already read before
if not inv_fname in self.inventory_files.keys():
pass
else:
@ -460,6 +462,11 @@ def read_metadata(path_to_inventory):
def restitute_trace(input_tuple):
def no_metadata(tr, seed_id):
print('no metadata file found '
'for trace {0}'.format(seed_id))
return tr, True
tr, metadata, unit, force = input_tuple
remove_trace = False
@ -467,6 +474,9 @@ def restitute_trace(input_tuple):
seed_id = tr.get_id()
mdata = metadata.get_metadata(seed_id)
if not mdata:
return no_metadata(tr, seed_id)
invtype = mdata['invtype']
inobj = mdata['data']
@ -481,8 +491,7 @@ def restitute_trace(input_tuple):
if invtype == 'resp':
fresp = find_in_list(inobj, seed_id)
if not fresp:
raise IOError('no response file found '
'for trace {0}'.format(seed_id))
return no_metadata(tr, seed_id)
fname = fresp
seedresp = dict(filename=fname,
date=stime,
@ -505,8 +514,7 @@ def restitute_trace(input_tuple):
finv = invlist[0]
inventory = read_inventory(finv, format='STATIONXML')
elif invtype == None:
print("No restitution possible, as there are no station-meta data available!")
return tr, True
return no_metadata(tr, seed_id)
else:
remove_trace = True
# apply restitution to data
@ -562,7 +570,7 @@ def restitute_data(data, metadata, unit='VEL', force=False, ncores=0):
data.remove(tr)
pool = gen_Pool(ncores)
result = pool.map(restitute_trace, input_tuples)
result = pool.imap_unordered(restitute_trace, input_tuples)
pool.close()
for tr, remove_trace in result: