[new] first use of Metadata class in autoPyLoT, largely increasing read performance using obspyDMT (single DATALESS-Seed files)

This commit is contained in:
Marcel Paffrath 2018-07-10 11:35:13 +02:00
parent b4516fb2cb
commit 3bc3d07793
4 changed files with 83 additions and 35 deletions

View File

@ -22,7 +22,7 @@ from pylot.core.analysis.magnitude import MomentMagnitude, LocalMagnitude
from pylot.core.io.data import Data from pylot.core.io.data import Data
from pylot.core.io.inputs import PylotParameter from pylot.core.io.inputs import PylotParameter
from pylot.core.pick.autopick import autopickevent, iteratepicker from pylot.core.pick.autopick import autopickevent, iteratepicker
from pylot.core.util.dataprocessing import restitute_data, read_metadata from pylot.core.util.dataprocessing import restitute_data, read_metadata, Metadata
from pylot.core.util.defaults import SEPARATOR from pylot.core.util.defaults import SEPARATOR
from pylot.core.util.event import Event from pylot.core.util.event import Event
from pylot.core.util.structure import DATASTRUCTURE from pylot.core.util.structure import DATASTRUCTURE
@ -276,7 +276,11 @@ def autoPyLoT(input_dict=None, parameter=None, inputfile=None, fnames=None, even
wfdat = check4gaps(wfdat) wfdat = check4gaps(wfdat)
wfdat = check4doubled(wfdat) wfdat = check4doubled(wfdat)
wfdat = trim_station_components(wfdat, trim_start=True, trim_end=False) wfdat = trim_station_components(wfdat, trim_start=True, trim_end=False)
metadata = read_metadata(parameter.get('invdir')) if not wfpath_extension:
metadata = Metadata(parameter.get('invdir'))
else:
metadata = Metadata(os.path.join(eventpath, 'resp'))
# metadata = read_metadata(parameter.get('invdir'))
# TODO: (idea) read metadata from obspy_dmt database # TODO: (idea) read metadata from obspy_dmt database
# if not wfpath_extension: # if not wfpath_extension:
# metadata = read_metadata(parameter.get('invdir')) # metadata = read_metadata(parameter.get('invdir'))
@ -285,10 +289,10 @@ def autoPyLoT(input_dict=None, parameter=None, inputfile=None, fnames=None, even
corr_dat = None corr_dat = None
if metadata: if metadata:
# rotate stations to ZNE # rotate stations to ZNE
wfdat = check4rotated(wfdat, metadata) #wfdat = check4rotated(wfdat, metadata) # MP MP TEMPORARILY DISABLED !!!!!!!!!!!
if locflag: if locflag:
print("Restitute data ...") print("Restitute data ...")
corr_dat = restitute_data(wfdat.copy(), *metadata, ncores=ncores) corr_dat = restitute_data(wfdat.copy(), metadata, ncores=ncores)
if not corr_dat and locflag: if not corr_dat and locflag:
locflag = 2 locflag = 2
print('Working on event %s. Stations: %s' % (eventpath, station)) print('Working on event %s. Stations: %s' % (eventpath, station))
@ -363,6 +367,7 @@ def autoPyLoT(input_dict=None, parameter=None, inputfile=None, fnames=None, even
WAscaling[2])) WAscaling[2]))
evt = local_mag.updated_event(magscaling) evt = local_mag.updated_event(magscaling)
net_ml = local_mag.net_magnitude(magscaling) net_ml = local_mag.net_magnitude(magscaling)
if net_ml:
print("Network local magnitude: %4.1f" % net_ml.mag) print("Network local magnitude: %4.1f" % net_ml.mag)
if magscaling == None: if magscaling == None:
scaling = False scaling = False
@ -447,6 +452,7 @@ def autoPyLoT(input_dict=None, parameter=None, inputfile=None, fnames=None, even
WAscaling[2])) WAscaling[2]))
evt = local_mag.updated_event(magscaling) evt = local_mag.updated_event(magscaling)
net_ml = local_mag.net_magnitude(magscaling) net_ml = local_mag.net_magnitude(magscaling)
if net_ml:
print("Network local magnitude: %4.1f" % net_ml.mag) print("Network local magnitude: %4.1f" % net_ml.mag)
if magscaling == None: if magscaling == None:
scaling = False scaling = False

View File

@ -88,9 +88,9 @@ def autopickevent(data, param, iplot=0, fig_dict=None, fig_dict_wadatijack=None,
print('Autopickstation: Distribute autopicking for {} ' print('Autopickstation: Distribute autopicking for {} '
'stations on {} cores.'.format(len(input_tuples), ncores_str)) 'stations on {} cores.'.format(len(input_tuples), ncores_str))
pool = gen_Pool(ncores)
result = pool.map(call_autopickstation, input_tuples) result = parallel_picking(input_tuples, ncores)
pool.close() #result = serial_picking(input_tuples)
for pick in result: for pick in result:
if pick: if pick:
@ -106,6 +106,20 @@ def autopickevent(data, param, iplot=0, fig_dict=None, fig_dict_wadatijack=None,
return wadationsets return wadationsets
def serial_picking(input_tuples):
result = []
for input_tuple in input_tuples:
result.append(call_autopickstation(input_tuple))
return result
def parallel_picking(input_tuples, ncores):
pool = gen_Pool(ncores)
result = pool.imap_unordered(call_autopickstation, input_tuples)
pool.close()
return result
def call_autopickstation(input_tuple): def call_autopickstation(input_tuple):
""" """
helper function used for multiprocessing helper function used for multiprocessing
@ -286,13 +300,10 @@ def autopickstation(wfstream, pickparam, verbose=False,
Lc = np.inf Lc = np.inf
print('autopickstation: use_taup flag active.') print('autopickstation: use_taup flag active.')
if not metadata: if not metadata:
metadata = [None, None]
if not metadata[1]:
print('Warning: Could not use TauPy to estimate onsets as there are no metadata given.') print('Warning: Could not use TauPy to estimate onsets as there are no metadata given.')
else: else:
station_id = wfstream[0].get_id() station_id = wfstream[0].get_id()
parser = metadata[1] station_coords = metadata.get_coordinates(station_id)
station_coords = get_source_coords(parser, station_id)
if station_coords and origin: if station_coords and origin:
source_origin = origin[0] source_origin = origin[0]
model = TauPyModel(taup_model) model = TauPyModel(taup_model)

View File

@ -191,9 +191,24 @@ class AICPicker(AutoPicker):
# remove offset in AIC function # remove offset in AIC function
offset = abs(min(aic) - min(aicsmooth)) offset = abs(min(aic) - min(aicsmooth))
aicsmooth = aicsmooth - offset aicsmooth = aicsmooth - offset
cf = self.Data[0].data
# get maximum of HOS/AR-CF as startimg point for searching # get maximum of HOS/AR-CF as startimg point for searching
# minimum in AIC function # minimum in AIC function
icfmax = np.argmax(self.Data[0].data) icfmax = np.argmax(cf)
# MP MP testing threshold
thresh_hit = False
thresh_factor = 0.6
thresh = thresh_factor * cf[icfmax]
for index, sample in enumerate(cf):
if sample >= thresh:
thresh_hit = True
# go on searching for the following maximum
if index > 0 and thresh_hit:
if sample <= cf[index - 1]:
icfmax = index - 1
break
# MP MP ---
# find minimum in AIC-CF front of maximum of HOS/AR-CF # find minimum in AIC-CF front of maximum of HOS/AR-CF
lpickwindow = int(round(self.PickWindow / self.dt)) lpickwindow = int(round(self.PickWindow / self.dt))
@ -233,14 +248,14 @@ class AICPicker(AutoPicker):
ii = min([isignal[len(isignal) - 1], len(self.Tcf)]) ii = min([isignal[len(isignal) - 1], len(self.Tcf)])
isignal = isignal[0:ii] isignal = isignal[0:ii]
try: try:
self.Data[0].data[isignal] cf[isignal]
except IndexError as e: except IndexError as e:
msg = "Time series out of bounds! {}".format(e) msg = "Time series out of bounds! {}".format(e)
print(msg) print(msg)
return return
# calculate SNR from CF # calculate SNR from CF
self.SNR = max(abs(self.Data[0].data[isignal])) / \ self.SNR = max(abs(cf[isignal])) / \
abs(np.mean(self.Data[0].data[inoise])) abs(np.mean(cf[inoise]))
# calculate slope from CF after initial pick # calculate slope from CF after initial pick
# get slope window # get slope window
tslope = self.TSNR[3] # slope determination window tslope = self.TSNR[3] # slope determination window
@ -253,7 +268,7 @@ class AICPicker(AutoPicker):
# find maximum within slope determination window # find maximum within slope determination window
# 'cause slope should be calculated up to first local minimum only! # 'cause slope should be calculated up to first local minimum only!
try: try:
dataslope = self.Data[0].data[islope[0][0:-1]] dataslope = cf[islope[0][0:-1]]
except IndexError: except IndexError:
print("Slope Calculation: empty array islope, check signal window") print("Slope Calculation: empty array islope, check signal window")
return return
@ -282,8 +297,8 @@ class AICPicker(AutoPicker):
else: else:
fig = self.fig fig = self.fig
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
x = self.Data[0].data cf = cf
ax.plot(self.Tcf, x / max(x), color=self._linecolor, linewidth=0.7, label='(HOS-/AR-) Data') ax.plot(self.Tcf, cf / max(cf), color=self._linecolor, linewidth=0.7, label='(HOS-/AR-) Data')
ax.plot(self.Tcf, aicsmooth / max(aicsmooth), 'r', label='Smoothed AIC-CF') ax.plot(self.Tcf, aicsmooth / max(aicsmooth), 'r', label='Smoothed AIC-CF')
ax.legend(loc=1) ax.legend(loc=1)
ax.set_xlabel('Time [s] since %s' % self.Data[0].stats.starttime) ax.set_xlabel('Time [s] since %s' % self.Data[0].stats.starttime)
@ -296,7 +311,16 @@ class AICPicker(AutoPicker):
plt.close(fig) plt.close(fig)
return return
iislope = islope[0][0:imax+1] iislope = islope[0][0:imax+1]
dataslope = self.Data[0].data[iislope] # MP MP change slope calculation
# get all maxima of aicsmooth
iaicmaxima = argrelmax(aicsmooth)[0]
# get first index of maximum after pickindex (indices saved in iaicmaxima)
aicmax = iaicmaxima[np.where(iaicmaxima > pickindex)[0]]
if len(aicmax) > 0:
iaicmax = aicmax[0]
else:
iaicmax = -1
dataslope = aicsmooth[pickindex : iaicmax]
# calculate slope as polynomal fit of order 1 # calculate slope as polynomal fit of order 1
xslope = np.arange(0, len(dataslope), 1) xslope = np.arange(0, len(dataslope), 1)
P = np.polyfit(xslope, dataslope, 1) P = np.polyfit(xslope, dataslope, 1)
@ -306,7 +330,7 @@ class AICPicker(AutoPicker):
else: else:
self.slope = 1 / (len(dataslope) * self.Data[0].stats.delta) * (datafit[-1] - datafit[0]) self.slope = 1 / (len(dataslope) * self.Data[0].stats.delta) * (datafit[-1] - datafit[0])
# normalize slope to maximum of cf to make it unit independent # normalize slope to maximum of cf to make it unit independent
self.slope /= self.Data[0].data[icfmax] self.slope /= aicsmooth[iaicmax]
else: else:
self.SNR = None self.SNR = None
@ -320,10 +344,9 @@ class AICPicker(AutoPicker):
fig = self.fig fig = self.fig
fig._tight = True fig._tight = True
ax1 = fig.add_subplot(211) ax1 = fig.add_subplot(211)
x = self.Data[0].data if len(self.Tcf) > len(cf): # why? LK
if len(self.Tcf) > len(self.Data[0].data): # why? LK
self.Tcf = self.Tcf[0:len(self.Tcf)-1] self.Tcf = self.Tcf[0:len(self.Tcf)-1]
ax1.plot(self.Tcf, x / max(x), color=self._linecolor, linewidth=0.7, label='(HOS-/AR-) Data') ax1.plot(self.Tcf, cf / max(cf), color=self._linecolor, linewidth=0.7, label='(HOS-/AR-) Data')
ax1.plot(self.Tcf, aicsmooth / max(aicsmooth), 'r', label='Smoothed AIC-CF') ax1.plot(self.Tcf, aicsmooth / max(aicsmooth), 'r', label='Smoothed AIC-CF')
if self.Pick is not None: if self.Pick is not None:
ax1.plot([self.Pick, self.Pick], [-0.1, 0.5], 'b', linewidth=2, label='AIC-Pick') ax1.plot([self.Pick, self.Pick], [-0.1, 0.5], 'b', linewidth=2, label='AIC-Pick')
@ -333,7 +356,7 @@ class AICPicker(AutoPicker):
if self.Pick is not None: if self.Pick is not None:
ax2 = fig.add_subplot(2, 1, 2, sharex=ax1) ax2 = fig.add_subplot(2, 1, 2, sharex=ax1)
ax2.plot(self.Tcf, x, color=self._linecolor, linewidth=0.7, label='Data') ax2.plot(self.Tcf, aicsmooth, color='r', linewidth=0.7, label='Data')
ax1.axvspan(self.Tcf[inoise[0]], self.Tcf[inoise[-1]], color='y', alpha=0.2, lw=0, label='Noise Window') ax1.axvspan(self.Tcf[inoise[0]], self.Tcf[inoise[-1]], color='y', alpha=0.2, lw=0, label='Noise Window')
ax1.axvspan(self.Tcf[isignal[0]], self.Tcf[isignal[-1]], color='b', alpha=0.2, lw=0, ax1.axvspan(self.Tcf[isignal[0]], self.Tcf[isignal[-1]], color='b', alpha=0.2, lw=0,
label='Signal Window') label='Signal Window')
@ -345,7 +368,7 @@ class AICPicker(AutoPicker):
label='Signal Window') label='Signal Window')
ax2.axvspan(self.Tcf[iislope[0]], self.Tcf[iislope[-1]], color='g', alpha=0.2, lw=0, ax2.axvspan(self.Tcf[iislope[0]], self.Tcf[iislope[-1]], color='g', alpha=0.2, lw=0,
label='Slope Window') label='Slope Window')
ax2.plot(self.Tcf[iislope], datafit, 'g', linewidth=2, label='Slope') ax2.plot(self.Tcf[pickindex : iaicmax], datafit, 'g', linewidth=2, label='Slope') # MP MP changed temporarily!
if self.slope is not None: if self.slope is not None:
ax1.set_title('Station %s, SNR=%7.2f, Slope= %12.2f counts/s' % (self.Data[0].stats.station, ax1.set_title('Station %s, SNR=%7.2f, Slope= %12.2f counts/s' % (self.Data[0].stats.station,

View File

@ -100,12 +100,13 @@ class Metadata(object):
if not seed_id in self.seed_ids.keys(): if not seed_id in self.seed_ids.keys():
print('No data found for seed id {}. Trying to find it in all known inventories...'.format(seed_id)) print('No data found for seed id {}. Trying to find it in all known inventories...'.format(seed_id))
self.read_all() self.read_all()
for inv_fname, metadata in self.inventory_files.items(): for inv_fname, metadata_dict in self.inventory_files.items():
# use get_coordinates to check for seed_id # use get_coordinates to check for seed_id
try: try:
metadata['data'].get_coordinates(seed_id) metadata_dict['data'].get_coordinates(seed_id)
self.seed_ids[seed_id] = inv_fname self.seed_ids[seed_id] = inv_fname
return metadata print('Found metadata for station {}!'.format(seed_id))
return metadata_dict
except Exception as e: except Exception as e:
continue continue
print('Could not find metadata for station {}'.format(seed_id)) print('Could not find metadata for station {}'.format(seed_id))
@ -127,6 +128,7 @@ class Metadata(object):
def read_single_file(self, inv_fname): def read_single_file(self, inv_fname):
# try to read a single file as Parser/Inventory if it was not already read before
if not inv_fname in self.inventory_files.keys(): if not inv_fname in self.inventory_files.keys():
pass pass
else: else:
@ -460,6 +462,11 @@ def read_metadata(path_to_inventory):
def restitute_trace(input_tuple): def restitute_trace(input_tuple):
def no_metadata(tr, seed_id):
print('no metadata file found '
'for trace {0}'.format(seed_id))
return tr, True
tr, metadata, unit, force = input_tuple tr, metadata, unit, force = input_tuple
remove_trace = False remove_trace = False
@ -467,6 +474,9 @@ def restitute_trace(input_tuple):
seed_id = tr.get_id() seed_id = tr.get_id()
mdata = metadata.get_metadata(seed_id) mdata = metadata.get_metadata(seed_id)
if not mdata:
return no_metadata(tr, seed_id)
invtype = mdata['invtype'] invtype = mdata['invtype']
inobj = mdata['data'] inobj = mdata['data']
@ -481,8 +491,7 @@ def restitute_trace(input_tuple):
if invtype == 'resp': if invtype == 'resp':
fresp = find_in_list(inobj, seed_id) fresp = find_in_list(inobj, seed_id)
if not fresp: if not fresp:
raise IOError('no response file found ' return no_metadata(tr, seed_id)
'for trace {0}'.format(seed_id))
fname = fresp fname = fresp
seedresp = dict(filename=fname, seedresp = dict(filename=fname,
date=stime, date=stime,
@ -505,8 +514,7 @@ def restitute_trace(input_tuple):
finv = invlist[0] finv = invlist[0]
inventory = read_inventory(finv, format='STATIONXML') inventory = read_inventory(finv, format='STATIONXML')
elif invtype == None: elif invtype == None:
print("No restitution possible, as there are no station-meta data available!") return no_metadata(tr, seed_id)
return tr, True
else: else:
remove_trace = True remove_trace = True
# apply restitution to data # apply restitution to data
@ -562,7 +570,7 @@ def restitute_data(data, metadata, unit='VEL', force=False, ncores=0):
data.remove(tr) data.remove(tr)
pool = gen_Pool(ncores) pool = gen_Pool(ncores)
result = pool.map(restitute_trace, input_tuples) result = pool.imap_unordered(restitute_trace, input_tuples)
pool.close() pool.close()
for tr, remove_trace in result: for tr, remove_trace in result: