From 15700b074d141c29493216ca74f9336b2f040a10 Mon Sep 17 00:00:00 2001
From: Sebastianw Wehling-Benatelli <sebastian.wehling@rub.de>
Date: Tue, 13 Sep 2016 12:00:37 +0200
Subject: [PATCH] [major, refs #200] major change for the magnitude estimation
 from GUI

restitution of waveform data has been moved to dataprocessing; the routines have been cleaned up and work in changed order now:
new function restitute_data is a wrapper function in order to restitute seismic data with the most popular kinds of station metadata
---
 pylot/core/pick/autopick.py       |   5 +-
 pylot/core/util/dataprocessing.py | 230 +++++++++++++++---------------
 pylot/core/util/utils.py          |  31 ++++
 3 files changed, 147 insertions(+), 119 deletions(-)

diff --git a/pylot/core/pick/autopick.py b/pylot/core/pick/autopick.py
index 70c7e956..da496bdf 100755
--- a/pylot/core/pick/autopick.py
+++ b/pylot/core/pick/autopick.py
@@ -568,7 +568,7 @@ def autopickstation(wfstream, pickparam, verbose=False):
                 [cordat, restflag] = restitute_data(h_copy, invdir)
                 # calculate WA-peak-to-peak amplitude
                 # using subclass WApp of superclass Magnitude
-                if restflag == 1:
+                if restflag:
                     if Sweight < 4:
                         wapp = WApp(cordat, mpickS, mpickP + sstop, iplot)
                     else:
@@ -578,6 +578,9 @@ def autopickstation(wfstream, pickparam, verbose=False):
                                     (0.5 * (mpickP + sstop)), iplot)
 
                     Ao = wapp.getwapp()
+                else:
+                    print("Go on processing data without source parameter "
+                          "determination!")
 
         else:
             msg = 'Bad initial (AIC) S-pick, skipping this onset!\n' \
diff --git a/pylot/core/util/dataprocessing.py b/pylot/core/util/dataprocessing.py
index d392c6db..fb739fab 100644
--- a/pylot/core/util/dataprocessing.py
+++ b/pylot/core/util/dataprocessing.py
@@ -3,9 +3,14 @@
 
 import os
 import glob
-from obspy import UTCDateTime, read_inventory
 import sys
+
+import numpy as np
+
+from obspy import UTCDateTime, read_inventory, read
 from obspy.io.xseed import Parser
+from pylot.core.util.utils import key_for_set_value, find_in_list, \
+    remove_underscores
 
 
 def time_from_header(header):
@@ -150,140 +155,129 @@ def evt_head_check(root_dir, out_dir = None):
     print(nfiles)
 
 
-def restitute_data(data, invdlpath):
+def restitute_data(data, path_to_inventory, unit='VEL', force=False):
+    """
+    takes a data stream and a path_to_inventory and returns the corrected
+    waveform data stream
+    :param data: seismic data stream
+    :param path_to_inventory: path to inventory folder or file
+    :param unit: unit to correct for (default: 'VEL')
+    :param force: force restitution for already corrected traces (default:
+    False)
+    :return: corrected data stream
     """
 
-    :param invdlpath:
-    :param data:
-    :return:
-    """
+    restflag = list()
 
-    restflag = False
+    data = remove_underscores(data)
 
-    for tr in data:
-        # remove underscores
-        if len(tr.stats.station) > 3 and tr.stats.station[3] == '_':
-            tr.stats.station = tr.stats.station[0:3]
     dlfile = list()
     invfile = list()
     respfile = list()
     inv = dict(dless=dlfile, xml=invfile, resp=respfile)
-    if os.path.isfile(invdlpath):
-        ext = os.path.splitext(invdlpath)[1].split('.')[1]
-        inv[ext] += [invdlpath]
+    if os.path.isfile(path_to_inventory):
+        ext = os.path.splitext(path_to_inventory)[1].split('.')[1]
+        inv[ext] += [path_to_inventory]
     else:
         for ext in inv.keys():
-            inv[ext] += glob.glob1(invdlpath,'*.{}'.format(ext))
+            inv[ext] += glob.glob1(path_to_inventory, '*.{0}'.format(ext))
 
-    # check for dataless-SEED file
-    # TODO ineffective loop -> better concatenate inventory entries and
-    # loop over traces
-    if len(dlfile) >= 1:
-        print("Found dataless-SEED file(s)!")
-        print("Reading meta data information ...")
-        for j in range(len(dlfile)):
-            print("Found dataless-SEED file %s" % dlfile[j])
-            parser = Parser('%s' % dlfile[j])
-            for i in range(len(data)):
-                # check, whether this trace has already been corrected
-                try:
-                    data[i].stats.processing
-                except:
-                    try:
-                        print(
-                            "Correcting %s, %s for instrument response "
-                            "..." % (data[i].stats.station,
-                                     data[i].stats.channel))
-                        # get corner frequencies for pre-filtering
-                        fny = data[i].stats.sampling_rate / 2
-                        fc21 = fny - (fny * 0.05)
-                        fc22 = fny - (fny * 0.02)
-                        prefilt = [0.5, 0.9, fc21, fc22]
-                        # instrument correction
-                        data[i].simulate(pre_filt=prefilt,
-                                       seedresp={'filename': parser,
-                                                 'date': data[
-                                                     i].stats.starttime,
-                                                 'units': "VEL"})
-                        restflag = True
-                    except ValueError as e:
-                        vmsg = '{0}'.format(e)
-                        print(vmsg)
-                else:
-                    print("Trace has already been corrected!")
-    # check for inventory-xml file
-    if len(invfile) >= 1:
-        print("Found inventory-xml file(s)!")
-        print("Reading meta data information ...")
-        for j in range(len(invfile)):
-            print("Found inventory-xml file %s" % invfile[j])
-            inv = read_inventory(invfile[j], format="STATIONXML")
-            for i in range(len(data)):
-                # check, whether this trace has already been corrected
-                try:
-                    data[i].stats.processing
-                except:
-                    try:
-                        print("Correcting %s, %s for instrument response "
-                              "..." % (data[i].stats.station,
-                                       data[i].stats.channel))
-                        # get corner frequencies for pre-filtering
-                        fny = data[i].stats.sampling_rate / 2
-                        fc21 = fny - (fny * 0.05)
-                        fc22 = fny - (fny * 0.02)
-                        prefilt = [0.5, 0.9, fc21, fc22]
-                        # instrument correction
-                        data[i].attach_response(inv)
-                        data[i].remove_response(output='VEL',
-                                              pre_filt=prefilt)
-                        restflag = True
-                    except ValueError as e:
-                        vmsg = '{0}'.format(e)
-                        print(vmsg)
-                else:
-                    print("Trace has already been corrected!")
-    # check for RESP-file
-    if len(respfile) >= 1:
-        print("Found response file(s)!")
-        print("Reading meta data information ...")
-        for j in range(len(respfile)):
-            print("Found RESP-file %s" % respfile[j])
-            for i in range(len(data)):
-                # check, whether this trace has already been corrected
-                try:
-                    data[i].stats.processing
-                except:
-                    try:
-                        print("Correcting %s, %s for instrument response "
-                              "..." % (data[i].stats.station,
-                                       data[i].stats.channel))
-                        # get corner frequencies for pre-filtering
-                        fny = data[i].stats.sampling_rate / 2
-                        fc21 = fny - (fny * 0.05)
-                        fc22 = fny - (fny * 0.02)
-                        prefilt = [0.5, 0.9, fc21, fc22]
-                        # instrument correction
-                        seedresp = {'filename': respfile[0],
-                                    'date': data[0].stats.starttime,
-                                    'units': "VEL"}
-                        data[i].simulate(paz_remove=None, pre_filt=prefilt,
-                                       seedresp=seedresp)
-                        restflag = True
-                    except ValueError as e:
-                        vmsg = '{0}'.format(e)
-                        print(vmsg)
-                else:
-                    print("Trace has already been corrected!")
+    invtype = key_for_set_value(inv)
 
-    if len(respfile) < 1 and len(invfile) < 1 and len(dlfile) < 1:
-        print("No dataless-SEED file,inventory-xml file nor RESP-file "
+    if invtype is None:
+        print("Neither dataless-SEED file,inventory-xml file nor RESP-file "
               "found!")
-        print("Go on processing data without source parameter "
-              "determination!")
+        return data, False
 
+    # loop over traces
+    for tr in data:
+        seed_id = tr.get_id()
+        # check, whether this trace has already been corrected
+        if 'processing' in tr.stats.keys() and not force:
+            print("Trace {0} has already been corrected!".format(seed_id))
+            continue
+        stime = tr.stats.starttime
+        seedresp = None
+        prefilt = get_prefilt(tr)
+        if invtype == 'resp':
+            fresp = find_in_list(inv[invtype], seed_id)
+            if not fresp:
+                raise IOError('no response file found '
+                              'for trace {0}'.format(seed_id))
+            fname = fresp
+            seedresp = dict(filename=fname,
+                            date=stime,
+                            units=unit)
+            kwargs = dict(paz_remove=None, pre_filt=prefilt, seedresp=seedresp)
+        elif invtype == 'dless':
+            if len(inv[invtype]) > 1:
+                fname = Parser(find_in_list(inv[invtype], seed_id))
+            else:
+                fname = Parser(inv[invtype][0])
+            seedresp = dict(filename=fname,
+                            date=stime,
+                            units=unit)
+            kwargs = dict(pre_filt=prefilt, seedresp=seedresp)
+        elif invtype == 'xml':
+            invlist = inv[invtype]
+            if len(invlist) > 1:
+                finv = find_in_list(invlist, seed_id)
+            else:
+                finv = invlist[0]
+            inventory = read_inventory(finv, format='STATIONXML')
+        # apply restitution to data
+        if invtype in ['resp', 'dless']:
+            tr.simulate(**kwargs)
+        else:
+            tr.attach_response(inventory)
+            tr.remove_response(output=unit,
+                               pre_filt=prefilt)
+        restflag.append(True)
+    # check if ALL traces could be restituted, take care of large datasets
+    # better try restitution for smaller subsets of data (e.g. station by
+    # station)
+    if len(restflag) > 0:
+        restflag = np.all(restflag)
+    else:
+        restflag = False
     return data, restflag
 
 
+def get_prefilt(trace, tlow=(0.5, 0.9), thi=(5., 2.), verbosity=0):
+    """
+    takes a `obspy.core.stream.Trace` object, taper parameters tlow and thi and
+    returns the pre-filtering corner frequencies for the cosine taper for
+    further processing
+    :param trace: seismic data trace
+    :type trace: `obspy.core.stream.Trace`
+    :param tlow: tuple or list containing the desired lower corner
+    frequenices for a cosine taper
+    :type tlow: tuple or list
+    :param thi: tuple or list containing the percentage values of the
+    Nyquist frequency for the desired upper corner frequencies of the cosine
+    taper
+    :type thi: tuple or list
+    :param verbosity: verbosity level
+    :type verbosity: int
+    :return: pre-filt cosine taper corner frequencies
+    :rtype: tuple
+
+    ..example::
+
+    >>> st = read()
+    >>> get_prefilt(st[0])
+    (0.5, 0.9, 47.5, 49.0)
+    """
+    if verbosity:
+        print("Calculating pre-filter values for %s, %s ..." % (
+            trace.stats.station, trace.stats.channel))
+    # get corner frequencies for pre-filtering
+    fny = trace.stats.sampling_rate / 2
+    fc21 = fny - (fny * thi[0]/100.)
+    fc22 = fny - (fny * thi[1]/100.)
+    return (tlow[0], tlow[1], fc21, fc22)
+
+
 if __name__ == "__main__":
     import doctest
 
diff --git a/pylot/core/util/utils.py b/pylot/core/util/utils.py
index 8aafbd9b..225b276e 100644
--- a/pylot/core/util/utils.py
+++ b/pylot/core/util/utils.py
@@ -328,6 +328,23 @@ def isIterable(obj):
         return False
     return True
 
+
+def key_for_set_value(d):
+    """
+    takes a dictionary and returns the first key for which's value the
+    boolean is True
+    :param d: dictionary containing values
+    :type d: dict
+    :return: key to the first non-False value found; None if no value's
+    boolean equals True
+    """
+    r = None
+    for k, v in d.items():
+        if v:
+            return k
+    return r
+
+
 def prepTimeAxis(stime, trace):
     '''
     takes a starttime and a trace object and returns a valid time axis for
@@ -354,6 +371,20 @@ def prepTimeAxis(stime, trace):
     return time_ax
 
 
+def remove_underscores(data):
+    """
+    takes a `obspy.core.stream.Stream` object and removes all underscores
+    from stationnames
+    :param data: stream of seismic data
+    :type data: `obspy.core.stream.Stream`
+    :return: data stream
+    """
+    for tr in data:
+        # remove underscores
+        tr.stats.station = tr.stats.station.strip('_')
+    return data
+
+
 def scaleWFData(data, factor=None, components='all'):
     """
     produce scaled waveforms from given waveform data and a scaling factor,