[new] first use of Metadata class in autoPyLoT, largely increasing read performance using obspyDMT (single DATALESS-Seed files)

2018-07-10 11:35:13 +02:00 · 2018-07-10 11:35:13 +02:00 · 3bc3d07793
commit 3bc3d07793
parent b4516fb2cb
4 changed files with 83 additions and 35 deletions
--- a/autoPyLoT.py
+++ b/autoPyLoT.py
@ -22,7 +22,7 @@ from pylot.core.analysis.magnitude import MomentMagnitude, LocalMagnitude
 from pylot.core.io.data import Data
 from pylot.core.io.inputs import PylotParameter
 from pylot.core.pick.autopick import autopickevent, iteratepicker
-from pylot.core.util.dataprocessing import restitute_data, read_metadata
+from pylot.core.util.dataprocessing import restitute_data, read_metadata, Metadata
 from pylot.core.util.defaults import SEPARATOR
 from pylot.core.util.event import Event
 from pylot.core.util.structure import DATASTRUCTURE
@ -276,7 +276,11 @@ def autoPyLoT(input_dict=None, parameter=None, inputfile=None, fnames=None, even
            wfdat = check4gaps(wfdat)
            wfdat = check4doubled(wfdat)
            wfdat = trim_station_components(wfdat, trim_start=True, trim_end=False)
-            metadata = read_metadata(parameter.get('invdir'))
+            if not wfpath_extension:
+                metadata = Metadata(parameter.get('invdir'))
+            else:
+                metadata = Metadata(os.path.join(eventpath, 'resp'))
+            # metadata = read_metadata(parameter.get('invdir'))
            # TODO: (idea) read metadata from obspy_dmt database
            # if not wfpath_extension:
            #     metadata = read_metadata(parameter.get('invdir'))
@ -285,10 +289,10 @@ def autoPyLoT(input_dict=None, parameter=None, inputfile=None, fnames=None, even
            corr_dat = None
            if metadata:
                # rotate stations to ZNE
-                wfdat = check4rotated(wfdat, metadata)
+                #wfdat = check4rotated(wfdat, metadata) # MP MP TEMPORARILY DISABLED !!!!!!!!!!!
                if locflag:
                    print("Restitute data ...")
-                    corr_dat = restitute_data(wfdat.copy(), *metadata, ncores=ncores)
+                    corr_dat = restitute_data(wfdat.copy(), metadata, ncores=ncores)
            if not corr_dat and locflag:
                locflag = 2
            print('Working on event %s. Stations: %s' % (eventpath, station))
@ -363,7 +367,8 @@ def autoPyLoT(input_dict=None, parameter=None, inputfile=None, fnames=None, even
                                                                       WAscaling[2]))
                        evt = local_mag.updated_event(magscaling)
                        net_ml = local_mag.net_magnitude(magscaling)
-                        print("Network local magnitude: %4.1f" % net_ml.mag)
+                        if net_ml:
+                            print("Network local magnitude: %4.1f" % net_ml.mag)
                        if magscaling == None:
                            scaling = False
                        elif magscaling[0] != 0 and magscaling[1] != 0:
@ -447,7 +452,8 @@ def autoPyLoT(input_dict=None, parameter=None, inputfile=None, fnames=None, even
                                                                           WAscaling[2]))
                            evt = local_mag.updated_event(magscaling)
                            net_ml = local_mag.net_magnitude(magscaling)
-                            print("Network local magnitude: %4.1f" % net_ml.mag)
+                            if net_ml:
+                                print("Network local magnitude: %4.1f" % net_ml.mag)
                            if magscaling == None:
                                scaling = False
                            elif magscaling[0] != 0 and magscaling[1] != 0:
--- a/pylot/core/pick/autopick.py
+++ b/pylot/core/pick/autopick.py
@ -88,9 +88,9 @@ def autopickevent(data, param, iplot=0, fig_dict=None, fig_dict_wadatijack=None,
    print('Autopickstation: Distribute autopicking for {} '
          'stations on {} cores.'.format(len(input_tuples), ncores_str))

-    pool = gen_Pool(ncores)
-    result = pool.map(call_autopickstation, input_tuples)
-    pool.close()
+
+    result = parallel_picking(input_tuples, ncores)
+    #result = serial_picking(input_tuples)

    for pick in result:
        if pick:
@ -106,6 +106,20 @@ def autopickevent(data, param, iplot=0, fig_dict=None, fig_dict_wadatijack=None,
    return wadationsets


+def serial_picking(input_tuples):
+    result = []
+    for input_tuple in input_tuples:
+        result.append(call_autopickstation(input_tuple))
+    return result
+
+
+def parallel_picking(input_tuples, ncores):
+    pool = gen_Pool(ncores)
+    result = pool.imap_unordered(call_autopickstation, input_tuples)
+    pool.close()
+    return result
+
+
 def call_autopickstation(input_tuple):
    """
    helper function used for multiprocessing
@ -286,13 +300,10 @@ def autopickstation(wfstream, pickparam, verbose=False,
            Lc = np.inf
            print('autopickstation: use_taup flag active.')
            if not metadata:
-                metadata = [None, None]
-            if not metadata[1]:
                print('Warning: Could not use TauPy to estimate onsets as there are no metadata given.')
            else:
                station_id = wfstream[0].get_id()
-                parser = metadata[1]
-                station_coords = get_source_coords(parser, station_id)
+                station_coords = metadata.get_coordinates(station_id)
                if station_coords and origin:
                    source_origin = origin[0]
                    model = TauPyModel(taup_model)
--- a/pylot/core/pick/picker.py
+++ b/pylot/core/pick/picker.py
@ -191,9 +191,24 @@ class AICPicker(AutoPicker):
        # remove offset in AIC function
        offset = abs(min(aic) - min(aicsmooth))
        aicsmooth = aicsmooth - offset
+        cf = self.Data[0].data
        # get maximum of HOS/AR-CF as startimg point for searching
        # minimum in AIC function 
-        icfmax = np.argmax(self.Data[0].data)
+        icfmax = np.argmax(cf)
+
+        # MP MP testing threshold
+        thresh_hit = False
+        thresh_factor = 0.6
+        thresh = thresh_factor * cf[icfmax]
+        for index, sample in enumerate(cf):
+            if sample >= thresh:
+                thresh_hit = True
+            # go on searching for the following maximum
+            if index > 0 and thresh_hit:
+                if sample <= cf[index - 1]:
+                    icfmax = index - 1
+                    break
+        # MP MP ---

        # find minimum in AIC-CF front of maximum of HOS/AR-CF
        lpickwindow = int(round(self.PickWindow / self.dt))
@ -233,14 +248,14 @@ class AICPicker(AutoPicker):
            ii = min([isignal[len(isignal) - 1], len(self.Tcf)])
            isignal = isignal[0:ii]
            try:
-                self.Data[0].data[isignal]
+                cf[isignal]
            except IndexError as e:
                msg = "Time series out of bounds! {}".format(e)
                print(msg)
                return
            # calculate SNR from CF
-            self.SNR = max(abs(self.Data[0].data[isignal])) / \
-                       abs(np.mean(self.Data[0].data[inoise]))
+            self.SNR = max(abs(cf[isignal])) / \
+                       abs(np.mean(cf[inoise]))
            # calculate slope from CF after initial pick
            # get slope window
            tslope = self.TSNR[3]  # slope determination window
@ -253,7 +268,7 @@ class AICPicker(AutoPicker):
            # find maximum within slope determination window
            # 'cause slope should be calculated up to first local minimum only!
            try:
-                dataslope = self.Data[0].data[islope[0][0:-1]]
+                dataslope = cf[islope[0][0:-1]]
            except IndexError:
                print("Slope Calculation: empty array islope, check signal window")
                return
@ -282,8 +297,8 @@ class AICPicker(AutoPicker):
                        else:
                            fig = self.fig
                        ax = fig.add_subplot(111)
-                        x = self.Data[0].data
-                        ax.plot(self.Tcf, x / max(x), color=self._linecolor, linewidth=0.7, label='(HOS-/AR-) Data')
+                        cf = cf
+                        ax.plot(self.Tcf, cf / max(cf), color=self._linecolor, linewidth=0.7, label='(HOS-/AR-) Data')
                        ax.plot(self.Tcf, aicsmooth / max(aicsmooth), 'r', label='Smoothed AIC-CF')
                        ax.legend(loc=1)
                        ax.set_xlabel('Time [s] since %s' % self.Data[0].stats.starttime)
@ -296,7 +311,16 @@ class AICPicker(AutoPicker):
                            plt.close(fig)
                    return
                iislope = islope[0][0:imax+1]
-            dataslope = self.Data[0].data[iislope]
+            # MP MP change slope calculation
+            # get all maxima of aicsmooth
+            iaicmaxima = argrelmax(aicsmooth)[0]
+            # get first index of maximum after pickindex (indices saved in iaicmaxima)
+            aicmax = iaicmaxima[np.where(iaicmaxima > pickindex)[0]]
+            if len(aicmax) > 0:
+                iaicmax = aicmax[0]
+            else:
+                iaicmax = -1
+            dataslope = aicsmooth[pickindex : iaicmax]
            # calculate slope as polynomal fit of order 1
            xslope = np.arange(0, len(dataslope), 1)
            P = np.polyfit(xslope, dataslope, 1)
@ -306,7 +330,7 @@ class AICPicker(AutoPicker):
            else:
                self.slope = 1 / (len(dataslope) * self.Data[0].stats.delta) * (datafit[-1] - datafit[0])
                # normalize slope to maximum of cf to make it unit independent
-                self.slope /= self.Data[0].data[icfmax]
+                self.slope /= aicsmooth[iaicmax]

        else:
            self.SNR = None
@ -320,10 +344,9 @@ class AICPicker(AutoPicker):
                fig = self.fig
            fig._tight = True
            ax1 = fig.add_subplot(211)
-            x = self.Data[0].data
-            if len(self.Tcf) > len(self.Data[0].data): # why? LK
+            if len(self.Tcf) > len(cf): # why? LK
                self.Tcf = self.Tcf[0:len(self.Tcf)-1]
-            ax1.plot(self.Tcf, x / max(x), color=self._linecolor, linewidth=0.7, label='(HOS-/AR-) Data')
+            ax1.plot(self.Tcf, cf / max(cf), color=self._linecolor, linewidth=0.7, label='(HOS-/AR-) Data')
            ax1.plot(self.Tcf, aicsmooth / max(aicsmooth), 'r', label='Smoothed AIC-CF')
            if self.Pick is not None:
                ax1.plot([self.Pick, self.Pick], [-0.1, 0.5], 'b', linewidth=2, label='AIC-Pick')
@ -333,7 +356,7 @@ class AICPicker(AutoPicker):

            if self.Pick is not None:
                ax2 = fig.add_subplot(2, 1, 2, sharex=ax1)
-                ax2.plot(self.Tcf, x, color=self._linecolor, linewidth=0.7, label='Data')
+                ax2.plot(self.Tcf, aicsmooth, color='r', linewidth=0.7, label='Data')
                ax1.axvspan(self.Tcf[inoise[0]], self.Tcf[inoise[-1]], color='y', alpha=0.2, lw=0, label='Noise Window')
                ax1.axvspan(self.Tcf[isignal[0]], self.Tcf[isignal[-1]], color='b', alpha=0.2, lw=0,
                            label='Signal Window')
@ -345,7 +368,7 @@ class AICPicker(AutoPicker):
                            label='Signal Window')
                ax2.axvspan(self.Tcf[iislope[0]], self.Tcf[iislope[-1]], color='g', alpha=0.2, lw=0,
                            label='Slope Window')
-                ax2.plot(self.Tcf[iislope], datafit, 'g', linewidth=2, label='Slope')
+                ax2.plot(self.Tcf[pickindex : iaicmax], datafit, 'g', linewidth=2, label='Slope') # MP MP changed temporarily!

                if self.slope is not None:
                    ax1.set_title('Station %s, SNR=%7.2f, Slope= %12.2f counts/s' % (self.Data[0].stats.station,
--- a/pylot/core/util/dataprocessing.py
+++ b/pylot/core/util/dataprocessing.py
@ -100,12 +100,13 @@ class Metadata(object):
        if not seed_id in self.seed_ids.keys():
            print('No data found for seed id {}. Trying to find it in all known inventories...'.format(seed_id))
            self.read_all()
-            for inv_fname, metadata in self.inventory_files.items():
+            for inv_fname, metadata_dict in self.inventory_files.items():
                # use get_coordinates to check for seed_id
                try:
-                    metadata['data'].get_coordinates(seed_id)
+                    metadata_dict['data'].get_coordinates(seed_id)
                    self.seed_ids[seed_id] = inv_fname
-                    return metadata
+                    print('Found metadata for station {}!'.format(seed_id))
+                    return metadata_dict
                except Exception as e:
                    continue
            print('Could not find metadata for station {}'.format(seed_id))
@ -127,6 +128,7 @@ class Metadata(object):


    def read_single_file(self, inv_fname):
+        # try to read a single file as Parser/Inventory if it was not already read before
        if not inv_fname in self.inventory_files.keys():
            pass
        else:
@ -460,6 +462,11 @@ def read_metadata(path_to_inventory):


 def restitute_trace(input_tuple):
+    def no_metadata(tr, seed_id):
+        print('no metadata file found '
+              'for trace {0}'.format(seed_id))
+        return tr, True
+
    tr, metadata, unit, force = input_tuple

    remove_trace = False
@ -467,6 +474,9 @@ def restitute_trace(input_tuple):
    seed_id = tr.get_id()

    mdata = metadata.get_metadata(seed_id)
+    if not mdata:
+        return no_metadata(tr, seed_id)
+
    invtype = mdata['invtype']
    inobj = mdata['data']

@ -481,8 +491,7 @@ def restitute_trace(input_tuple):
    if invtype == 'resp':
        fresp = find_in_list(inobj, seed_id)
        if not fresp:
-            raise IOError('no response file found '
-                          'for trace {0}'.format(seed_id))
+            return no_metadata(tr, seed_id)
        fname = fresp
        seedresp = dict(filename=fname,
                        date=stime,
@ -505,8 +514,7 @@ def restitute_trace(input_tuple):
            finv = invlist[0]
        inventory = read_inventory(finv, format='STATIONXML')
    elif invtype == None:
-        print("No restitution possible, as there are no station-meta data available!")
-        return tr, True
+        return no_metadata(tr, seed_id)
    else:
        remove_trace = True
    # apply restitution to data
@ -562,7 +570,7 @@ def restitute_data(data, metadata, unit='VEL', force=False, ncores=0):
        data.remove(tr)

    pool = gen_Pool(ncores)
-    result = pool.map(restitute_trace, input_tuples)
+    result = pool.imap_unordered(restitute_trace, input_tuples)
    pool.close()

    for tr, remove_trace in result: