WIP: Simplify data structure #39

Draft
sebastianw wants to merge 31 commits from 38-simplify-data-structure into develop
Showing only changes of commit c0c3cbbd7b - Show all commits

View File

@ -338,19 +338,6 @@ class Metadata(object):
return inv, exc return inv, exc
def time_from_header(header):
"""
Function takes in the second line from a .gse file and takes out the date and time from that line.
:param header: second line from .gse file
:type header: string
:return: a list of integers of form [year, month, day, hour, minute, second, microsecond]
"""
timeline = header.split(' ')
time = timeline[1].split('/') + timeline[2].split(':')
time = time[:-1] + time[-1].split('.')
return [int(t) for t in time]
def check_time(datetime): def check_time(datetime):
""" """
Function takes in date and time as list and validates it's values by trying to make an UTCDateTime object from it Function takes in date and time as list and validates it's values by trying to make an UTCDateTime object from it
@ -388,167 +375,6 @@ def check_time(datetime):
return False return False
# TODO: change root to datapath
def get_file_list(root_dir):
"""
Function uses a directorie to get all the *.gse files from it.
:param root_dir: a directorie leading to the .gse files
:type root_dir: string
:return: returns a list of filenames (without path to them)
"""
file_list = glob.glob1(root_dir, '*.gse')
return file_list
def checks_station_second(datetime, file):
"""
Function uses the given list to check if the parameter 'second' is set to 60 by mistake
and sets the time correctly if so. Can only correct time if no date change would be necessary.
:param datetime: [year, month, day, hour, minute, second, microsecond]
:return: returns the input with the correct value for second
"""
if datetime[5] == 60:
if datetime[4] == 59:
if datetime[3] == 23:
err_msg = 'Date should be next day. ' \
'File not changed: {0}'.format(file)
raise ValueError(err_msg)
else:
datetime[3] += 1
datetime[4] = 0
datetime[5] = 0
else:
datetime[4] += 1
datetime[5] = 0
return datetime
def make_time_line(line, datetime):
"""
Function takes in the original line from a .gse file and a list of date and
time values to make a new line with corrected date and time.
:param line: second line from .gse file.
:type line: string
:param datetime: list of integers [year, month, day, hour, minute, second, microsecond]
:type datetime: list
:return: returns a string to write it into a file.
"""
ins_form = '{0:02d}:{1:02d}:{2:02d}.{3:03d}'
insertion = ins_form.format(int(datetime[3]),
int(datetime[4]),
int(datetime[5]),
int(datetime[6] * 1e-3))
newline = line[:16] + insertion + line[28:]
return newline
def evt_head_check(root_dir, out_dir=None):
"""
A function to make sure that an arbitrary number of .gse files have correct values in their header.
:param root_dir: a directory leading to the .gse files.
:type root_dir: string
:param out_dir: a directory to store the new files somwhere els.
:return: returns nothing
"""
if not out_dir:
print('WARNING files are going to be overwritten!')
inp = str(input('Continue? [y/N]'))
if not inp == 'y':
sys.exit()
filelist = get_file_list(root_dir)
nfiles = 0
for file in filelist:
infile = open(os.path.join(root_dir, file), 'r')
lines = infile.readlines()
infile.close()
datetime = time_from_header(lines[1])
if check_time(datetime):
continue
else:
nfiles += 1
datetime = checks_station_second(datetime, file)
print('writing ' + file)
# write File
lines[1] = make_time_line(lines[1], datetime)
if not out_dir:
out = open(os.path.join(root_dir, file), 'w')
out.writelines(lines)
out.close()
else:
out = open(os.path.join(out_dir, file), 'w')
out.writelines(lines)
out.close()
print(nfiles)
def read_metadata(path_to_inventory):
"""
take path_to_inventory and return either the corresponding list of files
found or the Parser object for a network dataless seed volume to prevent
read overhead for large dataless seed volumes
:param path_to_inventory:
:return: tuple containing a either list of files or `obspy.io.xseed.Parser`
object and the inventory type found
:rtype: tuple
"""
dlfile = list()
invfile = list()
respfile = list()
# possible file extensions specified here:
inv = dict(dless=dlfile, xml=invfile, resp=respfile, dseed=dlfile[:])
if os.path.isfile(path_to_inventory):
ext = os.path.splitext(path_to_inventory)[1].split('.')[1]
inv[ext] += [path_to_inventory]
else:
for ext in inv.keys():
inv[ext] += glob.glob1(path_to_inventory, '*.{0}'.format(ext))
invtype = key_for_set_value(inv)
if invtype is None:
print("Neither dataless-SEED file, inventory-xml file nor "
"RESP-file found!")
print("!!WRONG CALCULATION OF SOURCE PARAMETERS!!")
robj = None,
elif invtype == 'dless': # prevent multiple read of large dlsv
print("Reading metadata information from dataless-SEED file ...")
if len(inv[invtype]) == 1:
fullpath_inv = os.path.join(path_to_inventory, inv[invtype][0])
robj = Parser(fullpath_inv)
else:
robj = inv[invtype]
else:
print("Reading metadata information from inventory-xml file ...")
robj = read_inventory(inv[invtype])
return invtype, robj
# idea to optimize read_metadata
# def read_metadata_new(path_to_inventory):
# metadata_objects = []
# # read multiple files from directory
# if os.path.isdir(path_to_inventory):
# fnames = os.listdir(path_to_inventory)
# # read single file
# elif os.path.isfile(path_to_inventory):
# fnames = [path_to_inventory]
# else:
# print("Neither dataless-SEED file, inventory-xml file nor "
# "RESP-file found!")
# print("!!WRONG CALCULATION OF SOURCE PARAMETERS!!")
# fnames = []
#
# for fname in fnames:
# path_to_inventory_filename = os.path.join(path_to_inventory, fname)
# try:
# ftype, robj = read_metadata_file(path_to_inventory_filename)
# metadata_objects.append((ftype, robj))
# except Exception as e:
# print('Could not read metadata file {} '
# 'because of the following Exception: {}'.format(path_to_inventory_filename, e))
# return metadata_objects
def restitute_trace(input_tuple): def restitute_trace(input_tuple):
def no_metadata(tr, seed_id): def no_metadata(tr, seed_id):
print('no metadata file found ' print('no metadata file found '