[task] started to implement data processing step for checking corrupted GSE files
This commit is contained in:
parent
a2b0854345
commit
4a836fd1f7
118
pylot/core/util/dataprocessing.py
Normal file
118
pylot/core/util/dataprocessing.py
Normal file
@ -0,0 +1,118 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import glob
|
||||
from obspy import UTCDateTime
|
||||
import sys
|
||||
|
||||
def time_from_header(header):
    '''
    Extract date and time from the second line of a .gse file.

    The line is split on arbitrary whitespace; the second field is read as
    ``YYYY/MM/DD`` and the third field as ``hh:mm:ss[.fraction]``. The fraction
    of a second is scaled to microseconds (``.250`` -> ``250000``) so that the
    result can be handed to ``UTCDateTime(*result)`` without changing its
    meaning. Values are not range-checked here (e.g. second may be 60).

    :param header: second line from .gse file
    :type header: string
    :return: a list of integers of form [year, month, day, hour, minute, second, microsecond];
        the microsecond entry is omitted if the time field has no fractional part
    :raises IndexError: if the line has fewer than three whitespace separated fields
    :raises ValueError: if a date or time component is not an integer
    '''
    # split() without argument tolerates repeated blanks and the trailing newline
    fields = header.split()
    date_parts = fields[1].split('/')
    clock_parts = fields[2].split(':')
    seconds, dot, fraction = clock_parts[-1].partition('.')
    values = date_parts + clock_parts[:-1] + [seconds]
    if dot:
        # pad/truncate the fraction to six digits -> microseconds
        values.append(fraction.ljust(6, '0')[:6])
    return [int(v) for v in values]
|
||||
|
||||
def check_time(datetime):
    '''
    Validate a date/time list by trying to build an UTCDateTime object from it.

    :param datetime: list of integers [year, month, day, hour, minute, second, microsecond]
    :type datetime: list
    :return: True if UTCDateTime accepts the values, False if it raises a ValueError
    '''
    try:
        # the list entries are passed as positional arguments
        UTCDateTime(*datetime)
    except ValueError:
        return False
    else:
        return True
|
||||
|
||||
def get_file_list(root_dir):
    '''
    Collect the names of all *.gse files located directly in a directory.

    Uses os.listdir and fnmatch instead of the undocumented glob.glob1 helper.
    Hidden files (names starting with a dot) are ignored and a directory that
    cannot be listed yields an empty list, as glob.glob1 did.

    :param root_dir: a directory leading to the .gse files
    :type root_dir: string
    :return: returns a sorted list of filenames (without path to them)
    '''
    import fnmatch

    try:
        names = os.listdir(root_dir if root_dir else os.curdir)
    except OSError:
        return []
    visible = [name for name in names if not name.startswith('.')]
    # sort so that the processing order does not depend on the file system
    return sorted(fnmatch.filter(visible, '*.gse'))
|
||||
|
||||
def checks_station_second(datetime):
    '''
    Correct a time whose 'second' entry was set to 60 by mistake.

    A second value of 60 is carried over into the minute (and, if the minute
    is 59, into the hour). The list is modified in place and also returned.
    Can only correct the time if no date change would be necessary.

    :param datetime: [year, month, day, hour, minute, second, microsecond]
    :type datetime: list
    :return: returns the input with the correct value for second
    :raises ValueError: if the time is 23:59:60, because the correction would
        require moving to the next day
    '''
    if datetime[5] != 60:
        # nothing to correct
        return datetime
    if datetime[4] == 59:
        if datetime[3] == 23:
            raise ValueError('Date should be next day.')
        # carry over into the hour
        datetime[3] += 1
        datetime[4] = 0
    else:
        datetime[4] += 1
    datetime[5] = 0
    return datetime
|
||||
|
||||
def make_time_line(line, datetime):
    '''
    Build a new header line carrying the time given in a date/time list.

    Characters 16 to 27 of the original line are replaced by ``hh:mm:ss.000``
    built from the hour, minute and second entries of the list; everything
    else of the line is kept. The fractional part is always written as ``.000``.

    :param line: second line from .gse file.
    :type line: string
    :param datetime: list of integers [year, month, day, hour, minute, second, microsecond]
    :type datetime: list
    :return: returns a string to write it into a file.
    '''
    hour, minute, second = (int(value) for value in datetime[3:6])
    clock = '{:02d}:{:02d}:{:02d}.000'.format(hour, minute, second)
    head, tail = line[:16], line[28:]
    return ''.join((head, clock, tail))
|
||||
|
||||
def evt_head_check(root_dir, out_dir=None):
    '''
    Make sure that an arbitrary number of .gse files have correct time values in their header.

    Every *.gse file in root_dir is read and the date/time of its second line
    is validated. Files with a valid time are left untouched (and are not
    copied to out_dir). For all other files the 'second == 60' correction is
    applied and the file is written with the rewritten header line. Finally
    the number of files that failed the validation is printed.

    :param root_dir: a directory leading to the .gse files.
    :type root_dir: string
    :param out_dir: a directory to store the new files somewhere else. If not
        given, the files in root_dir are overwritten after an interactive
        confirmation; any answer other than 'y' exits the program.
    :type out_dir: string
    :return: returns nothing
    :raises ValueError: propagated from checks_station_second if a correction
        would require a date change
    '''
    if not out_dir:
        print('WARNING files are going to be overwritten!')
        try:
            ask = raw_input  # Python 2
        except NameError:
            ask = input  # Python 3
        if str(ask('Continue? [y/n]')) != 'y':
            sys.exit()
    target_dir = out_dir if out_dir else root_dir
    debugcounter = 0
    for filename in get_file_list(root_dir):
        # read completely and close the file before it is possibly rewritten
        with open(os.path.join(root_dir, filename), 'r') as in_file:
            lines = in_file.readlines()
        if len(lines) < 2:
            # no second line to check; do not abort the whole run
            print('skipping ' + filename + ' (no header line)')
            continue
        datetime = time_from_header(lines[1])
        if check_time(datetime):
            continue
        debugcounter += 1
        datetime = checks_station_second(datetime)
        print('writing ' + filename)
        lines[1] = make_time_line(lines[1], datetime)
        # os.path.join inserts the path separator that was missing before
        with open(os.path.join(target_dir, filename), 'w') as out_file:
            out_file.writelines(lines)
    print(debugcounter)
|
Loading…
Reference in New Issue
Block a user