[task] started to implement data processing step for checking corrupted GSE files
This commit is contained in:
parent
a2b0854345
commit
4a836fd1f7
118
pylot/core/util/dataprocessing.py
Normal file
118
pylot/core/util/dataprocessing.py
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import os
|
||||||
|
import glob
|
||||||
|
from obspy import UTCDateTime
|
||||||
|
import sys
|
||||||
|
|
||||||
|
def time_from_header(header):
|
||||||
|
'''
|
||||||
|
Function takes in the second line from a .gse file and takes out the date and time from that line.
|
||||||
|
:param header: second line from .gse file
|
||||||
|
:type header: string
|
||||||
|
:return: a list of integers of form [year, month, day, hour, minute, second, microsecond]
|
||||||
|
'''
|
||||||
|
timeline = header.split(' ')
|
||||||
|
time = timeline[1].split('/') + timeline[2].split(':')
|
||||||
|
time = time[:-1] + time[-1].split('.')
|
||||||
|
return [int(t) for t in time]
|
||||||
|
|
||||||
|
def check_time(datetime):
|
||||||
|
'''
|
||||||
|
Function takes in date and time as list and validates it's values by trying to make an UTCDateTime object from it
|
||||||
|
:param datetime: list of integers [year, month, day, hour, minute, second, microsecond]
|
||||||
|
:type datetime: list
|
||||||
|
:return: returns True if Values are in supposed range, returns False otherwise
|
||||||
|
'''
|
||||||
|
try:
|
||||||
|
UTCDateTime(*datetime)
|
||||||
|
return True
|
||||||
|
except ValueError:
|
||||||
|
return False
|
||||||
|
|
||||||
|
def get_file_list(root_dir):
|
||||||
|
'''
|
||||||
|
Function uses a directorie to get all the *.gse files from it.
|
||||||
|
:param root_dir: a directorie leading to the .gse files
|
||||||
|
:type root_dir: string
|
||||||
|
:return: returns a list of filenames (without path to them)
|
||||||
|
'''
|
||||||
|
file_list = glob.glob1(root_dir, '*.gse')
|
||||||
|
return file_list
|
||||||
|
|
||||||
|
def checks_station_second(datetime):
|
||||||
|
'''
|
||||||
|
Function uses the given list to check if the parameter 'second' is set to 60 by mistake
|
||||||
|
and sets the time correctly if so. Can only correct time if no date change would be necessary.
|
||||||
|
:param datetime: [year, month, day, hour, minute, second, microsecond]
|
||||||
|
:return: returns the input with the correct value for second
|
||||||
|
'''
|
||||||
|
if datetime[5] == 60:
|
||||||
|
if datetime[4] == 59:
|
||||||
|
if datetime[3] == 23:
|
||||||
|
print 'Date should be next day.'+file
|
||||||
|
raise ValueError
|
||||||
|
else:
|
||||||
|
datetime[3] += 1
|
||||||
|
datetime[4] = 0
|
||||||
|
datetime[5] = 0
|
||||||
|
else:
|
||||||
|
datetime[4] += 1
|
||||||
|
datetime[5] = 0
|
||||||
|
return datetime
|
||||||
|
|
||||||
|
def make_time_line(line, datetime):
|
||||||
|
'''
|
||||||
|
Function takes in the original line from a .gse file and a list of date and time values to make a new line with
|
||||||
|
corrected date and time.
|
||||||
|
:param line: second line from .gse file.
|
||||||
|
:type line: string
|
||||||
|
:param datetime: list of integers [year, month, day, hour, minute, second, microsecond]
|
||||||
|
:type datetime: list
|
||||||
|
:return: returns a string to write it into a file.
|
||||||
|
'''
|
||||||
|
insertion = '{:02d}'.format(int(datetime[3])) + ':' + '{:02d}'.format(int(datetime[4])) + ':' + '{:02d}'.format(int(datetime[5])) + '.000'
|
||||||
|
newline = line[:16]+insertion+line[28:]
|
||||||
|
return newline
|
||||||
|
|
||||||
|
def evt_head_check(root_dir,out_dir = None):
|
||||||
|
'''
|
||||||
|
A function to make sure that an arbitrary number of .gse files have correct values in their header.
|
||||||
|
:param root_dir: a directory leading to the .gse files.
|
||||||
|
:type root_dir: string
|
||||||
|
:param out_dir: a directory to store the new files somwhere els.
|
||||||
|
:return: returns nothing
|
||||||
|
'''
|
||||||
|
if not out_dir:
|
||||||
|
print 'WARNING files are going to be overwritten!'
|
||||||
|
inp = str(raw_input('Continue? [y/n]'))
|
||||||
|
if inp == 'y':
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
sys.exit()
|
||||||
|
Filelist = get_file_list(root_dir)
|
||||||
|
debugcounter = 0
|
||||||
|
for i in range(len(Filelist)):
|
||||||
|
inFile = open(root_dir+'/'+Filelist[i], 'r')
|
||||||
|
lines = inFile.readlines()
|
||||||
|
datetime = time_from_header(lines[1])
|
||||||
|
if check_time(datetime):
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
debugcounter += 1
|
||||||
|
datetime = checks_station_second(datetime)
|
||||||
|
print 'writing ' + Filelist[i]
|
||||||
|
timeline = make_time_line(lines[1],datetime)
|
||||||
|
# write File
|
||||||
|
lines[1] = timeline
|
||||||
|
if not out_dir:
|
||||||
|
out = open(root_dir+Filelist[i], 'w')
|
||||||
|
out.writelines(lines)
|
||||||
|
out.close()
|
||||||
|
else:
|
||||||
|
out = open(out_dir+Filelist[i], 'w')
|
||||||
|
out.writelines(lines)
|
||||||
|
out.close()
|
||||||
|
inFile.close()
|
||||||
|
print debugcounter
|
Loading…
Reference in New Issue
Block a user