"""
This module is the main SpRIT module that contains all the functions needed to run HVSR analysis.
The functions defined here are read both by the SpRIT graphical user interface and by the command-line interface to run HVSR analysis on input data.
See documentation for individual functions for more information.
"""
import base64
import copy
import datetime
import gzip
import inspect
import io
import json
import math
import operator
import os
import pathlib
import pickle
import importlib
import re
import struct
import sys
import tempfile
import traceback
import warnings
import webbrowser
import xml.etree.ElementTree as ET
import zoneinfo
import kaleido
import matplotlib
from matplotlib.backend_bases import MouseButton
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import obspy
from obspy.signal import PPSD
import pandas as pd
import plotly
from pyproj import CRS, Transformer
import scipy
from scipy.spatial.distance import squareform, pdist
from xhtml2pdf import pisa
try: # For distribution
from sprit import sprit_utils
from sprit import sprit_jupyter_UI
from sprit import sprit_plot
except Exception: # For testing
import sprit_utils
import sprit_jupyter_UI
import sprit_plot
# Constants, etc
# NOTE: a bare `import importlib` does not guarantee that the `resources`
# submodule is loaded; import it explicitly so RESOURCE_DIR below cannot fail
# with AttributeError depending on what other packages happened to import.
import importlib.resources

# Timestamp captured once, when this module is imported
NOWTIME = datetime.datetime.now()
DEFAULT_PLOT_STR = "HVSR p ann COMP+ p ann SPEC p ann"
# File-format names compared (upper-cased, without the dot) against file suffixes
OBSPY_FORMATS = ['AH', 'ALSEP_PSE', 'ALSEP_WTH', 'ALSEP_WTN', 'CSS', 'DMX',
                 'GCF', 'GSE1', 'GSE2', 'KINEMETRICS_EVT', 'KNET', 'MSEED',
                 'NNSA_KB_CORE', 'PDAS', 'PICKLE', 'Q', 'REFTEK130', 'RG16',
                 'SAC', 'SACXY', 'SEG2', 'SEGY', 'SEISAN', 'SH_ASC', 'SLIST', 'TRC',
                 'SU', 'TSPAIR', 'WAV', 'WIN', 'Y']
DEFAULT_BAND = [0.5, 40]
# NOTE(review): "Plot_Report" appears twice; kept as-is because the intended
# fifth key cannot be determined from this file section.
PLOT_KEYS = ["Input_Plot", "Outlier_Plot", "Plot_Report", "Depth_Plot", "Plot_Report"]

# Resources directory path, and the other paths as well
RESOURCE_DIR = pathlib.Path(str(importlib.resources.files('sprit'))).joinpath('resources')
SAMPLE_DATA_DIR = RESOURCE_DIR.joinpath('sample_data')
SETTINGS_DIR = RESOURCE_DIR.joinpath('settings')

global spritApp

# Predefined variables
max_rank = 0
global do_run
do_run = False

# Names accepted as requests for the bundled sample data
sampleListNos = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
SAMPLE_LIST = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', 'batch', 'sample', 'sample_batch']
for s in sampleListNos:
    SAMPLE_LIST.append(f'sample{s}')
    SAMPLE_LIST.append(f'sample_{s}')

# Single source of truth for the sample file names (sample number -> file name)
_SAMPLE_FILENAMES = {
    '1': 'SampleHVSRSite1_AM.RAC84.00.2023.046_2023-02-15_1704-1734.MSEED',
    '2': 'SampleHVSRSite2_AM.RAC84.00.2023-02-15_2132-2200.MSEED',
    '3': 'SampleHVSRSite3_AM.RAC84.00.2023.199_2023-07-18_1432-1455.MSEED',
    '4': 'SampleHVSRSite4_AM.RAC84.00.2023.199_2023-07-18_1609-1629.MSEED',
    '5': 'SampleHVSRSite5_AM.RAC84.00.2023.199_2023-07-18_2039-2100.MSEED',
    '6': 'SampleHVSRSite6_AM.RAC84.00.2023.192_2023-07-11_1510-1528.MSEED',
    '7': 'SampleHVSRSite7_BNE_4_AM.RAC84.00.2023.191_2023-07-10_2237-2259.MSEED',
    '8': 'SampleHVSRSite8_BNE_6_AM.RAC84.00.2023.191_2023-07-10_1806-1825.MSEED',
    '9': 'SampleHVSRSite9_BNE-2_AM.RAC84.00.2023.192_2023-07-11_0000-0011.MSEED',
    '10': 'SampleHVSRSite10_BNE_4_AM.RAC84.00.2023.191_2023-07-10_2237-2259.MSEED',
}

# Every sample file is reachable as 'N', 'sampleN' and 'sample_N'; the key
# order ('1'..'10', 'sample1'..'sample10', 'sample_1'..'sample_10', then the
# two batch aliases) matches the previously hand-written mapping.
sampleFileKeyMap = {
    f'{prefix}{number}': SAMPLE_DATA_DIR.joinpath(filename)
    for prefix in ('', 'sample', 'sample_')
    for number, filename in _SAMPLE_FILENAMES.items()
}
sampleFileKeyMap['batch'] = SAMPLE_DATA_DIR.joinpath('Batch_SampleData.csv')
sampleFileKeyMap['sample_batch'] = SAMPLE_DATA_DIR.joinpath('Batch_SampleData.csv')
# CLASSES
# Check if the data is already the right class
# Define a decorator that wraps the __init__ method
[docs]
def check_instance(init):
    """Decorate an ``__init__`` so that passing an existing instance copies it.

    If the first positional argument is already an instance of the class being
    initialized, its ``__dict__`` is copied onto the new object (a shallow
    copy of the attribute mapping) and the wrapped ``__init__`` is skipped.
    Otherwise the wrapped ``__init__`` is called unchanged.

    Parameters
    ----------
    init : callable
        The ``__init__`` method to wrap.

    Returns
    -------
    callable
        Wrapper with the same name, docstring and signature metadata as `init`.
    """
    import functools  # local import keeps this block self-contained

    # functools.wraps preserves __name__/__doc__/__wrapped__, so help() and
    # inspect.signature() still describe the real initializer.
    @functools.wraps(init)
    def wrapper(self, *args, **kwargs):
        # Check if the first argument is an instance of self.__class__
        if args and isinstance(args[0], self.__class__):
            # Copy its attributes to self
            self.__dict__.update(args[0].__dict__)
        else:
            # Call the original __init__ method
            init(self, *args, **kwargs)
    return wrapper
# Class for batch data
[docs]
class HVSRBatch:
    """HVSRBatch is the data container used for batch processing.

    It contains several HVSRData objects (one for each site).
    These can be accessed using their site name,
    either square brackets (HVSRBatchVariable["SiteName"]) or the dot (HVSRBatchVariable.SiteName) accessor.

    The dot accessor may not work if there is a space in the site name.

    All of the functions in the sprit package are designed to perform the bulk of their operations iteratively
    on the individual HVSRData objects contained in the HVSRBatch object, and do little with the HVSRBatch object itself,
    besides using it to determine which sites are contained within it.
    """

    # Column-name aliases used to auto-detect the site and filepath columns of a batch csv
    _SITE_ALIASES = ('site', 'sitename', 'sites', 'sitenames',
                     'identifier', 'batch', 'profile', 'crosssection', 'group')
    _PATH_ALIASES = ('hvsr_export_path', 'import_filepath', 'batch_input', 'filepath', 'input_data',
                     'path', 'filename', 'file', 'hvsrdata', 'hvsr', 'data')

    @check_instance
    def __init__(self, batch_input, batch_ext=None, batch_use=None, df_as_read=None, verbose=False):
        """HVSR Batch initializer

        Parameters
        ----------
        batch_input : dict, list, tuple, HVSRData, or filepath(s)
            If:

            * dict, dictionary containing Key value pairs with {sitename: HVSRData object}.
            * list or tuple, assumed to be dicts, HVSRData objects, or filepaths to processed .hvsr files or seismic data to be processed.
            * HVSRData object, will transform into HVSRBatch object with single HVSRData object. The add() or append() methods, or using square brackets can be used to add additional sites.
            * filepaths, if:

                * If directory, will use `batch_ext` as the input to a `glob()` function to get all files in that directory and add them to batch. Defaults to '.hvsr' files if `batch_ext` not specified.
                * Filepath, will make a HVSRBatch object importing that single file, or if readable by pandas.read_csv() will use in conjunction with `batch_use` (see below)
        batch_ext : str or None
            Filepath extension (with or without the leading dot) to use in `glob()` function for filetypes to import, if batch_input is a directory.
        batch_use : {dict, list, tuple, None}
            Intended to be used as dict with keys "site", "filepath", and "batch".
            In this case, should be {'site':"name_of_df_col_with_sitenames", 'filepath':"name_of_df_col_with_filepaths_to_data", 'batch':values_to_include}.
            values_to_include can be a value (or list of values) in a column called "batch" to specify that that row should be included in the HVSRBatch object or
            a dictionary where the keys are column names and the values are the values to look for in each column name for inclusion in HVSRBatch object.
            If a list or tuple, it is treated as the site names to include.
            If not specified, defaults to None and uses all rows in dataframe.
        df_as_read : {None, pd.DataFrame}
            Used in various sprit functions to allow original DataFrame used to create HVSRBatch object to be carried through.
        verbose : bool, default=False
            Whether to print a message when unprocessed seismic data in a list/tuple input is processed on the fly.

        Raises
        ------
        TypeError
            If `batch_input` is not one of the supported types or is a path that does not exist.
        """
        # check_instance only intercepts positional instances; cover keyword use too.
        # (__init__ cannot return a replacement object, so copy the state instead.)
        if isinstance(batch_input, HVSRBatch):
            self.__dict__.update(batch_input.__dict__)
            return

        self._batch_input = batch_input
        self.batch_input = self._batch_input
        self._batch_dict = self.batch_dict = {}
        self._input_df = df_as_read
        self.input_df = self._input_df
        self.batch = True

        if isinstance(batch_input, (list, tuple)):
            # batch_input = [HVSRData, HVSRData] or ['/path1.hvsr', '/path2.hvsr'] (may be mixed)
            self._load_from_sequence(batch_input, verbose=verbose)
        elif isinstance(batch_input, dict):
            # batch_input = {"SiteName1": HVSRData, "Sitename2": HVSRData}
            self.batch_dict = batch_input
        elif isinstance(batch_input, HVSRData):
            # Initializing HVSRBatch with a single HVSRData
            self.batch_dict[batch_input['site']] = batch_input
        elif isinstance(batch_input, (str, os.PathLike)) and pathlib.Path(batch_input).exists():
            batch_path = pathlib.Path(batch_input)
            if batch_path.is_dir():
                self.batch_dict = self._load_from_directory(batch_path, batch_ext)
            elif '.hvsr' in batch_path.suffix:
                # Assume this is an already-exported file: adopt what it contains
                imported = import_data(batch_input)
                if isinstance(imported, HVSRBatch):
                    self.__dict__.update(imported.__dict__)
                    return
                self.batch_dict[imported['site']] = imported
            else:
                # For reading in a csv and (optionally) specifying a column map
                self._load_from_table(batch_input, batch_use)
        else:
            raise TypeError(f"The batch_input parameter of the HVSRBatch class must be a dict of parameters, list or tuple of HVSRData obejcts, or an HVSRData object itself. {type(batch_input)}")

        self._batch_dict = self.batch_dict
        for sitename, hvsrdata in self.batch_dict.items():
            setattr(self, sitename, hvsrdata)
            hvsrdata['batch'] = True
        self.sites = list(self.batch_dict.keys())

    # PRIVATE HELPERS
    @staticmethod
    def _get_unique_sitename(proposed_sitename, existing_names):
        """Return `proposed_sitename`, adding/incrementing a numeric ``_N`` suffix until it is not in `existing_names`."""
        if proposed_sitename not in existing_names:
            return proposed_sitename
        stem, sep, suffix = proposed_sitename.rpartition('_')
        if sep and suffix.isdigit():
            counter = int(suffix) + 1
        else:
            stem, counter = proposed_sitename, 0
        while f"{stem}_{counter}" in existing_names:
            counter += 1
        return f"{stem}_{counter}"

    @staticmethod
    def _resolve_sitename(hvdata, fallback):
        """Return the site name carried by `hvdata` ('site' key/attribute or Table_Report), else `fallback`."""
        if isinstance(hvdata, dict):
            return hvdata.get('site', fallback)
        if hasattr(hvdata, 'site'):
            return hvdata.site
        table = getattr(hvdata, 'Table_Report', None)
        if table is not None and hasattr(table, 'columns') and 'Site Name' in table.columns:
            return table['Site Name'].iloc[0]
        return fallback

    def _register_site(self, sitename, hvdata):
        """Store `hvdata` under `sitename` as attribute and dict entry, and refresh ``self.sites``."""
        setattr(self, sitename, hvdata)
        self.batch_dict[sitename] = hvdata
        self._batch_dict = self.batch_dict
        hvdata['batch'] = True
        self.sites = list(self.batch_dict.keys())

    def _load_from_sequence(self, batch_items, verbose=False):
        """Fill ``self.batch_dict`` from a list/tuple of dicts, HVSRData objects and/or filepaths."""
        zfilldigs = len(str(len(batch_items)))
        site_no = 0
        for hvdata in batch_items:
            if isinstance(hvdata, (dict, HVSRData)):
                fallback = f"HVSRSite{str(site_no).zfill(zfilldigs)}"
                self.batch_dict[self._resolve_sitename(hvdata, fallback)] = hvdata
                site_no += 1
                continue

            if not isinstance(hvdata, (str, os.PathLike)):
                print(f"Could not parse Batch input. Excluding from HVSRBatch object: {hvdata}")
                continue

            data_path = pathlib.Path(hvdata)
            if not data_path.exists():
                warnings.warn(f"Batch input does not exist, skipping: {hvdata}")
                continue

            if 'hvsr' in data_path.suffix:
                # Already-processed data: import it (storing the bare path would break later use)
                sitename = self._get_unique_sitename(data_path.stem, self.batch_dict)
                self.batch_dict[sitename] = import_data(data_path.as_posix())
            elif data_path.suffix.upper()[1:] in OBSPY_FORMATS:
                if verbose:
                    print(f"Site specified for inclusion in HVSRBatch has not been processed. Processing. ({hvdata})")
                sitename = self._get_unique_sitename(data_path.stem, self.batch_dict)
                self.batch_dict[sitename] = run(data_path.as_posix())
            else:
                print(f"Could not parse Batch input. Excluding from HVSRBatch object: {hvdata}")

    @staticmethod
    def _load_from_directory(dir_path, batch_ext=None):
        """Import every matching file in `dir_path` and return a {site: data} dict."""
        if batch_ext is not None:
            # Accept both 'hvsr' and '.hvsr'
            candidates = dir_path.glob("*." + str(batch_ext).lstrip('.'))
        else:
            # Assume it is .hvsr files you wish to import
            candidates = (p for p in dir_path.glob("*") if p.as_posix().lower().endswith('hvsr'))

        batchfiledict = {}
        for hvfile in candidates:
            currhvfile = import_data(hvfile.as_posix())
            batchfiledict[currhvfile['site']] = currhvfile
        return batchfiledict

    @staticmethod
    def _read_batch_row(batch_df_row, site_col, path_col):
        """Import or process the data referenced by one table row; return None (with a warning) if unreadable."""
        data_path = str(batch_df_row[path_col])
        if '.hvsr' in data_path:
            return import_data(data_path)
        if pathlib.Path(data_path).suffix.upper()[1:] in OBSPY_FORMATS:
            return run(pathlib.Path(data_path).as_posix())
        warnings.warn(f"Batch input specified as site {batch_df_row[site_col]} cannot be read, skipping: {data_path}")
        return None

    def _load_from_table(self, batch_input, batch_use=None):
        """Fill ``self.batch_dict`` from a csv readable by pandas, optionally filtered via `batch_use`."""
        batch_df = pd.read_csv(batch_input)
        # Convert columns to lowercase
        batch_df.columns = [c.lower() for c in batch_df.columns]

        # Auto-detect columns: first alias actually present, else positional fallback
        site_col = next((sn for sn in self._SITE_ALIASES if sn in batch_df.columns), batch_df.columns[0])
        path_col = next((pa for pa in self._PATH_ALIASES if pa in batch_df.columns), batch_df.columns[1])

        if isinstance(batch_use, dict):
            # Dictionary of {'site':"site_col", 'filepath':'path_col', 'batch':values_in_batch_col_to_include}
            if len(batch_use) != 3:
                warnMsg = f"batch_use dict should have three keys called 'site', 'filepath', and 'batch' (not {len(list(batch_use.keys()))}: {list(batch_use.keys())}). \n\t'batch' may be changed to name of column you are using to specify inclusion in HVSRBatch object, or input DataFrame should have column called 'batch'"
                warnings.warn(warnMsg)

            include_col = None
            include_vals = None
            for key, value in batch_use.items():
                key_lower = str(key).lower()
                # 'batch' is also a site alias, but as a batch_use key it is
                # documented as the inclusion column, so it must not be
                # classified as the site column here.
                if key_lower != 'batch' and key_lower in self._SITE_ALIASES:
                    site_col = str(value).lower()
                elif key_lower in self._PATH_ALIASES:
                    path_col = str(value).lower()
                else:
                    include_col, include_vals = key_lower, value

            # Get subset df with only rows that we want
            if include_col is None:
                sites_df = batch_df
            elif isinstance(include_vals, dict):
                # {column: value} pairs; rows matching any pair are included
                frames = [batch_df[batch_df[str(col).lower()] == val]
                          for col, val in include_vals.items()]
                sites_df = pd.concat(frames, ignore_index=True) if frames else batch_df.iloc[0:0]
            elif isinstance(include_vals, (list, tuple)):
                sites_df = batch_df[batch_df[include_col].isin(include_vals)]
            else:
                sites_df = batch_df[batch_df[include_col] == include_vals]
        elif isinstance(batch_use, (list, tuple)):
            # This should be list/tuples of site names
            sites_df = batch_df[batch_df[site_col].isin(batch_use)]
        else:
            # Use all rows (as possible)
            print(f"**NOTE**: All data specified will be read into batch object, from: {batch_input}")
            sites_df = batch_df

        # Import, process, or otherwise read data into batch object
        for _, row in sites_df.iterrows():
            dataObj = self._read_batch_row(row, site_col, path_col)
            if dataObj is not None:
                self.batch_dict[str(row[site_col])] = dataObj

    # METHODS
    def __to_json(self, filepath):
        """Not yet implemented, but may allow import/export to json files in the future, rather than just .hvsr pickles

        Parameters
        ----------
        filepath : filepath object
            Location to save HVSRBatch object as json
        """
        # open the file with the given filepath
        with open(filepath, 'w') as f:
            # dump the JSON string to the file
            json.dump(self, f, default=lambda o: o.__dict__, sort_keys=True, indent=4)

    def add(self, hvsr_data):
        """Function to add HVSRData objects to existing HVSRBatch objects

        Parameters
        ----------
        hvsr_data : dict, HVSRData, list or tuple
            A single site (dict or HVSRData) or a list/tuple of sites. Each site is stored under
            its own site name; sites without one get an autogenerated "HVSRSite<N>" name.
            Any other input type is ignored.
        """
        if isinstance(hvsr_data, (dict, HVSRData)):
            hvsr_data = [hvsr_data]
        if not isinstance(hvsr_data, (list, tuple)):
            return

        zfilldigs = len(str(len(hvsr_data)))
        for site_no, hvdata in enumerate(hvsr_data):
            # Autogenerated names must not clobber sites already in the batch
            fallback = self._get_unique_sitename(f"HVSRSite{str(site_no).zfill(zfilldigs)}", self.batch_dict)
            sitename = self._resolve_sitename(hvdata, fallback)
            self._register_site(sitename, hvdata)

    def append(self, hvsr_data):
        """Alias of add()"""
        self.add(hvsr_data)

    def export(self, hvsr_export_path=True, ext='hvsr'):
        """Method to export HVSRData objects in HVSRBatch container to individual .hvsr pickle files.

        Parameters
        ----------
        hvsr_export_path : filepath, default=True
            Filepath to save file. Can be either directory (which will assign a filename based on the HVSRData attributes). By default True. If True, it will first try to save each file to the same directory as input_data, then if that does not work, to the current working directory, then to the user's home directory, by default True
        ext : str, optional
            The extension to use for the output, by default 'hvsr'. This is still a pickle file that can be read with pickle.load(), but will have .hvsr extension.
        """
        export_hvsr(hvsr_data=self, hvsr_export_path=hvsr_export_path, ext=ext)

    def keys(self):
        """Method to return the "keys" of the HVSRBatch object. For HVSRBatch objects, these are the site names. Functions similar to dict.keys().

        Returns
        -------
        dict_keys
            A dict_keys object listing the site names of each of the HVSRData objects contained in the HVSRBatch object
        """
        return self.batch_dict.keys()

    def items(self):
        """Method to return both the site names and the HVSRData object as a set of dict_items tuples. Functions similar to dict.items().

        Returns
        -------
        dict_items
            A dict_items object of (site name, site data) pairs contained in the HVSRBatch object
        """
        return self.batch_dict.items()

    def copy(self, type='shallow'):
        """Make a copy of the HVSRBatch object. Uses python copy module.

        Parameters
        ----------
        type : str {'shallow', 'deep'}
            Based on input, creates either a shallow or deep copy of the HVSRBatch object. Shallow is equivalent of copy.copy(). Input of 'deep' is equivalent of copy.deepcopy() (still experimental). Defaults to shallow.

        Returns
        -------
        HVSRBatch
            New batch built from the copied site dictionary.
        """
        copier = copy.deepcopy if str(type).lower() == 'deep' else copy.copy
        return HVSRBatch(copier(self._batch_dict), df_as_read=self._input_df)

    # Method wrapper of sprit.plot_hvsr function
    def plot(self, **kwargs):
        """Method to plot data, based on the sprit.plot_hvsr() function.

        All the same kwargs and default values apply as plot_hvsr().
        For return_fig, returns it to the 'Plot_Report' attribute of each HVSRData object

        Returns
        -------
        HVSRBatch
            This same object, after plotting every site.

        See Also
        --------
        plot_hvsr
        """
        return_fig = bool(kwargs.get('return_fig'))
        for sitename in self:
            figure = plot_hvsr(self[sitename], **kwargs)
            if return_fig:
                self[sitename]['Plot_Report'] = figure
        return self

    def get_report(self, **kwargs):
        """Method to get report from processed data, in print, graphical, or tabular format.

        Returns
        -------
        Variable
            May return nothing, pandas.Dataframe (report_formats='table', one concatenated table for
            all sites), or a dict of {site name: figure} (report_formats='plot'), depending on input.

        See Also
        --------
        get_report
        """
        report_formats = kwargs.get('report_formats')
        if isinstance(report_formats, str):
            if report_formats == 'table':
                # Collect every site's rows before concatenating
                rowList = [get_report(self[sitename], **kwargs) for sitename in self]
                return pd.concat(rowList, ignore_index=True) if rowList else pd.DataFrame()
            if report_formats == 'plot':
                return_fig = bool(kwargs.get('return_fig'))
                plotDict = {}
                for sitename in self:
                    site_report = get_report(self[sitename], **kwargs)
                    if return_fig:
                        plotDict[sitename] = site_report
                return plotDict

        # Only report_formats left is print, which doesn't return anything
        for sitename in self:
            get_report(self[sitename], **kwargs)
        return

    def report(self, **kwargs):
        """Wrapper of get_report()

        See Also
        --------
        get_report
        """
        return self.get_report(**kwargs)

    def export_settings(self, site_name=None, export_settings_path='default', export_settings_type='all', include_location=False, verbose=True):
        """Method to export settings from HVSRData object in HVSRBatch object.

        Simply calls sprit.export_settings() from specified HVSRData object in the HVSRBatch object.
        See sprit.export_settings() for more details.

        Parameters
        ----------
        site_name : str, default=None
            The name of the site whose settings should be exported. If None, will default to the first site, by default None.
        export_settings_path : str, optional
            Filepath to output file. If left as 'default', will save as the default value in the resources directory. If that is not possible, will save to home directory, by default 'default'
        export_settings_type : str, {'all', 'instrument', 'processing'}, optional
            The type of settings to save, by default 'all'
        include_location : bool, optional
            Whether to include the location information in the instrument settings, if that settings type is selected, by default False
        verbose : bool, optional
            Whether to print output (filepath and settings) to terminal, by default True

        See Also
        --------
        export_settings
        """
        # If no site name selected, use first site
        if site_name is None:
            site_name = self.sites[0]

        export_settings(hvsr_data=self[site_name],
                        export_settings_path=export_settings_path, export_settings_type=export_settings_type,
                        include_location=include_location, verbose=verbose)

    def __iter__(self):
        return iter(self._batch_dict.keys())

    def __setitem__(self, key, value):
        setattr(self, key, value)
        # Keep the site registry in sync when a site is added or replaced via
        # square brackets; other attributes set this way are left out of it.
        batch_dict = self.__dict__.get('batch_dict')
        if batch_dict is not None and (key in batch_dict or isinstance(value, HVSRData)):
            batch_dict[key] = value
            self.sites = list(batch_dict.keys())

    def __getitem__(self, key):
        return getattr(self, key)
# Class for HVSR site data
[docs]
class HVSRData:
"""HVSRData is the basic data class of the sprit package.
It contains all the processed data, input parameters, and reports.
These attributes and objects can be accessed using square brackets or the dot accessor. For example, to access the site name, HVSRData['site'] and HVSRData.site will both return the site name.
Some of the methods that work on the HVSRData object (e.g., .plot() and .get_report()) are essentially wrappers for some of the main sprit package functions (sprit.plot_hvsr() and sprit.get_report(), respectively)
"""
@check_instance
def __init__(self, params):
    """Build the object from a parameter mapping.

    Every key/value pair of `params` becomes an attribute. The entries of the
    nested ``'input_params'`` mapping, if present, are additionally set as
    attributes. ``processing_status`` is then (re)set with every step status
    as None and ``'overall_status'`` as False.

    Parameters
    ----------
    params : dict
        Mapping of parameter names to values; kept as ``self.params``.
    """
    self.params = params
    self.batch = False

    for name, val in params.items():
        setattr(self, name, val)
        if name != 'input_params':
            continue
        # Promote the nested input parameters to attributes as well
        for sub_name, sub_val in params[name].items():
            setattr(self, sub_name, sub_val)

    step_names = ('input_params_status',
                  'fetch_data_status',
                  'calculate_azimuths_status',
                  'remove_noise_status',
                  'generate_psds_status',
                  'process_hvsr_status',
                  'remove_outlier_curves_status')
    status = dict.fromkeys(step_names)
    status['overall_status'] = False
    self.processing_status = status
def __setitem__(self, key, value):
    """Set attribute `key` to `value`, so ``obj[key] = value`` mirrors ``obj.key = value``."""
    setattr(self, key, value)
def __getitem__(self, key):
    """Return attribute `key`, so ``obj[key]`` mirrors ``obj.key`` (AttributeError if missing)."""
    return getattr(self, key)
def __str__(self):
    """Return a multi-section, human-readable summary of this object.

    The sections are, in order: title (site, project, HVSR ID), calculated
    peak information (only if a 'BestPeak' entry exists), instrument info,
    site location info, and acquisition time.

    Returns
    -------
    str
        The summary text, or a short explanatory message if any of the
        attributes expected from sprit.input_params() is missing.
    """
    attrsToUse = ['project', 'site',
                  'instrument', 'network', 'station', 'location', 'channels',
                  'acq_date', 'starttime', 'endtime',
                  'xcoord', 'ycoord', 'input_crs', 'elevation', 'elev_unit',
                  ]

    if not all(atu in self.keys() for atu in attrsToUse):
        return 'String representation cannot be generated. Object not instatianted correctly using sprit.input_params()'

    def _ip_default(parameter):
        # Default of `parameter` in input_params(); falls back to the stored
        # params, then to the parameter name itself.
        ip_parameters = inspect.signature(input_params).parameters
        if parameter in ip_parameters:
            return ip_parameters[parameter].default
        if parameter in self.params:
            return self.params[parameter]
        return parameter

    # Get title lines formatted
    if self.project == _ip_default('project'):
        projStr = 'No project specified'
    else:
        projStr = self.project

    hvsrIDStr = ''
    if hasattr(self, 'hvsr_id'):
        hvsrIDStr = self.hvsr_id
    elif 'hvsr_id' in self.params:
        hvsrIDStr = self.params['hvsr_id']

    titleInfoStr = "\nSpRIT HVSR DATA INFORMATION\n"
    bigLineBreak = "—" * len(titleInfoStr) + '\n'
    titleInfoStr += bigLineBreak
    titleInfoStr += f"Site Name: {self.site}\nProject: ({projStr})\n"
    titleInfoStr += f"HVSRID (autogenerated): {hvsrIDStr}\n"
    titleInfoStr += bigLineBreak

    # Acquisition instrument information
    instInfoStr = "\n\nINSTRUMENT INFO\n"
    instInfoStr += '-' * (len(instInfoStr) - 3) + '\n'

    instStr = f"Instrument in use: {self.instrument}"
    if self.instrument == _ip_default('instrument'):
        instStr = 'No instrument type specified'

    netStr = self.network
    staStr = self.station
    locStr = self.location
    chaStr = self.channels
    if chaStr == _ip_default('channels'):
        chaStr = f'No channels specified (using {chaStr})'

    acqInstStr = instInfoStr
    acqInstStr += f"{instStr}"
    acqInstStr += f"\n\tInstrument ID: {netStr}.{staStr}.{locStr}"
    acqInstStr += f"\n\t\tChannels: {chaStr}"

    # Acquisition site information
    xcoordINStr = self.xcoord_input
    xcoordStr = self.xcoord
    lonStr = self.longitude
    ycoordINstr = self.ycoord_input
    ycoordStr = self.ycoord
    latStr = self.latitude
    inCRSStr = self.input_crs
    outCRSStr = self.output_crs

    inputLocStr = f"{xcoordINStr}, {ycoordINstr} (as input in {inCRSStr})\n"
    transLocStr = ''
    if inCRSStr != outCRSStr:
        transLocStr = f"{xcoordStr}, {ycoordStr} (transformed to output_crs: {outCRSStr})\n"
    wgs84Str = f"{lonStr:.5f}°, {latStr:.5f}° | Lon/Lat in WGS84 (EPSG:4326)"

    siteLocInfoStr = "\n\nSITE INFO\n"
    siteLocInfoStr += '-' * (len(siteLocInfoStr) - 3) + '\n'
    siteLocInfoStr += inputLocStr + transLocStr + wgs84Str

    # Acquisition time information
    acqTimeStr = "\n\nACQUISITION TIME\n"
    acqTimeStr += '-' * (len(acqTimeStr) - 3) + '\n'

    aDateStr = self.acq_date
    sTimeStr = self.starttime
    eTimeStr = self.endtime

    # Prefer the times recorded in the data stream, when one is attached
    if hasattr(self, 'stream'):
        dataST = self.stream
        utcSTime = dataST[0].stats.starttime
        utcETime = dataST[0].stats.endtime
    else:
        utcSTime = self.starttime
        utcETime = self.endtime

    # NOTE(review): assumes the difference of the two times is a number of seconds — confirm
    duration = utcETime - utcSTime
    minDur = int(duration // 60)
    secDur = float(round(duration - minDur * 60, 3))
    if secDur >= 60:
        # Rounding can push the seconds up to a full minute; carry it over
        minDur += int(secDur // 60)
        secDur = secDur - (secDur // 60) * 60

    acqDurStr = f'Record duration: {minDur}:{secDur:06.3f} ({utcETime-utcSTime} seconds)'

    if aDateStr == _ip_default('acq_date') and sTimeStr == _ip_default('starttime'):
        acqTimeStr += 'No acquisition time specified.\n'
    else:
        acqTimeStr += f"Acquisition Date: {aDateStr}\n"
        acqTimeStr += f"\tStarted at: {sTimeStr}\n"
        acqTimeStr += f"\tEnded at  : {eTimeStr}\n"
    acqTimeStr += acqDurStr

    # PEAK INFORMATION (IF CALCULATED)
    azimuth = 'HV'
    if 'BestPeak' in self.keys():
        bestPeak = self['BestPeak'][azimuth]
        passList = bestPeak['PassList']

        # Curve passes when all three curve tests pass
        curvTestsPassed = (passList['WinLen'] +
                           passList['SigCycles'] +
                           passList['LowCurveStD'])
        curvePass = curvTestsPassed > 2

        # Peak passes when at least five of the six peak tests pass
        peakTestsPassed = (passList['ProminenceLow'] +
                           passList['ProminenceHi'] +
                           passList['AmpClarity'] +
                           passList['FreqStability'] +
                           passList['LowStDev_Freq'] +
                           passList['LowStDev_Amp'])
        peakPass = peakTestsPassed >= 5

        peakInfoStr = "\nCALCULATED F₀\n"
        peakInfoStr += "-" * (len(peakInfoStr) - 3) + '\n'
        peakInfoStr += '{0:.3f} Hz ± {1:.4f} Hz'.format(bestPeak['f0'], float(bestPeak['Sf']))
        if curvePass and peakPass:
            peakInfoStr += '\n\t  {} Peak at {} Hz passed SESAME quality tests! :D'.format(sprit_utils._check_mark(), round(bestPeak['f0'], 3))
        else:
            peakInfoStr += '\n\t  {} Peak at {} Hz did NOT pass SESAME quality tests :('.format(sprit_utils._x_mark(), round(bestPeak['f0'], 3))
    else:
        peakInfoStr = 'F₀ not Calculated'

    printList = [
        titleInfoStr,
        peakInfoStr,
        acqInstStr,
        siteLocInfoStr,
        acqTimeStr
    ]
    return ''.join(printList)
def __repr__(self):
    """Return the same text as ``__str__()``."""
    return self.__str__()
# METHODS (many reflect dictionary methods)
[docs]
def to_json(self, json_filepath=None, export_json=True, return_json=False, **kwargs):
    """Serialize the object to JSON (experimental, not yet fully supported).

    The mapping returned by ``self.items()`` is converted recursively into
    JSON-compatible values before encoding: dicts and nested objects of this
    class become dicts (non-primitive keys are stringified), lists/tuples and
    numpy arrays become lists, numpy scalars become Python scalars, DataFrames
    are converted with ``to_dict()``, and date/time, UTCDateTime, CRS and
    Inventory objects become strings. Any other object is stored as ``str(obj)``.

    Parameters
    ----------
    json_filepath : filepath or None, default=None
        Where to write the JSON text. If None, nothing is written and the JSON text is returned.
    export_json : bool, default=True
        Whether to write to `json_filepath` (only if it is not None).
    return_json : bool, default=False
        Whether to return the JSON text even when a file is written.
    **kwargs
        Passed to json.dump()/json.dumps(). ``sort_keys`` defaults to True and ``indent`` to 4.

    Returns
    -------
    str or None
        The JSON text if `return_json` is True or `json_filepath` is None; otherwise None.
    """
    class_keys_to_convert = (datetime.date, obspy.UTCDateTime,
                             datetime.time, CRS, obspy.Inventory)
    json_primitives = (str, int, float, bool, type(None))

    def iterative_json_parser(input_attrib):
        # Mapping-like values (including objects of this class, which expose .items())
        if isinstance(input_attrib, (dict, type(self))):
            outValue = {}
            for key, value in input_attrib.items():
                outKey = key if isinstance(key, json_primitives) else str(key)
                outValue[outKey] = iterative_json_parser(value)
            return outValue
        if isinstance(input_attrib, (list, tuple)):
            return [iterative_json_parser(item) for item in input_attrib]
        if isinstance(input_attrib, np.ndarray):
            # Recurse so object arrays holding non-JSON values are converted too
            return iterative_json_parser(input_attrib.tolist())
        if isinstance(input_attrib, np.generic):
            scalar = input_attrib.item()
            return scalar if isinstance(scalar, json_primitives) else str(scalar)
        if isinstance(input_attrib, pd.DataFrame):
            # Convert DataFrame to dict, but then recursively process it
            return iterative_json_parser(input_attrib.to_dict())
        if isinstance(input_attrib, class_keys_to_convert):
            return str(input_attrib)
        if isinstance(input_attrib, json_primitives):
            return input_attrib
        # Unknown type: a string is lossy but keeps the export from failing
        return str(input_attrib)

    # Honor caller-supplied formatting options instead of silently overriding them
    sKeys = kwargs.pop('sort_keys', True)
    indent = kwargs.pop('indent', 4)

    # Convert up front: handing `self` to json with a default= hook that
    # returns it unchanged triggers json's circular-reference error.
    json_ready = iterative_json_parser(self)

    if export_json and json_filepath is not None:
        with open(json_filepath, 'w') as f:
            # dump the JSON string to the file
            json.dump(json_ready, fp=f, sort_keys=sKeys, indent=indent, **kwargs)

    if return_json or json_filepath is None:
        return json.dumps(json_ready, sort_keys=sKeys, indent=indent, **kwargs)
[docs]
def export(self, **kwargs):
    """Export this object to a .hvsr pickle file via export_hvsr().

    Any ``hvsr_data`` keyword is discarded, since this object is always the
    one exported; all remaining keywords are forwarded to export_hvsr().

    Parameters
    ----------
    hvsr_export_path : filepath, default=True
        Filepath to save file. Can be either directory (which will assign a filename based on the HVSRData attributes).
        By default True.
        If True, it will first try to save each file to the same directory as input_data, then if that does not work, to the current working directory, then to the user's home directory, by default True
    ext : str, optional
        The extension to use for the output, by default 'hvsr'. This is still a pickle file that can be read with pickle.load(), but will have .hvsr extension.

    See Also
    --------
    export_hvsr
    """
    kwargs.pop('hvsr_data', None)
    export_hvsr(hvsr_data=self, **kwargs)
[docs]
def copy(self, copy_type='shallow'):
    """Return a copy of this object, made with the python copy module.

    Parameters
    ----------
    copy_type : str {'shallow', 'deep'}
        'deep' (case-insensitive) returns copy.deepcopy() of the whole object (still experimental).
        Any other value builds a new HVSRData from a shallow copy (copy.copy()) of ``self.params``.
        Defaults to shallow.
    """
    wants_deep = copy_type.lower() == 'deep'
    if not wants_deep:
        shallow_params = copy.copy(self.params)
        return HVSRData(shallow_params)
    return copy.deepcopy(self)
[docs]
def export_settings(self, export_settings_path='default', export_settings_type='all', include_location=False, verbose=True):
    """Export the settings stored on this HVSRData object.

    Thin wrapper that passes this object and the arguments below to sprit.export_settings(). See sprit.export_settings() for more details.

    Parameters
    ----------
    export_settings_path : str, optional
        Filepath to output file. If left as 'default', will save as the default value in the resources directory. If that is not possible, will save to home directory, by default 'default'
    export_settings_type : str, {'all', 'instrument', 'processing'}, optional
        The type of settings to save, by default 'all'
    include_location : bool, optional
        Whether to include the location information in the instrument settings, if that settings type is selected, by default False
    verbose : bool, optional
        Whether to print output (filepath and settings) to terminal, by default True
    """
    settings_options = {
        'export_settings_path': export_settings_path,
        'export_settings_type': export_settings_type,
        'include_location': include_location,
        'verbose': verbose,
    }
    # Inside the method body this name resolves to the module-level function
    export_settings(hvsr_data=self, **settings_options)
[docs]
def get_report(self, **kwargs):
    """Generate a report for this object by calling the module-level get_report().

    This object is passed as ``hvsr_results``; all keyword arguments are
    forwarded unchanged.

    Returns
    -------
    Variable
        Whatever get_report() returns. May be nothing, a pandas.DataFrame, or a pyplot Figure, depending on input.

    See Also
    --------
    get_report
    """
    # Inside the method body this name resolves to the module-level function
    return get_report(hvsr_results=self, **kwargs)
[docs]
def items(self):
    """Return the (key, value) pairs of this object's params dictionary.

    Works like dict.items(), applied to ``self.params``.

    Returns
    -------
    dict_items
        The items view of the params dictionary.
    """
    param_map = self.params
    return param_map.items()
[docs]
def keys(self):
    """Return the public attribute names of this HVSRData object.

    The names come from ``dir(self)``, with any name beginning with an
    underscore left out. Intended to be used similarly to dict.keys().

    Returns
    -------
    list of str
        Public attribute names, in the order given by dir().
    """
    return [name for name in dir(self) if not name.startswith('_')]
[docs]
def plot(self, **kwargs):
    """Plot this object's data; wrapper of sprit.plot_hvsr().

    ``close_figs`` is set to True unless the caller supplies it. After
    plot_hvsr() returns, plt.show() is called.

    Returns
    -------
    matplotlib.Figure, matplotlib.Axis (if return_fig=True)
        The value returned by plot_hvsr().

    See Also
    --------
    plot_hvsr
    plot_azimuth
    """
    kwargs.setdefault('close_figs', True)
    figure_output = plot_hvsr(self, **kwargs)
    plt.show()
    return figure_output
[docs]
def report(self, **kwargs):
    """Wrapper of get_report(), called with this object as ``hvsr_results``.

    Returns
    -------
    Variable
        Whatever get_report() returns for the given keyword arguments.

    See Also
    --------
    get_report
    """
    return get_report(hvsr_results=self, **kwargs)
[docs]
def select(self, **kwargs):
    """Wrapper for obspy select method on 'stream' attribute of HVSRData object

    Parameters
    ----------
    **kwargs
        Keyword arguments passed unchanged to the ``select()`` method of the
        object stored under 'stream'.

    Returns
    -------
    object or None
        The result of ``self['stream'].select(**kwargs)``. If this object has
        no 'stream' attribute, a warning is issued and None is returned.
    """
    if not hasattr(self, 'stream'):
        # warnings.warn is the stdlib function; `warnings.Warn` does not exist
        # and raised AttributeError instead of emitting the warning.
        warnings.warn("HVSRData.select() method applied, but 'stream' attribute (obspy.Stream object) not found")
        return None
    return self['stream'].select(**kwargs)
# ATTRIBUTES
@property
def params(self):
    """Dictionary containing the parameters used to process the data

    Returns
    -------
    dict
        The dictionary stored in ``_params``
    """
    return self._params

@params.setter
def params(self, value):
    """Store ``value`` as the params dictionary; raises ValueError if it is not a dict."""
    if isinstance(value, dict):
        self._params = value
        return
    raise ValueError("params must be a dict type, currently passing {} type.".format(type(value)))
# batch
@property
def batch(self):
    """Whether this HVSRData object is part of an HVSRBatch object. This is used throughout the code to help direct the object into the proper processing pipeline.

    Returns
    -------
    bool
        True if HVSRData object is part of HVSRBatch object, otherwise, False
    """
    return self._batch

@batch.setter
def batch(self, value):
    """Normalize ``value`` to a bool and store it.

    Values equal to 0 are stored as False and values equal to 1 as True
    (this covers False/True themselves); anything else raises ValueError.
    """
    if value == 0:
        flag = False
    elif value == 1:
        flag = True
    else:
        raise ValueError("batch must be boolean type")
    self._batch = flag
#PPSD object from obspy (static)
@property
def ppsds_obspy(self):
    """The original ppsd information from the obspy.signal.spectral_estimation.PPSD(), so as to keep original if copy is manipulated/changed."""
    return self._ppsds_obspy

@ppsds_obspy.setter
def ppsds_obspy(self, value):
    """Validate the type of ``value`` before saving it as the attribute.

    Accepts a single obspy PPSD object, or a dict whose values are all
    obspy PPSD objects; anything else raises ValueError.
    """
    ppsd_class = obspy.signal.spectral_estimation.PPSD
    if not isinstance(value, ppsd_class):
        if not isinstance(value, dict):
            raise ValueError("ppsds_obspy must be obspy.PPSD or dict with osbpy.PPSDs")
        # A dict is acceptable only if every value it holds is a PPSD
        if not all(isinstance(entry, ppsd_class) for entry in value.values()):
            raise ValueError("ppsds_obspy must be obspy.PPSD or dict with osbpy.PPSDs")
    self._ppsds_obspy = value
#PPSD dict, copied from obspy ppsds (dynamic)
@property
def ppsds(self):
    """Dictionary copy of the class object obspy.signal.spectral_estimation.PPSD(). The dictionary copy allows manipulation of the data in PPSD, whereas that data cannot be easily manipulated in the original Obspy object.

    Returns
    -------
    dict
        Dictionary copy of the PPSD information from generate_psds()
    """
    return self._ppsds

@ppsds.setter
def ppsds(self, value):
    """Store ``value`` as the ppsds dictionary; raises ValueError if it is not a dict."""
    if isinstance(value, dict):
        self._ppsds = value
        return
    raise ValueError("ppsds dict with infomration from osbpy.PPSD (created by sprit.generate_psds())")
# Test guis
def _gui_test():
    """Developer helper: run the tkinter GUI module file in a child process.

    Prints the path of the sprit_tkinter_ui module, then executes that file
    with the current Python interpreter and waits for it to exit.
    """
    import subprocess

    # sprit_tkinter_ui is not imported at module level, so import it here,
    # using the same distribution/testing fallback as the module imports.
    try:  # For distribution
        from sprit import sprit_tkinter_ui
    except Exception:  # For testing
        import sprit_tkinter_ui

    guiFile = sprit_tkinter_ui.__file__
    print(guiFile)
    # Run through the interpreter instead of handing the bare path to a shell:
    # that only works when the file is executable or has a file association,
    # and breaks on paths containing spaces.
    subprocess.call([sys.executable, guiFile])
# Launch a gui
[docs]
def gui(kind: str = 'browser'):
    """Function to open a graphical user interface (gui)

    Parameters
    ----------
    kind : str, optional
        What type of gui to open (compared case-insensitively):
            * "browser" or "default" opens browser interface (using streamlit)
            * "widget" opens jupyter widget (using ipywidgets)
            * "window" opens windowed gui (using tkinter)
            * "lite" is recognized, but only prints that it is not currently supported
        Any other value issues a warning and no interface is opened.

    Raises
    ------
    ImportError
        For the windowed gui on linux, if tkinter or the SpRIT tkinter app cannot be imported.
        For the browser gui, if streamlit cannot be imported.
    Exception
        For the widget gui, any error raised while creating the jupyter interface
        is printed and then re-raised.
    """
    browserList = ['browser', 'streamlit', 'default', 'd', 'b', 's']
    windowList = ['windowed', 'window', 'tkinter', 'tk', 't', 'win']
    widgetList = ['widget', 'jupyter', 'notebook', 'nb']
    liteList = ['lite', 'light', 'basic', 'l']

    kind_key = str(kind).lower()

    if kind_key in browserList:
        import shutil
        import subprocess
        import streamlit  # noqa: F401 -- imported only to fail fast if streamlit is missing

        app_name = 'sprit_streamlit_ui.py'
        module_files = ['sprit_hvsr.py', 'sprit_tkinter_ui.py', 'sprit_jupyter_ui.py',
                        'sprit_utils.py', 'sprit_plot.py', '__init__.py', app_name]
        package_dir = pathlib.Path(os.path.abspath(__file__)).parent

        # Match file names case-insensitively: this module imports
        # `sprit_jupyter_UI`, so the lower-case name listed above would not be
        # found on a case-sensitive filesystem.
        files_on_disk = {entry.name.lower(): entry for entry in package_dir.iterdir()}

        # The streamlit app is run from a temporary copy of the package modules.
        # The context manager removes the directory even if launching fails.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = pathlib.Path(temp_dir)
            copied = {}
            for file_name in module_files:
                source_file = files_on_disk.get(file_name.lower(), package_dir.joinpath(file_name))
                destination = temp_path.joinpath(source_file.name)
                shutil.copyfile(source_file, destination)
                copied[file_name] = destination

            try:
                # Blocks until the streamlit server process exits
                subprocess.run(['streamlit', 'run', str(copied[app_name])],
                               stderr=subprocess.DEVNULL)
            except KeyboardInterrupt:
                # Ctrl+C is the expected way to stop the streamlit server
                pass

    elif kind_key in windowList:
        try:
            import tkinter as tk
            from sprit.sprit_tkinter_ui import SPRIT_App
        except Exception as import_err:
            if sys.platform == 'linux':
                raise ImportError('The SpRIT graphical interface uses tkinter, which ships with python but is not pre-installed on linux machines. Use "apt-get install python-tk" or "apt-get install python3-tk" to install tkinter. You may need to use the sudo command at the start of those commands.') from import_err
            print("Tkinter may not be installed on your system, or is not functioning correctly. Please download and install tkinter, or use another interface.")
            # Nothing below can work without tkinter and the app class, so stop
            # here instead of failing later with a NameError.
            return

        def on_gui_closing():
            plt.close('all')
            gui_root.quit()
            gui_root.destroy()

        if sys.platform == 'linux':
            if not pathlib.Path("/usr/share/doc/python3-tk").exists():
                warnings.warn('The SpRIT graphical interface uses tkinter, which ships with python but is not pre-installed on linux machines. Use "apt-get install python-tk" or "apt-get install python3-tk" to install tkinter. You may need to use the sudo command at the start of those commands.')

        gui_root = tk.Tk()
        try:
            icon_dir = pathlib.Path(str(importlib.resources.files('sprit'))).joinpath('resources').joinpath("icon")
            try:
                gui_root.iconbitmap(icon_dir.joinpath('sprit_icon_alpha.ico').as_posix())
            except Exception:
                # Fall back to the PNG icon if the .ico file could not be used
                gui_root.iconphoto(False, tk.PhotoImage(file=icon_dir.joinpath('sprit_icon.png').as_posix()))
        except Exception:
            # A missing icon should never prevent the GUI from opening
            print("ICON NOT LOADED, still opening GUI")

        gui_root.resizable(True, True)
        spritApp = SPRIT_App(master=gui_root)  # Open the app with a tk.Tk root

        gui_root.protocol("WM_DELETE_WINDOW", on_gui_closing)
        gui_root.mainloop()  # Run the main loop

    elif kind_key in widgetList:
        try:
            sprit_jupyter_UI.create_jupyter_ui()
        except Exception as e:
            errMsg = e.message if hasattr(e, 'message') else e
            print(errMsg)
            raise

    elif kind_key in liteList:
        print("Lite GUI is not currently supported")

    else:
        warnings.warn(f"gui kind {kind!r} is not recognized, so no interface was opened. "
                      "Use 'browser', 'window', or 'widget'.")
# FUNCTIONS AND METHODS
# The run function to rule them all (runs all needed for simply processing HVSR)
[docs]
def run(input_data=None, source='file', azimuth_calculation=False, noise_removal=False, outlier_curves_removal=False, skip_steps=None, verbose=False, **kwargs):
"""The sprit.run() is the main function that allows you to do all your HVSR processing in one simple step (sprit.run() is how you would call it in your code, but it may also be called using sprit.sprit_hvsr.run())
The input_data parameter of sprit.run() is the only required parameter (if nothing entered, it will run sample data). This can be either a single file, a list of files (one for each component, for example), a directory (in which case, all obspy-readable files will be added to an HVSRBatch instance), a Rasp. Shake raw data directory, or sample data.
Notes
-----
The sprit.run() function calls the following functions. This is the recommended order/set of functions to run to process HVSR using SpRIT. See the API documentation for these functions for more information:
- input_params(): The input_data parameter of input_params() is the only required variable, though others may also need to be called for your data to process correctly.
- fetch_data(): the source parameter of fetch_data() is the only explicit variable in the sprit.run() function aside from input_data and verbose. Everything else gets delivered to the correct function via the kwargs dictionary
- remove_noise(): by default, the kind of noise removal is remove_method='auto'. See the remove_noise() documentation for more information. If remove_method is set to anything other than one of the explicit options in remove_noise, noise removal will not be carried out.
- calculate_azimuth(): calculate one or several azimuths. Single azimuth can be a way to combine H components too.
- generate_psds(): generates psds for each component, which will be combined/used later. Any parameter of obspy.signal.spectral_estimation.PPSD() may also be read into this function.
- remove_outlier_curves(): removes any outlier ppsd curves so that the data quality for when curves are combined will be enhanced. See the remove_outlier_curves() documentation for more information.
- process_hvsr(): this is the main function processing the hvsr curve and statistics. See process_hvsr() documentation for more details. The hvsr_band parameter sets the frequency spectrum over which these calculations occur.
- check_peaks(): this is the main function that will find and 'score' peaks to get a best peak. The parameter peak_freq_range can be set to limit the frequencies within which peaks are checked and scored.
- get_report(): this is the main function that will print, plot, and/or save the results of the data. See the get_report() API documentation for more information.
- export_hvsr(): this function exports the final data output as a pickle file (by default, this pickle object has a .hvsr extension). This can be used to read data back into SpRIT without having to reprocess data.
Parameters
----------
input_data : str or filepath object that can be read by obspy
Filepath to data to be processed. This may be a file or directory, depending on what kind of data is being processed (this can be specified with the source parameter).
For sample data, The following can be specified as the input_data parameter:
- Any integer 1-6 (inclusive), or the string (e.g., input_data="1" or input_data=1 will work)
- The word "sample" before any integer (e.g., input_data="sample1")
- The word "sample" will default to "sample1" if source='file'.
- If source='batch', input_data should be input_data='sample' or input_data='batch'. In this case, it will read and process all the sample files using the HVSRBatch class. Set verbose=True to see all the information in the sample batch csv file.
source : str, optional
_description_, by default 'file'
azimuth_calculation : bool, optional
Whether to perform azimuthal analysis, by default False.
noise_removal : bool, default=False
Whether to remove noise (before processing PPSDs)
outlier_curves_removal : bool, default=False
Whether to remove outlier curves from HVSR time windows
skip_steps : list, str, or None
A list of function names to skip (as strings), to manually prevent any function from being performed.
For example, skip_steps=["input_params", "fetch_data"] will prevent sprit.input_params() and sprit.fetch_data() from being called in sprit.run().
show_plot : bool, default=True
Whether to show plots. This does not affect whether the plots are created (and then inserted as an attribute of HVSRData), only whether they are shown.
verbose : bool, optional
_description_, by default False
**kwargs
Keyword arguments for the functions listed above. The keyword arguments are unique, so they will get parsed out and passed into the appropriate function.
Returns
-------
hvsr_results : sprit.HVSRData or sprit.HVSRBatch object
If a single file/data point is being processed, a HVSRData object will be returned. Otherwise, it will be a HVSRBatch object. See their documention for more information.
See Also
--------
input_params
fetch_data
remove_noise
calculate_azimuth
generate_psds
remove_outlier_curves
process_hvsr
check_peaks
get_report
export_hvsr
Raises
------
RuntimeError
If the input parameter may not be read correctly. This is raised if the input_params() function fails. This raises an error since no other data processing or reading steps will be able to carried out correctly.
RuntimeError
If the data is not read/fetched correctly using fetch_data(), an error will be raised. This is raised if the fetch_data() function fails. This raises an error since no other data processing steps will be able to carried out correctly.
RuntimeError
If the data being processed is a single file, an error will be raised if generate_psds() does not work correctly. No errors are raised for remove_noise() errors (since that is an optional step) and the process_hvsr() step (since that is the last processing step) .
"""
if input_data is None or input_data == '':
print("********************* PROCESSING SAMPLE DATA *****************************************")
print("To read in your own data, use sprit.run(input_data='/path/to/your/seismic/data.mseed')")
print("See SpRIT Wiki or API documentation for more information:")
print("\t Wiki: https://github.com/RJbalikian/SPRIT-HVSR/wiki")
print("\t API Documentation: https://sprit.readthedocs.io/en/latest/#")
print("**************************************************************************************")
print()
input_data = 'sample'
orig_args = locals().copy() # Get the initial arguments
global do_run
do_run = True
if verbose:
print('Using sprit.run() with the following parameters:')
print(f'\tinput_data = {input_data}')
print(f'\tazimuth_calculation = {azimuth_calculation}')
print(f'\tnoise_removal = {noise_removal}')
print(f'\toutlier_curves_removal = {outlier_curves_removal}')
print("\tWith the following kwargs: ", end='')
if kwargs is not {}:
print()
for k, v in kwargs.items():
print(f"\t\t{k} = {v}")
else:
print("{None}")
print()
if 'hvsr_band' not in kwargs.keys():
kwargs['hvsr_band'] = inspect.signature(input_params).parameters['hvsr_band'].default
if 'peak_freq_range' not in kwargs.keys():
kwargs['peak_freq_range'] = inspect.signature(input_params).parameters['peak_freq_range'].default
if 'processing_parameters' not in kwargs.keys():
kwargs['processing_parameters'] = {}
# Separate out input_params and fetch_data processes based on whether batch has been specified
batchlist = ['batch', 'bach', 'bath', 'b']
if str(source).lower() in batchlist and str('input_data').lower() not in SAMPLE_LIST:
try:
batch_data_read_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(batch_data_read).parameters.keys())}
hvsrDataIN = batch_data_read(batch_data=input_data, verbose=verbose, **batch_data_read_kwargs)
except Exception as e:
raise RuntimeError(f'Batch data read in was not successful:\n{e}')
else:
# Get the input parameters
params = input_data
try:
input_params_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(input_params).parameters.keys())}
if 'acq_date' not in input_params_kwargs:
input_params_kwargs['acq_date'] = NOWTIME.date()
if 'starttime' not in input_params_kwargs:
input_params_kwargs['starttime'] = NOWTIME.time()
if skip_steps is None or 'input_params' not in skip_steps:
params = input_params(input_data=input_data, verbose=verbose, **input_params_kwargs)
except Exception as e:
if hasattr(e, 'message'):
errMsg = e.message
else:
errMsg = e
print(f"ERROR during input_params(): {errMsg}")
# Even if batch, this is reading in data for all sites so we want to raise error, not just warn
raise RuntimeError('Input parameters not read correctly, see sprit.input_params() function and parameters')
# If input_params fails, initialize params as an HVSRDATA
#params = {'processing_status':{'input_params_status':False, 'overall_status':False}}
#params.update(input_params_kwargs)
#params = sprit_utils._make_it_classy(params)
# Fetch Data
hvsrDataIN = params
try:
fetch_data_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(fetch_data).parameters.keys())}
fetch_data_kwargs.update({k: v for k, v in kwargs.items() if k in tuple(inspect.signature(read_tromino_files).parameters.keys())})
if 'obspy_ppsds' in kwargs:
fetch_data_kwargs['obspy_ppsds'] = kwargs['obspy_ppsds']
else:
fetch_data_kwargs['obspy_ppsds'] = False
if skip_steps is None or 'fetch_data' not in skip_steps:
hvsrDataIN = fetch_data(params=params, source=source, verbose=verbose, **fetch_data_kwargs)
except Exception as e:
# Even if batch, this is reading in data for all sites so we want to raise error, not just warn
if hasattr(e, 'message'):
errMsg = e.message
else:
errMsg = e
print(f"ERROR during fetch_data(): {errMsg}")
raise RuntimeError('Data not read correctly, see sprit.fetch_data() function and parameters for more details.')
# BREAK OUT FOR BATCH PROCESSING
run_kwargs_for_df = []
if isinstance(hvsrDataIN, HVSRBatch):
# Create dictionary that will be used to create HVSRBatch object
hvsrBatchDict = {}
# Loop through each site and run sprit.run() for each HVSRData object
for site_name, site_data in hvsrDataIN.items():
run_kwargs = {} #orig_args.copy() # Make a copy so we don't accidentally overwrite
print(f'\n\n**PROCESSING DATA FOR SITE {site_name.upper()}**\n')
run_kwargs['input_data'] = site_data
# Update run kwargs
# First, get processing_parameters per site
for funname, fundict in site_data['processing_parameters'].items():
for funk, funv in fundict.items():
run_kwargs[funk] = funv
# Overwrite per-site processing parameters with params passed to sprit.run() as kwargs
for paramname, paramval in kwargs.items():
if paramname != 'source': # Don't update source for batch data
run_kwargs[paramname] = paramval
dont_update_these_args = ['input_data', 'source', 'kwargs']
# Overwrite per-site processing parameters with sprit.run()
run_args = orig_args.copy()
for k, v in run_args.items():
if k not in dont_update_these_args:
if v != inspect.signature(run).parameters[k].default:
run_kwargs[k] = v
try:
hvsrBatchDict[site_name] = run(**run_kwargs)
run_kwargs_for_df.append(run_kwargs)
except Exception as e:
hvsrBatchDict[site_name] = site_data
hvsrBatchDict[site_name]['Error_Message'] = sprit_utils._get_error_from_exception(e,
print_error_message=False,
return_error_message=True)
if verbose:
sprit_utils._get_error_from_exception(e)
print(f"Error processing site {site_name}. Continuing processing of remaining sites.")
hvsrBatchDict[site_name]['processing_status']['generate_psds_status'] = False
hvsrBatchDict[site_name]['processing_status']['overall_status'] = False
# Create batch object
hvsrBatchData = HVSRBatch(hvsrBatchDict, df_as_read=pd.DataFrame(run_kwargs_for_df))
# Use batch object to get Output Table with all data, including results and inputs
for s, site in enumerate(hvsrBatchData):
if hasattr(hvsrBatchData[site], 'Table_Report'):
if s == 0:
table_reports = hvsrBatchData[site].Table_Report
else:
table_reports = pd.concat([table_reports, hvsrBatchData[site].Table_Report])
else:
if s == 0:
table_reports = pd.DataFrame()
hvsrBatchData['Table_Report'] = pd.merge(left=hvsrBatchData.input_df, right=table_reports,
how='outer',
left_on='site', right_on='Site Name')
return hvsrBatchData
# Calculate azimuths
hvsr_az = hvsrDataIN
azimuth_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(calculate_azimuth).parameters.keys())}
azList = ['azimuth', 'single azimuth', 'single']
azCond1 = 'horizontal_method' in kwargs.keys() and str(kwargs['horizontal_method']) == '8'
azCond2 = 'horizontal_method' in kwargs.keys() and str(kwargs['horizontal_method']).lower() in azList
azCond3 = azimuth_calculation
azCond4 = len(azimuth_kwargs.keys()) > 0
if (azCond1 or azCond2 or azCond3 or azCond4) and (skip_steps is None or 'calculate_azimuth' not in skip_steps):
azimuth_calculation = True
azimuth_kwargs['azimuth_type'] = kwargs['azimuth_type'] = 'single'
if 'azimuth_angle' not in kwargs.keys():
azimuth_kwargs['azimuth_angle'] = kwargs['azimuth_angle'] = 45
kwargs['azimuth'] = "R" # str(kwargs['azimuth_angle']).zfill(3)
if 'horizontal_method' not in kwargs.keys():
kwargs['horizontal_method'] = 'Single Azimuth'
try:
hvsr_az = calculate_azimuth(hvsrDataIN, verbose=verbose, **azimuth_kwargs)
except Exception as e:
if hasattr(e, 'message'):
errMsg = e.message
else:
errMsg = e
print(f"Error during calculate_azimuth() for {hvsr_az.site}: \n{errMsg}")
if isinstance(hvsr_az, HVSRBatch):
for site_name in hvsr_az.keys():
hvsr_az[site_name]['processing_status']['calculate_azimuths_status'] = False
else:
hvsr_az['processing_status']['calculate_azimuths_status'] = False
else:
azimuth_calculation = False
# Remove Noise
data_noiseRemoved = hvsr_az
try:
remove_noise_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(remove_noise).parameters.keys())}
if noise_removal or remove_noise_kwargs != {}:
remove_noise_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(remove_noise).parameters.keys())}
try:
if skip_steps is None or 'remove_noise' not in skip_steps:
data_noiseRemoved = remove_noise(hvsr_data=data_noiseRemoved, verbose=verbose, **remove_noise_kwargs)
except Exception as e:
if hasattr(e, 'message'):
errMsg = e.message
else:
errMsg = e
print(f"Error with remove_noise for site {data_noiseRemoved.site}: {errMsg}")
# Mark that remove_noise failed
# Reformat data so HVSRData and HVSRBatch data both work here
if isinstance(data_noiseRemoved, HVSRData):
data_noiseRemoved = {data_noiseRemoved.site: data_noiseRemoved}
data_noiseRemoved = {data_noiseRemoved.site: data_noiseRemoved}
for site_name in data_noiseRemoved.keys():
data_noiseRemoved[site_name]['processing_status']['remove_noise_status'] = False
# Since noise removal is not required for data processing, check others first
if data_noiseRemoved[site_name]['processing_status']['overall_status']:
data_noiseRemoved[site_name]['processing_status']['overall_status'] = True
else:
data_noiseRemoved[site_name]['processing_status']['overall_status'] = False
# If it wasn't originally HVSRBatch, make it HVSRData object again
if not data_noiseRemoved[site_name]['batch']:
data_noiseRemoved = data_noiseRemoved[site_name]
else:
if isinstance(data_noiseRemoved, HVSRData):
data_noiseRemoved = {data_noiseRemoved.site: data_noiseRemoved}
for site_name in data_noiseRemoved.keys(): # This should work more or less the same for batch and regular data now
data_noiseRemoved[site_name]['stream_edited'] = data_noiseRemoved[site_name]['stream']
data_noiseRemoved[site_name]['processing_status']['remove_noise_status'] = None
# If it wasn't originally HVSRBatch, make it HVSRData object again
#if not data_noiseRemoved[site_name]['batch']:
data_noiseRemoved = data_noiseRemoved[site_name]
except Exception as e:
if (source == 'file' or source == 'raw'):
if hasattr(e, 'message'):
errMsg = e.message
else:
errMsg = e
if not ('batch' in data_noiseRemoved.keys() and data_noiseRemoved['batch']):
raise RuntimeError(f"generate_psds() error: {errMsg}")
# Generate PPSDs
psd_data = data_noiseRemoved
try:
generate_psds_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(generate_psds).parameters.keys())}
PPSDkwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(PPSD).parameters.keys())}
generate_psds_kwargs.update(PPSDkwargs)
generate_psds_kwargs['azimuthal_psds'] = azimuth_calculation
if skip_steps is None or ('generate_psds' not in skip_steps and 'generate_ppsds' not in skip_steps):
psd_data = generate_psds(hvsr_data=psd_data, verbose=verbose, **generate_psds_kwargs)
except Exception as e:
if hasattr(e, 'message'):
errMsg = e.message
else:
errMsg = e
if verbose:
print(f"Error during generate_psds() for {site_name}: \n{errMsg}")
if (source == 'file' or source == 'raw'):
raise RuntimeError(f"generate_psds() error: \n{errMsg}")
# Reformat data so HVSRData and HVSRBatch data both work here
if isinstance(psd_data, HVSRData):
psd_data = {psd_data['site']: psd_data}
for site_name in psd_data.keys(): # This should work more or less the same for batch and regular data now
psd_data[site_name]['processing_status']['generate_psds_status'] = False
psd_data[site_name]['processing_status']['overall_status'] = False
#If it wasn't originally HVSRBatch, make it HVSRData object again
if not psd_data[site_name]['batch']:
psd_data = psd_data[site_name]
# Remove Outlier PSD Curves
data_curvesRemoved = psd_data
try:
remove_outlier_curve_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(remove_outlier_curves).parameters.keys())}
if len(remove_outlier_curve_kwargs.keys())==1 and 'plot_engine' in remove_outlier_curve_kwargs.keys():
remove_outlier_curve_kwargs = {}
if 'use_hv_curves' not in remove_outlier_curve_kwargs.keys():
use_hv_curves = False
else:
use_hv_curves = remove_outlier_curve_kwargs['use_hv_curves']
# Check whether it is indicated to remove outlier curves
outlier_curve_keys_used = True
if remove_outlier_curve_kwargs == {} or list(remove_outlier_curve_kwargs.keys()) == ['show_plot']:
outlier_curve_keys_used = False
if (outlier_curves_removal or outlier_curve_keys_used) and not use_hv_curves and (skip_steps is None or 'remove_outlier_curves' not in skip_steps):
remove_outlier_curve_kwargs['remove_outliers_during_plot'] = False
data_curvesRemoved = remove_outlier_curves(hvsr_data=data_curvesRemoved, verbose=verbose,**remove_outlier_curve_kwargs)
except Exception as e:
traceback.print_exception(sys.exc_info()[1])
exc_type, exc_obj, tb = sys.exc_info()
f = tb.tb_frame
lineno = tb.tb_lineno
filename = f.f_code.co_filename
errLineNo = str(traceback.extract_tb(sys.exc_info()[2])[-1].lineno)
error_category = type(e).__name__.title().replace('error', 'Error')
error_message = f"{e} ({errLineNo})"
print(f"{error_category} ({errLineNo}): {error_message}")
print(lineno, filename, f)
# Reformat data so HVSRData and HVSRBatch data both work here
if isinstance(data_curvesRemoved, HVSRData):
data_curvesRemoved_interim = {data_curvesRemoved['site']: data_curvesRemoved}
else:
data_curvesRemoved_interim = data_curvesRemoved
for site_name in data_curvesRemoved_interim.keys(): # This should work more or less the same for batch and regular data now
data_curvesRemoved_interim[site_name]['processing_status']['remove_outlier_curves_status'] = False
#data_curvesRemoved_interim[site_name]['processing_status']['overall_status'] = False
#If it wasn't originally HVSRBatch, make it HVSRData object again
if not data_curvesRemoved_interim[site_name]['batch']:
data_curvesRemoved_interim = data_curvesRemoved_interim[site_name]
data_curvesRemoved = data_curvesRemoved_interim
# Process HVSR Curves
hvsr_results = data_curvesRemoved
try:
process_hvsr_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(process_hvsr).parameters.keys())}
if azimuth_calculation:
if azimuth_kwargs['azimuth_type'] == 'single':
process_hvsr_kwargs['azimuth'] = azimuth_kwargs['azimuth_angle']
if skip_steps is None or 'process_hvsr' not in skip_steps:
hvsr_results = process_hvsr(hvsr_data=psd_data, verbose=verbose, **process_hvsr_kwargs)
except Exception as e:
sprit_utils._get_error_from_exception(e,
print_error_message=True)
if isinstance(hvsr_results, HVSRData):
hvsr_results = {hvsr_results['site']: hvsr_results}
for site_name in hvsr_results.keys(): # This should work more or less the same for batch and regular data now
hvsr_results[site_name]['processing_status']['process_hvsr_status']=False
hvsr_results[site_name]['processing_status']['overall_status'] = False
# If it wasn't originally HVSRBatch, make it HVSRData object again
if not hvsr_results[site_name]['batch']:
hvsr_results = hvsr_results[site_name]
# Remove outlier HV Curves
try:
remove_outlier_curve_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(remove_outlier_curves).parameters.keys())}
if 'use_hv_curves' not in remove_outlier_curve_kwargs.keys():
use_hv_curves = False
else:
use_hv_curves = remove_outlier_curve_kwargs['use_hv_curves']
# Check whether it is indicated to remove outlier curves
outlier_curve_keys_used = True
if remove_outlier_curve_kwargs == {} or list(remove_outlier_curve_kwargs.keys()) == ['show_plot']:
outlier_curve_keys_used = False
if (outlier_curves_removal or outlier_curve_keys_used) and use_hv_curves and (skip_steps is None or 'remove_outlier_curves' not in skip_steps):
remove_outlier_curve_kwargs['remove_outliers_during_plot'] = False
hvsr_results = remove_outlier_curves(hvsr_data=hvsr_results, verbose=verbose,**remove_outlier_curve_kwargs)
except Exception as e:
traceback.print_exception(sys.exc_info()[1])
exc_type, exc_obj, tb = sys.exc_info()
f = tb.tb_frame
lineno = tb.tb_lineno
filename = f.f_code.co_filename
errLineNo = str(traceback.extract_tb(sys.exc_info()[2])[-1].lineno)
error_category = type(e).__name__.title().replace('error', 'Error')
error_message = f"{e} ({errLineNo})"
print(f"{error_category} ({errLineNo}): {error_message}")
print(lineno, filename, f)
# Reformat data so HVSRData and HVSRBatch data both work here
if isinstance(hvsr_results, HVSRData):
data_curvesRemoved_interim = {hvsr_results['site']: hvsr_results}
else:
data_curvesRemoved_interim = hvsr_results
for site_name in data_curvesRemoved_interim.keys(): # This should work more or less the same for batch and regular data now
data_curvesRemoved_interim[site_name]['processing_status']['remove_outlier_curves_status'] = False
#data_curvesRemoved_interim[site_name]['processing_status']['overall_status'] = False
#If it wasn't originally HVSRBatch, make it HVSRData object again
if not data_curvesRemoved_interim[site_name]['batch']:
data_curvesRemoved_interim = data_curvesRemoved_interim[site_name]
hvsr_results = data_curvesRemoved_interim
# Final post-processing/reporting
# Check peaks
check_peaks_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(check_peaks).parameters.keys())}
if skip_steps is None or 'check_peaks' not in skip_steps:
hvsr_results = check_peaks(hvsr_data=hvsr_results, verbose=verbose, **check_peaks_kwargs)
get_report_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(get_report).parameters.keys())}
# Add 'az' as a default plot if the following conditions
# first check if report_formats is specified, if not, add default value
if 'report_formats' not in get_report_kwargs.keys():
get_report_kwargs['report_formats'] = inspect.signature(get_report).parameters['report_formats'].default
# Now, check if plot is specified, then if plot_type is specified, then add 'az' if stream has azimuths
if 'plot' in get_report_kwargs['report_formats']:
plot_hvsr_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(plot_hvsr).parameters.keys())}
get_report_kwargs.update(plot_hvsr_kwargs)
usingDefault = True
if 'plot_type' not in get_report_kwargs.keys():
get_report_kwargs['plot_type'] = inspect.signature(get_report).parameters['plot_type'].default
else:
usingDefault = False
# Check if az is already specified as plot output
azList = ['azimuth', 'az', 'a', 'radial', 'r']
az_requested = False
get_report_kwargs['plot_type'] = [item.lower() for item in get_report_kwargs['plot_type'].split(' ')]
for azStr in azList:
if azStr.lower() in get_report_kwargs['plot_type']:
az_requested = True
break
get_report_kwargs['plot_type'] = ' '.join(get_report_kwargs['plot_type'])
if isinstance(hvsr_results, HVSRData):
hvsr_results_interim = {hvsr_results['site']: hvsr_results}
else:
hvsr_results_interim = hvsr_results
for site_name in hvsr_results_interim.keys(): # This should work more or less the same for batch and regular data now
# Check if data has azimuth data
hasAz = False
if 'stream' in hvsr_results_interim[site_name].keys():
for tr in hvsr_results_interim[site_name]['stream']:
if tr.stats.component == 'R':
hasAz = True
break
# Assuming all sites in batch have az if one does
if hasAz:
break
# If it wasn't originally HVSRBatch, make it HVSRData object again
#if not hvsr_results_interim[site_name]['batch']:
# hvsr_results_interim = hvsr_results_interim[site_name]
# Add azimuth as a requested plot if azimuthal data exists but not requested in plot
if not az_requested and hasAz and hvsr_results.horizontal_method != 'Single Azimuth':
get_report_kwargs['plot_type'] = get_report_kwargs['plot_type'] + ' az'
if skip_steps is None or ('get_report' not in skip_steps and 'report' not in skip_steps):
hvsr_results = get_report(hvsr_results=hvsr_results, verbose=verbose, **get_report_kwargs)
if verbose:
if 'report_formats' in get_report_kwargs.keys():
if type(get_report_kwargs['report_formats']) is str:
report_formats = get_report_kwargs['report_formats'].lower()
elif isinstance(get_report_kwargs['report_formats'], (tuple, list)):
for i, rf in enumerate(get_report_kwargs['report_formats']):
get_report_kwargs['report_formats'][i] = rf.lower()
# if report_formats is 'print', we would have already printed it in previous step
if get_report_kwargs['report_formats'] == 'print' or 'print' in get_report_kwargs['report_formats'] or isinstance(hvsr_results, HVSRBatch):
# We do not need to print another report if already printed to terminal
pass
else:
# We will just change the report_formats kwarg to print, since we already got the originally intended report format above,
# now need to print for verbose output
get_report_kwargs['report_formats'] = 'print'
get_report(hvsr_results=hvsr_results, **get_report_kwargs)
if get_report_kwargs['report_formats'] == 'plot' or 'plot' in get_report_kwargs['report_formats']:
# We do not need to plot another report if already plotted
pass
else:
# hvplot_kwargs = {k: v for k, v in kwargs.items() if k in plot_hvsr.__code__.co_varnames}
# hvsr_results['Plot_Report'] = plot_hvsr(hvsr_results, return_fig=True, show_plot=False, close_figs=True)
pass
else:
pass
# Export processed data if hvsr_export_path(as pickle currently, default .hvsr extension)
if 'hvsr_export_path' in kwargs.keys():
if kwargs['hvsr_export_path'] is None:
pass
else:
if 'ext' in kwargs.keys():
ext = kwargs['ext']
else:
ext = 'hvsr'
export_hvsr(hvsr_data=hvsr_results, hvsr_export_path=kwargs['hvsr_export_path'], ext=ext, verbose=verbose)
if 'show_plot' in kwargs:
if not kwargs['show_plot']:
plt.close()
return hvsr_results
# Read data as batch
def batch_data_read(batch_data, batch_type='table', param_col=None, batch_params=None, verbose=False, **readcsv_getMeta_fetch_kwargs):
    """Read multiple data files (and their processing parameters) in as a batch.

    This is best used through sprit.fetch_data(*args, source='batch', **other_kwargs).
    For every entry in the batch, input_params() and fetch_data() are run, and any
    parameters found for the other functions used by sprit.run() are stored in the
    'processing_parameters' item of each site's data so they can be applied later.

    Parameters
    ----------
    batch_data : filepath, pandas.DataFrame, dict, or list
        Input data information for how to read in data as batch. Can be filepath or list of filepaths/stream objects.
        If filepath, should point to .csv (or similar that can be read by pandas.read_csv()) with batch data information.
        A DataFrame or a dict (converted using pandas.DataFrame.from_dict()) may also be used when batch_type='table'.
    batch_type : str, optional
        Type of batch read, 'table' or 'filelist' ('list' is accepted as a synonym of 'filelist').
        If 'table', will read data from a file read in using pandas.read_csv(), by default 'table'.
        If 'sample', the sample batch table distributed with the package is read.
    param_col : None or str, optional
        Name of parameter column from batch information file. Only used if a batch_type='table' and single parameter column is used,
        rather than one column per parameter (for single parameter column, parameters are formatted with = between keys/values
        and , between item pairs, e.g., "param_name=param_val, param_name2=param_val2"), by default None
    batch_params : list, dict, or None, default = None
        Parameters to be used if batch_type='filelist'. If it is a list, needs to be the same length as batch_data.
        If it is a dict, will be applied to all files in batch_data and will combined with extra keyword arguments caught by **readcsv_getMeta_fetch_kwargs.
        The object passed in is not modified.
    verbose : bool, optional
        Whether to print information to terminal during batch read, by default False
    **readcsv_getMeta_fetch_kwargs
        Keyword arguments that will be read into pandas.read_csv(), sprit.input_params, sprit.get_metadata(), and/or sprit.fetch_data()

    Returns
    -------
    hvsrBatch
        HVSRBatch object with each item representing a different HVSRData object

    Raises
    ------
    IndexError
        If param_col is specified but is not a column of the batch table.
    RuntimeError
        If batch_type='filelist' and batch_data is not a list or tuple, or if batch_params is a list
        whose length does not match batch_data.
    TypeError
        If batch_type='filelist' and batch_params is not None, a list/tuple, or a dict.
    ValueError
        If batch_type is not one of the supported values.
    """
    if verbose:
        print(f'Processing batch data from {batch_type}:')
        print(f" Batch data source: {batch_data}")

    # Functions (for which parameters are used) in sprit.run()
    run_functions = (input_params, fetch_data, batch_data_read,
                     get_metadata, calculate_azimuth,
                     remove_noise, generate_psds, remove_outlier_curves,
                     process_hvsr, check_peaks,
                     get_report, export_hvsr)

    # Inspect each signature only once; it is reused for every row/file below
    run_signatures = [(fun, inspect.signature(fun).parameters) for fun in run_functions]

    # Default values (and names) of all parameters of the functions above
    default_dict = {}
    run_param_names = set()
    for _, fun_params in run_signatures:
        for param_name, param_info in fun_params.items():
            run_param_names.add(param_name)
            if param_info.default is not inspect.Parameter.empty:
                default_dict[param_name] = param_info.default

    # Sample data may be requested via batch_type or via a sample key in batch_data.
    # batch_data may be unhashable (list, DataFrame), in which case it cannot be a key.
    sample_data = (batch_type == 'sample')
    if not sample_data:
        try:
            sample_data = batch_data in sampleFileKeyMap.keys()
        except TypeError:
            sample_data = False
    if sample_data:
        batch_type = 'table'

    param_dict_list = []
    verboseStatement = []  # One list of messages per batch entry (filled out below)

    if batch_type == 'table':
        if sample_data:
            # Sample data needs absolute paths to the files
            dataReadInfoDF = pd.read_csv(sampleFileKeyMap['sample_batch'])
            dataReadInfoDF['input_data'] = [SAMPLE_DATA_DIR.joinpath(fpath) for fpath in dataReadInfoDF['input_data']]
        elif isinstance(batch_data, pd.DataFrame):
            # Copy so that columns added below do not end up in the caller's DataFrame
            dataReadInfoDF = batch_data.copy()
        elif isinstance(batch_data, dict):
            # For params input
            dataReadInfoDF = pd.DataFrame.from_dict(batch_data)
        else:  # Read csv
            read_csv_params = inspect.signature(pd.read_csv).parameters
            read_csv_kwargs = {k: v for k, v in readcsv_getMeta_fetch_kwargs.items() if k in read_csv_params}
            dataReadInfoDF = pd.read_csv(batch_data, **read_csv_kwargs)

        nRows = dataReadInfoDF.shape[0]

        # Generate site names if they don't exist already
        if 'site' not in dataReadInfoDF.columns:
            filldigs = len(str(nRows))  # Number of digits in df shape
            dataReadInfoDF['site'] = [f'HVSRSite_{str(ind).zfill(filldigs)}' for ind in dataReadInfoDF.index]

        # Print information about batch read, as specified
        print(f" {nRows} sites found: {list(dataReadInfoDF['site'])}")
        if verbose:
            maxLength = 25
            maxColWidth = 12
            if nRows > maxLength:
                print(f'\t Showing information for first {maxLength} files only:')
            print()

            # Print nicely formatted df
            # Print column names
            headerCells = [str(col)[:maxColWidth].ljust(maxColWidth) + ' ' for col in dataReadInfoDF.columns]
            print(' ' + ''.join(headerCells))

            # Print separator
            print('-' * ((maxColWidth + 2) * len(dataReadInfoDF.columns)))

            # Print columns/rows (only the first maxLength rows, as stated above)
            for _, row in dataReadInfoDF.head(maxLength).iterrows():
                rowCells = []
                for cell in row:
                    cellStr = str(cell)
                    if len(cellStr) > maxColWidth:
                        cellStr = cellStr[:maxColWidth - 3] + '...'
                    rowCells.append(cellStr.ljust(maxColWidth) + ' ')
                print(' ' + ''.join(rowCells))

            if nRows > maxLength:
                endline = f'\t...{nRows - maxLength} more rows in file.\n'
            else:
                endline = '\n'
            print(endline)
            print('Fetching the following files:')

        # Get processing parameters, either from column param_col or from individual columns
        # If param_col, format is string of format: "param_name=param_val, param_name2=param_val2"
        if param_col is None:  # Not a single parameter column, each col=parameter
            paramCols = [col for col in dataReadInfoDF.columns if col in run_param_names]
            for row_ind in range(nRows):
                param_dict = {}
                rowMessages = []
                for col in paramCols:
                    # Positional access, so this also works if the index is not 0..n-1
                    currParam = dataReadInfoDF[col].iloc[row_ind]
                    if pd.isna(currParam) or currParam == 'nan':
                        if col in default_dict:
                            param_dict[col] = default_dict[col]  # Get default value
                            if verbose:
                                if isinstance(default_dict[col], str):
                                    rowMessages.append("\t\t'{}' parameter not specified in batch file. Using {}='{}'".format(col, col, default_dict[col]))
                                else:
                                    rowMessages.append("\t\t'{}' parameter not specified in batch file. Using {}={}".format(col, col, default_dict[col]))
                        else:
                            param_dict[col] = None
                    else:
                        param_dict[col] = currParam
                param_dict_list.append(param_dict)
                verboseStatement.append(rowMessages)
        else:
            if param_col not in dataReadInfoDF.columns:
                raise IndexError('{} is not a column in {} (columns are: {})'.format(param_col, batch_data, dataReadInfoDF.columns))
            for row in dataReadInfoDF[param_col]:
                param_dict = {}
                for item in str(row).split(','):
                    # Split on the first '=' only, so values may themselves contain '='
                    key, sep, value = item.partition('=')
                    if not sep:
                        if item.strip() not in ('', 'nan'):
                            warnings.warn(f"Could not interpret '{item.strip()}' in column {param_col} as param_name=param_val; it will be ignored.")
                        continue
                    param_dict[key.strip()] = value.strip()
                param_dict_list.append(param_dict)

    elif batch_type in ('filelist', 'list'):
        if not isinstance(batch_data, (list, tuple)):
            raise RuntimeError(f"If batch_type is specified as 'filelist' or 'list', batch_data must be list or tuple, not {type(batch_data)}.")

        # Each file gets its OWN parameter dictionary (a shared dictionary would
        # give every entry the 'input_data' of the last file)
        if batch_params is None:
            param_dict_list = [{} for _ in batch_data]
        elif isinstance(batch_params, (list, tuple)):
            if len(batch_params) != len(batch_data):
                raise RuntimeError('If batch_params is list, it must be the same length as batch_data. len(batch_params)={} != len(batch_data)={}'.format(len(batch_params), len(batch_data)))
            param_dict_list = [dict(fileParams) for fileParams in batch_params]
        elif isinstance(batch_params, dict):
            sharedParams = {**batch_params, **readcsv_getMeta_fetch_kwargs}
            param_dict_list = [dict(sharedParams) for _ in batch_data]
        else:
            raise TypeError(f"batch_params must be None, a list, or a dict, not {type(batch_params)}.")

        for fileParams, file in zip(param_dict_list, batch_data):
            fileParams['input_data'] = file
    else:
        raise ValueError(f"batch_type='{batch_type}' is not supported. Use 'table' or 'filelist'.")

    # Make sure there is a list of messages for every entry, whichever branch was used above
    verboseStatement.extend([] for _ in range(len(param_dict_list) - len(verboseStatement)))

    # Get a uniformly formatted input DataFrame
    input_df_uniformatted = pd.DataFrame(param_dict_list)

    def _describe_call(func_name, func_kwargs):
        # One-line (truncated) summary of the arguments used for a function call
        if func_kwargs:
            argStr = ', '.join(f"{arg}={value}" for arg, value in func_kwargs.items())
        else:
            argStr = '<No parameters specified>'
        callStr = f"\t{func_name}: {argStr}"
        return (callStr[:96] + '...') if len(callStr) > 99 else callStr

    # Do batch run of input_params() and fetch_data() (these are skipped in run() if batch mode is used)
    input_params_names = inspect.signature(input_params).parameters
    fetch_data_names = inspect.signature(fetch_data).parameters

    hvsr_batchDict = {}
    zfillDigs = len(str(len(param_dict_list)))  # Get number of digits of length of param_dict_list
    for i, param_dict in enumerate(param_dict_list):
        # Parameters specified for an individual entry override the general keyword arguments
        input_params_kwargs = {k: v for k, v in readcsv_getMeta_fetch_kwargs.items() if k in input_params_names}
        input_params_kwargs.update({k: v for k, v in param_dict.items() if k in input_params_names})
        ipverboseString = _describe_call('input_params', input_params_kwargs)

        # Run input_params()
        try:
            params = input_params(**input_params_kwargs)
        except Exception as e:
            params = input_params_kwargs
            params['processing_status'] = {'input_params_status': False,
                                           'overall_status': False}
            verboseStatement[i].append(f"\t{e}")

        # Run fetch_data()
        fetch_data_kwargs = {k: v for k, v in readcsv_getMeta_fetch_kwargs.items() if k in fetch_data_names}
        fetch_data_kwargs.update({k: v for k, v in param_dict.items() if k in fetch_data_names})
        fdverboseString = _describe_call('fetch_data', fetch_data_kwargs)

        try:
            hvsrData = fetch_data(params=params, **fetch_data_kwargs)
        except Exception as e:
            hvsrData = params
            hvsrData['processing_status']['fetch_data_status'] = False
            hvsrData['processing_status']['overall_status'] = False
            verboseStatement[i].append(f"\t{e}")

        readOK = hvsrData['processing_status']['overall_status']
        if verbose:
            if readOK:
                sitename = hvsrData['site']
            elif 'site' in param_dict.keys():
                sitename = param_dict['site']
            else:
                sitename = 'UNSPECIFIED_SITE'

            print(f" {sitename}")
            print(ipverboseString)
            print(fdverboseString)
            for item in verboseStatement[i]:
                print(item)
            if not readOK:
                print(f" *{sitename} not read correctly. Processing will not be carried out.")

        hvsrData['batch'] = True

        # If reading failed before a site name was assigned, fall back on the default name
        if 'site' not in hvsrData.keys():
            hvsrData['site'] = default_dict['site']

        # This may be redundant
        if hvsrData['site'] == default_dict['site']:  # If site was not designated
            hvsrData['site'] = "{}_{}".format(hvsrData['site'], str(i).zfill(zfillDigs))

        # Get processing parameters for other functions in sprit.run() besides input_params and fetch_data
        if 'processing_parameters' in hvsrData.keys():
            processing_parameters = hvsrData['processing_parameters'].copy()
        else:
            processing_parameters = {}

        for fun, fun_params in run_signatures:
            processing_parameters[fun.__name__] = {k: v for k, v in param_dict.items() if k in fun_params}
        hvsrData['processing_parameters'] = processing_parameters

        # Assume source is 'file' if not specified
        if 'source' not in hvsrData['processing_parameters']['fetch_data'].keys():
            hvsrData['processing_parameters']['fetch_data']['source'] = 'file'

        hvsr_batchDict[hvsrData['site']] = hvsrData

    hvsrBatch = HVSRBatch(hvsr_batchDict, df_as_read=input_df_uniformatted)

    print()
    print('Finished reading input data in preparation for batch processing')
    return hvsrBatch
# Function to generate azimuthal readings from the horizontal components
def calculate_azimuth(hvsr_data, azimuth_angle=45, azimuth_type='multiple', azimuth_unit='degrees',
                      show_az_plot=False, verbose=False, **plot_azimuth_kwargs):
    """Function to calculate azimuthal horizontal component at specified angle(s).

    Adds each new horizontal component as a radial component to obspy.Stream object at hvsr_data['stream'].
    Each radial component is calculated as N*cos(azimuth) + E*sin(azimuth).

    Parameters
    ----------
    hvsr_data : HVSRData or HVSRBatch
        Input HVSR data. If HVSRBatch, each site with a passing overall processing status is processed in turn.
    azimuth_angle : int or float, default=45
        If `azimuth_type='multiple'`, this is the angular step (in unit `azimuth_unit`) of each of the azimuthal measurements.
        If `azimuth_type='single'` this is the angle (in unit `azimuth_unit`) of the single calculated azimuthal measurement.
        Values outside of 0-180 degrees are converted to the equivalent value within that range. By default 45.
    azimuth_type : str, default='multiple'
        What type of azimuthal measurement to make, by default 'multiple'.
        If 'multiple' (or {'multi', 'mult', 'm'}), will take a measurement at each angular step of azimuth_angle of unit azimuth_unit.
        If 'single' (or {'sing', 's'}), will take a single azimuthal measurement at angle specified in azimuth_angle.
    azimuth_unit : str, default='degrees'
        Angular unit used to specify `azimuth_angle` parameter. By default 'degrees'.
        If 'degrees' (or {'deg', 'd'}), will use degrees.
        If 'radians' (or {'rad', 'r'}), will use radians.
    show_az_plot : bool, default=False
        Whether to show azimuthal plot, by default False.
    verbose : bool, default=False
        Whether to print terminal output, by default False
    **plot_azimuth_kwargs
        Keyword arguments passed to plot_azimuth() if show_az_plot is True.

    Returns
    -------
    HVSRData
        Updated HVSRData object specified in hvsr_data with hvsr_data['stream'] attribute containing additional components (EHR-***),
        with *** being zero-padded (3 digits) azimuth angle in degrees.
        If azimuth_unit or azimuth_type is not supported (or the angular step is zero), a warning is raised and the input is returned unchanged.
    """
    # Get input parameters (this needs to be the first statement, so only the arguments are captured)
    orig_args = locals().copy()
    start_time = datetime.datetime.now()

    # Update with processing parameters specified previously in input_params, if applicable
    update_msg = []
    if 'processing_parameters' in hvsr_data.keys() and 'calculate_azimuth' in hvsr_data['processing_parameters'].keys():
        argSpec = inspect.getfullargspec(calculate_azimuth)
        defaultVDict = dict(zip(argSpec.args[1:], argSpec.defaults))
        for k, v in hvsr_data['processing_parameters']['calculate_azimuth'].items():
            # Manual input to function overrides the imported parameter values
            # (i.e., only update arguments that were left at their default value)
            if (not isinstance(v, (HVSRData, HVSRBatch))) and (k in defaultVDict) and (orig_args[k] == defaultVDict[k]):
                update_msg.append(f'\t\t{k} = {v} (previously {orig_args[k]})')
                orig_args[k] = v

    azimuth_angle = orig_args['azimuth_angle']
    azimuth_type = orig_args['azimuth_type']
    azimuth_unit = orig_args['azimuth_unit']
    show_az_plot = orig_args['show_az_plot']
    verbose = orig_args['verbose']

    if verbose and (isinstance(hvsr_data, HVSRBatch) or not hvsr_data['batch']):
        if not (isinstance(hvsr_data, HVSRData) and hvsr_data['batch']):
            print('\nGenerating azimuthal data (calculate_azimuth())')
            print('\tUsing the following parameters:')
            for key, value in orig_args.items():
                if key != 'hvsr_data':
                    print('\t {}={}'.format(key, value))

            if update_msg:
                print()
                print('\tThe following parameters were updated using the processing_parameters attribute:')
                for msg_line in update_msg:
                    print(msg_line)
            print()

    if isinstance(hvsr_data, HVSRBatch):
        # If running batch, we'll loop through each site
        hvsr_out = {}
        for site_name in hvsr_data.keys():
            args = orig_args.copy()  # Make a copy so we don't accidentally overwrite
            args['hvsr_data'] = hvsr_data[site_name]  # Get what would normally be the "hvsr_data" variable for each site
            if hvsr_data[site_name]['processing_status']['overall_status']:
                try:
                    hvsr_out[site_name] = __azimuth_batch(**args)  # Call another function, that lets us run this function again
                except Exception as e:
                    # Keep the (unprocessed) site in the output, flagged as unsuccessful
                    hvsr_data[site_name]['processing_status']['calculate_azimuths_status'] = False
                    hvsr_data[site_name]['processing_status']['overall_status'] = False
                    hvsr_out[site_name] = hvsr_data[site_name]
                    if verbose:
                        print(e)
                    else:
                        warnings.warn(f"\t{site_name}: calculate_azimuth() unsuccessful. {e}", RuntimeWarning)
            else:
                hvsr_data[site_name]['processing_status']['calculate_azimuths_status'] = False
                hvsr_data[site_name]['processing_status']['overall_status'] = False
                hvsr_out[site_name] = hvsr_data[site_name]

        return HVSRBatch(hvsr_out, df_as_read=hvsr_data.input_df)

    if isinstance(hvsr_data, (HVSRData, dict, obspy.Stream)):
        degList = ['degrees', 'deg', 'd', '°']
        radList = ['radians', 'rad', 'r']
        if azimuth_unit.lower() in degList:
            az_angle_rad = np.deg2rad(azimuth_angle)
            az_angle_deg = azimuth_angle
        elif azimuth_unit.lower() in radList:
            az_angle_rad = azimuth_angle
            az_angle_deg = np.rad2deg(azimuth_angle)
        else:
            warnings.warn(f"azimuth_unit={azimuth_unit} not supported. Try 'degrees' or 'radians'. No azimuthal analysis run.")
            return hvsr_data

        # Limit to 1-180 (and "right" half of compass) (will be reflected on other half if applicable to save computation time)
        conversion_message = ''
        if az_angle_deg < 0:
            conversion_message = ' converted to a positive value'
            if az_angle_deg < -180:
                conversion_message = conversion_message + ' between 0 and 180 degrees'
        elif az_angle_deg > 180:
            conversion_message = ' converted to a value between 0 and 180 degrees'

        if conversion_message:
            conversion_message = f"\tThe azimuth angle specified will be{conversion_message}"
            if verbose:
                print(conversion_message, end=f': {az_angle_deg}')

            # Convert angle to 0-180, and keep the radian value in agreement with it
            az_angle_deg = az_angle_deg - (180 * (az_angle_deg // 180))
            az_angle_rad = np.deg2rad(az_angle_deg)

            if verbose:
                print(f' degrees --> {az_angle_deg} degrees.')

        multAzList = ['multiple azimuths', 'multiple', 'multi', 'mult', 'm']
        singleAzList = ['single azimuth', 'single', 'sing', 's']
        if azimuth_type.lower() in multAzList:
            if az_angle_deg == 0:
                warnings.warn(f"azimuth_angle={azimuth_angle} results in an angular step of 0 degrees, which cannot be used with azimuth_type='{azimuth_type}'. No azimuthal analysis run.")
                return hvsr_data
            # Derive the radian list from the degree list so both always have the same length
            # (np.arange with a floating point step may otherwise differ by one element)
            azimuth_list_deg = list(np.arange(0, 180, az_angle_deg))
            azimuth_list = list(np.deg2rad(azimuth_list_deg))
        elif azimuth_type.lower() in singleAzList:
            azimuth_list = [az_angle_rad]
            azimuth_list_deg = [az_angle_deg]
        else:
            warnings.warn(f"azimuth_type={azimuth_type} not supported. Try 'multiple' or 'single'. No azimuthal analysis run.")
            return hvsr_data

        if isinstance(hvsr_data, obspy.Stream):
            dataStream = hvsr_data
        else:
            dataStream = hvsr_data['stream']

        zComp = dataStream.select(component='Z').merge()
        eComp = dataStream.select(component='E').merge()
        nComp = dataStream.select(component='N').merge()

        # Reset stats for original data too
        zComp[0].stats['azimuth_deg'] = 0
        eComp[0].stats['azimuth_deg'] = 90
        nComp[0].stats['azimuth_deg'] = 0

        zComp[0].stats['azimuth_rad'] = 0
        eComp[0].stats['azimuth_rad'] = np.pi/2
        nComp[0].stats['azimuth_rad'] = 0

        zComp[0].stats['location'] = '000'
        eComp[0].stats['location'] = '090'
        nComp[0].stats['location'] = '000'

        # The radial traces use the stats of the E component as a template
        statsDict = {key: value for key, value in eComp[0].stats.items()}

        def _data_and_mask(traceData):
            # Return (data, mask, has_mask); an all-True mask is used for data without
            # masked values so it does not affect the logical "and" of the masks below
            if np.ma.is_masked(traceData):
                return traceData.data, traceData.mask, True
            return traceData, np.ones(len(traceData), dtype=bool), False

        # The horizontal data do not change between azimuths, so get them once
        nData, nMask, nHasMask = _data_and_mask(nComp[0].data)
        eData, eMask, eHasMask = _data_and_mask(eComp[0].data)
        # NOTE(review): samples are masked only where BOTH components are masked (as before) - confirm this is intended
        radialMask = np.logical_and(nMask, eMask) if (nHasMask or eHasMask) else None

        for az_deg, az_rad in zip(azimuth_list_deg, azimuth_list):
            statsDict['location'] = f"{int(round(az_deg)):03d}"  # Change location name (3-digit, zero-padded degrees)
            statsDict['channel'] = "EHR"  # Change channel name
            statsDict['azimuth_deg'] = az_deg
            statsDict['azimuth_rad'] = az_rad

            # From hvsrpy: horizontal = self.ns._amp * math.cos(az_rad) + self.ew._amp*math.sin(az_rad)
            radial_comp_data = np.add(nData * np.cos(az_rad), eData * np.sin(az_rad))
            if radialMask is not None:
                radial_comp_data = np.ma.array(radial_comp_data, mask=radialMask)

            radial_trace = obspy.Trace(data=radial_comp_data, header=statsDict)
            dataStream.append(radial_trace)

    # Verbose printing
    if verbose:
        for line in str(hvsr_data.stream).split('\n'):
            print('\t\t', line)

    if show_az_plot:
        hvsr_data['Azimuth_Fig'] = plot_azimuth(hvsr_data=hvsr_data, **plot_azimuth_kwargs)

    hvsr_data['processing_status']['calculate_azimuths_status'] = True
    hvsr_data = sprit_utils._check_processing_status(hvsr_data, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose)

    return hvsr_data
# Quality checks, stability tests, clarity tests
# def check_peaks(hvsr, x, y, index_list, peak, peakm, peakp, hvsr_peaks, stdf, hvsr_log_std, rank, hvsr_band=[0.1, 50], do_rank=False):
def check_peaks(hvsr_data, hvsr_band=DEFAULT_BAND, peak_selection='max', peak_freq_range=DEFAULT_BAND, azimuth='HV', verbose=False):
"""Function to run tests on HVSR peaks to find best one and see if it passes SESAME quality checks
Parameters
----------
hvsr_data : dict
Dictionary containing all the calculated information about the HVSR data (i.e., hvsr_out returned from process_hvsr)
hvsr_band : tuple or list, default=[0.1, 50]
2-item tuple or list with lower and upper limit of frequencies to analyze
peak_selection : str or numeric, default='max'
How to select the "best" peak used in the analysis. For peak_selection="max" (default value), the highest peak within peak_freq_range is used.
For peak_selection='scored', an algorithm is used to select the peak based in part on which peak passes the most SESAME criteria.
If a numeric value is used (e.g., int or float), this should be a frequency value to manually select as the peak of interest.
peak_freq_range : tuple or list, default=[0.1, 50];
The frequency range within which to check for peaks. If there is an HVSR curve with multiple peaks, this allows the full range of data to be processed while limiting peak picks to likely range.
verbose : bool, default=False
Whether to print results and inputs to terminal.
Returns
-------
hvsr_data : HVSRData or HVSRBatch object
Object containing previous input data, plus information about peak tests
"""
orig_args = locals().copy() # Get the initial arguments
# Update with processing parameters specified previously in input_params, if applicable
if 'processing_parameters' in hvsr_data.keys():
if 'check_peaks' in hvsr_data['processing_parameters'].keys():
update_msg = []
for k, v in hvsr_data['processing_parameters']['check_peaks'].items():
defaultVDict = dict(zip(inspect.getfullargspec(check_peaks).args[1:],
inspect.getfullargspec(check_peaks).defaults))
# Manual input to function overrides the imported parameter values
if (not isinstance(v, (HVSRData, HVSRBatch))) and (k in orig_args.keys()) and (orig_args[k]==defaultVDict[k]):
update_msg.append(f'\t\t{k} = {v} (previously {orig_args[k]})')
orig_args[k] = v
hvsr_band = orig_args['hvsr_band']
peak_selection = orig_args['peak_selection']
peak_freq_range = orig_args['peak_freq_range']
verbose = orig_args['verbose']
#if (verbose and 'input_params' not in hvsr_data.keys()) or (verbose and not hvsr_data['batch']):
# if isinstance(hvsr_data, HVSRData) and hvsr_data['batch']:
# pass
# else:
if verbose:
print('\nChecking peaks in the H/V Curve (check_peaks())')
print('\tUsing the following parameters:')
for key, value in orig_args.items():
if key == 'hvsr_data':
pass
else:
print('\t {}={}'.format(key, value))
print()
if 'processing_parameters' in hvsr_data.keys() and 'check_peaks' in hvsr_data['processing_parameters'].keys():
if update_msg != []:
update_msg.insert(0, '\tThe following parameters were updated using the processing_parameters attribute:')
for msg_line in update_msg:
print(msg_line)
print()
# First, divide up for batch or not
if isinstance(hvsr_data, HVSRBatch):
if verbose:
print('\t Running in batch mode')
#If running batch, we'll loop through each site
for site_name in hvsr_data.keys():
args = orig_args.copy() #Make a copy so we don't accidentally overwrite
args['hvsr_data'] = hvsr_data[site_name] #Get what would normally be the "params" variable for each site
if hvsr_data[site_name]['processing_status']['overall_status']:
try:
hvsr_data[site_name] = __check_peaks_batch(**args) #Call another function, that lets us run this function again
except:
if verbose:
print(f"\t{site_name}: check_peaks() unsuccessful. Peaks not checked.")
else:
warnings.warn(f"\t{site_name}: check_peaks() unsuccessful. Peaks not checked.", RuntimeWarning)
hvsr_data = HVSRBatch(hvsr_data, df_as_read=hvsr_data.input_df)
else:
HVColIDList = ['_'.join(col_name.split('_')[2:]) for col_name in hvsr_data['hvsr_windows_df'].columns if col_name.startswith('HV_Curves') and 'Log' not in col_name]
HVColIDList[0] = 'HV'
if hvsr_data['processing_status']['overall_status']:
if not hvsr_band:
hvsr_band = DEFAULT_BAND
hvsr_data['hvsr_band'] = hvsr_band
anyK = list(hvsr_data['x_freqs'].keys())[0]
hvsr_data['PeakReport'] = {}
hvsr_data['BestPeak'] = {}
for i, col_id in enumerate(HVColIDList):
x = hvsr_data['x_freqs'][anyK] # Consistent for all curves
if col_id == 'HV':
y = hvsr_data['hvsr_curve'] # Calculated based on "Use" column
else:
y = hvsr_data['hvsr_az'][col_id]
scorelist = ['score', 'scored', 'best', 's']
maxlist = ['maximum', 'max', 'highest', 'm']
# Convert peak_selection to numeric, get index of nearest value as list item for __init_peaks()
try:
peak_val = float(peak_selection)
index_list = [np.argmin(np.abs(x - peak_val))]
except Exception as e:
# If score method is being used, get index list for __init_peaks()
if peak_selection in scorelist:
index_list = hvsr_data['hvsr_peak_indices'][col_id] #Calculated based on hvsr_curve
else:# str(peak_selection).lower() in maxlist:
#Get max index as item in list for __init_peaks()
startInd = np.argmin(np.abs(x - peak_freq_range[0]))
endInd = np.argmin(np.abs(x - peak_freq_range[1]))
if startInd > endInd:
holder = startInd
startInd = endInd
endInd = holder
subArrayMax = np.argmax(y[startInd:endInd])
# If max val is in subarray, this will be the same as the max of curve
# Otherwise, it will be the index of the value that is max within peak_freq_range
index_list = [subArrayMax+startInd]
hvsrp = hvsr_data['hvsrp'][col_id] # Calculated based on "Use" column
hvsrm = hvsr_data['hvsrm'][col_id] # Calculated based on "Use" column
hvsrPeaks = hvsr_data['hvsr_windows_df'][hvsr_data['hvsr_windows_df']['Use']]['CurvesPeakIndices_'+col_id]
hvsr_log_std = hvsr_data['hvsr_log_std'][col_id]
peak_freq_range = hvsr_data['peak_freq_range']
# Do for hvsr
peak = __init_peaks(x, y, index_list, hvsr_band, peak_freq_range, _min_peak_amp=0.5)
peak = __check_curve_reliability(hvsr_data, peak, col_id)
peak = __check_clarity(x, y, peak, do_rank=True)
# Do for hvsrp
# Find the relative extrema of hvsrp (hvsr + 1 standard deviation)
if not np.isnan(np.sum(hvsrp)):
index_p = __find_peaks(hvsrp)
else:
index_p = list()
peakp = __init_peaks(x, hvsrp, index_p, hvsr_band, peak_freq_range, _min_peak_amp=1)
peakp = __check_clarity(x, hvsrp, peakp, do_rank=True)
# Do for hvsrm
# Find the relative extrema of hvsrm (hvsr - 1 standard deviation)
if not np.isnan(np.sum(hvsrm)):
index_m = __find_peaks(hvsrm)
else:
index_m = list()
peakm = __init_peaks(x, hvsrm, index_m, hvsr_band, peak_freq_range, _min_peak_amp=0)
peakm = __check_clarity(x, hvsrm, peakm, do_rank=True)
# Get standard deviation of time peaks
stdf = __get_stdf(x, index_list, hvsrPeaks)
peak = __check_freq_stability(peak, peakm, peakp)
peak = __check_stability(stdf, peak, hvsr_log_std, rank=True)
hvsr_data['PeakReport'][col_id] = peak
#Iterate through peaks and
# Get the BestPeak based on the peak score
# Calculate whether each peak passes enough tests
curveTests = ['WinLen','SigCycles', 'LowCurveStD']
peakTests = ['ProminenceLow', 'ProminenceHi', 'AmpClarity', 'FreqStability', 'LowStDev_Freq', 'LowStDev_Amp']
bestPeakScore = 0
for p in hvsr_data['PeakReport'][col_id]:
# Get BestPeak
if p['Score'] > bestPeakScore:
bestPeakScore = p['Score']
bestPeak = p
# Calculate if peak passes criteria
cTestsPass = 0
pTestsPass = 0
for testName in p['PassList'].keys():
if testName in curveTests:
if p['PassList'][testName]:
cTestsPass += 1
elif testName in peakTests:
if p['PassList'][testName]:
pTestsPass += 1
if cTestsPass == 3 and pTestsPass >= 5:
p['PeakPasses'] = True
else:
p['PeakPasses'] = False
# Designate BestPeak in output dict
if len(hvsr_data['PeakReport'][col_id]) == 0:
bestPeak = {}
print(f"No Best Peak identified for {hvsr_data['site']} (azimuth {col_id})")
hvsr_data['BestPeak'][col_id] = bestPeak
else:
for i, col_id in enumerate(HVColIDList):
if hasattr(hvsr_data, 'BestPeak'):
hvsr_data['BestPeak'][col_id] = {}
else:
print(f"Processing Errors: No Best Peak identified for {hvsr_data['site']} (azimuth {col_id})")
try:
hvsr_data.plot()
except:
pass
hvsr_data['processing_parameters']['check_peaks'] = {}
exclude_params_list = ['hvsr_data']
for key, value in orig_args.items():
if key not in exclude_params_list:
hvsr_data['processing_parameters']['check_peaks'][key] = value
return hvsr_data
# Function to export data stream to mseed (by default) or other format supported by obspy.write()
def export_data(hvsr_data, data_export_path, data_export_format='mseed', starttime=None, endtime=None, tzone=None, export_edited_stream=False,
                site=None, project=None, verbose=False, **kwargs):
    """Export data stream to file.

    This uses the obspy.Stream.write() method on the hvsr_data['stream'] object,
    but the stream can first be trimmed using starttime, endtime, and tzone.

    Parameters
    ----------
    hvsr_data : HVSRData, HVSRBatch, obspy.Stream, obspy.Trace
        Input stream or HVSR object
    data_export_path : pathlike-object
        Filepath at which to export data. If directory (recommended), filename will be generated automatically.
        A path that does not exist yet is created: it is treated as a file if it has an extension,
        otherwise as a directory.
    data_export_format : str, optional
        Format of data, should be file format supported by obspy.write(), by default 'mseed'.
        Used (upper-cased, without leading '.') as the extension of automatically generated filenames.
    starttime : str, UTCDateTime, or datetime.datetime, optional
        Starttime of stream, if trimming is desired, by default None.
        If a string, the date is replaced by the date on which the first trace starts.
    endtime : str, UTCDateTime, or datetime.datetime, optional
        Endtime of stream, if trimming is desired, by default None
        If a string, the date is replaced by the date on which the last trace ends.
    tzone : str, zoneinfo.Zoneinfo, optional
        String readable by zoneinfo.Zoneinfo() or Zoneinfo object, by default None
    export_edited_stream : bool, optional
        Whether to export the raw stream ('stream' property; if False) or edited stream ('stream_edited' property; if True) in HVSRData object, by default False.
    site : str, optional
        Site name, to be used in filename generation, by default None.
        For HVSRBatch input, the batch key of each site is used when this is None.
    project : str, optional
        Project or county name, to be used in filename generation, by default None
    verbose : bool, optional
        Whether to print information to terminal, by default False
    **kwargs
        If 'fill_value' is given, masked samples are filled with that value before writing;
        otherwise the stream is split at masked gaps before writing.

    Returns
    -------
    obspy.Stream or None
        Stream object exported. None is returned for HVSRBatch input (each site is exported separately).

    Raises
    ------
    TypeError
        hvsr_data must be of type HVSRData, HVSRBatch, obspy.Stream, or obspy.Trace
    """
    # Batch input: export each site on its own, forwarding every option
    if isinstance(hvsr_data, HVSRBatch):
        for site_name in hvsr_data.keys():
            export_data(hvsr_data[site_name], data_export_path=data_export_path, data_export_format=data_export_format,
                        starttime=starttime, endtime=endtime, tzone=tzone, export_edited_stream=export_edited_stream,
                        site=site_name if site is None else site, project=project, verbose=verbose, **kwargs)
        return

    # Extract stream for export
    if isinstance(hvsr_data, obspy.Stream):
        outputStream = hvsr_data.copy()
    elif isinstance(hvsr_data, obspy.Trace):
        outputStream = obspy.Stream([hvsr_data])
    else:
        # Assume data is in hvsr_data
        if not isinstance(hvsr_data, HVSRData):
            raise TypeError(f"The sprit.export_data() parameter hvsr_data must be of type HVSRData, HVSRBatch, obspy.Stream, or obspy.Trace, not {type(hvsr_data)}")
        if export_edited_stream and hasattr(hvsr_data, 'stream_edited'):
            outputStream = hvsr_data['stream_edited'].copy()
        else:
            outputStream = hvsr_data['stream'].copy()

    def _as_utc(time_value, acq_date):
        """Convert a user-supplied time to obspy.UTCDateTime, using acq_date as the date for string input."""
        if isinstance(time_value, str):
            time_dt = sprit_utils._format_time(time_value, tzone=tzone)
            # datetime.replace() returns a new object; the result must be kept
            time_dt = time_dt.replace(year=acq_date.year, month=acq_date.month, day=acq_date.day)
            return obspy.UTCDateTime(time_dt)
        if isinstance(time_value, datetime.datetime):
            if tzone is not None:
                # tzinfo must be a tzinfo instance, so convert timezone names first
                tz_info = zoneinfo.ZoneInfo(tzone) if isinstance(tzone, str) else tzone
                time_value = time_value.replace(tzinfo=tz_info)
            return obspy.UTCDateTime(time_value.astimezone(datetime.timezone.utc))
        return obspy.UTCDateTime(time_value)

    # Get starttime and endtime in obspy.UTCDateTime format
    if starttime is not None:
        sTimeUTC = _as_utc(starttime, outputStream[0].stats.starttime.date)
    else:
        sTimeUTC = outputStream[0].stats.starttime

    if endtime is not None:
        eTimeUTC = _as_utc(endtime, outputStream[-1].stats.endtime.date)
    else:
        eTimeUTC = outputStream[-1].stats.endtime

    # Build filepath
    siteName = "HVSRSite" if site is None else site
    projectName = "" if project is None else project
    if projectName != "" and projectName[-1] != '-':
        projectName += "-"

    sDateStr = outputStream[0].stats.starttime.strftime("%Y%m%d")
    sTimeStr = outputStream[0].stats.starttime.strftime("%H%M")
    staStr = outputStream[0].stats.station

    deFormat = str(data_export_format).upper().lstrip('.')
    autoFname = f"{siteName}_Stream_{projectName}{sDateStr}-{sTimeStr}-{staStr}_{datetime.date.today()}.{deFormat}"

    dePath = pathlib.Path(data_export_path)
    if dePath.is_dir():
        outfPath = dePath.joinpath(autoFname)
    elif dePath.is_file() or dePath.suffix != '':
        # Existing file, or a new file path: make sure its directory exists
        dePath.parent.mkdir(parents=True, exist_ok=True)
        outfPath = dePath
    else:
        # Non-existent path without extension: treat as a directory to be created
        dePath.mkdir(parents=True, exist_ok=True)
        outfPath = dePath.joinpath(autoFname)

    # Trim stream as needed
    if starttime is not None or endtime is not None:
        isMasked = any(isinstance(tr.data, np.ma.masked_array) for tr in outputStream)
        # Trim whenever the requested window is narrower than at least one trace
        doTrim = any(sTimeUTC > tr.stats.starttime or eTimeUTC < tr.stats.endtime for tr in outputStream)

        if isMasked:
            outputStream = outputStream.split()

        if doTrim:
            if verbose:
                print(f"\t Trimming data to {sTimeUTC} and {eTimeUTC}\n\t\t Stream starttime: {outputStream[0].stats.starttime}\n\t\t Stream endtime: {outputStream[0].stats.endtime}")
            outputStream.trim(starttime=sTimeUTC, endtime=eTimeUTC)
        outputStream.merge(method=1)

    # Take care of masked arrays for writing purposes
    if 'fill_value' in kwargs:
        for tr in outputStream:
            if isinstance(tr.data, np.ma.masked_array):
                tr.data = tr.data.filled(kwargs['fill_value'])
    else:
        outputStream = outputStream.split()

    outputStream.write(filename=outfPath.as_posix())
    if verbose:
        print('Stream has been written to ' + outfPath.as_posix())

    return outputStream
# Function to export data to .hvsr file (pickled)
def export_hvsr(hvsr_data, hvsr_export_path=None, ext='hvsr', export_type='gzip',
                export_plots=False,
                verbose=False):
    """Export data into pickle format that can be read back in using import_data().

    Intended so data does not need to be processed each time it needs to be used.
    By default, first, export_hvsr serializes the HVSRData object(s) using pickle.dumps().
    Then, to save space, it writes that to a gzip file.
    Default extension is .hvsr no matter the format, though this can be set with `ext` parameter.
    For HVSRBatch input, one file is written per site.

    Parameters
    ----------
    hvsr_data : HVSRData or HVSRBatch
        Data to be exported
    hvsr_export_path : str or filepath object, default = None
        String or filepath object to be read by pathlib.Path() and/or a with open(hvsr_export_path, 'wb') statement.
        If a directory, a filename is generated inside it.
        If None (or True), a generated filename is placed next to the path stored in the 'input_data' entry of the data, by default None
    ext : str, default = 'hvsr'
        Filepath extension to use for generated filenames, by default 'hvsr' (a leading '.' is ignored).
        This will be the extension no matter the export_type
    export_type : str, default = 'gzip'
        Export type to use. If `export_type` is 'pickle', will just save to disk using pickle.dump.
        Otherwise, saves a pickle-serialized object to a gzip file (with a .hvsr extension in both cases, by default).
    export_plots : bool, default = False
        If False, the attributes named in PLOT_KEYS are removed from the copy of the data that is exported.
    verbose : bool, default=False
        Whether to print a confirmation with the output path and approximate file size.

    Returns
    -------
    None
        If hvsr_data is neither HVSRData nor HVSRBatch, an error message is printed and nothing is written.
    """
    def _hvsr_export(_hvsr_data, _export_path=hvsr_export_path, _ext=ext):
        """Write a single HVSRData object to disk."""
        _ext = str(_ext).lstrip('.')
        fname = f"{_hvsr_data['site']}_HVSRData_{_hvsr_data['hvsr_id']}_{datetime.date.today()}_pickled.{_ext}"
        if _export_path is None or _export_path is True:
            _export_path = _hvsr_data['input_data']
            _export_path = pathlib.Path(_export_path).with_name(fname)
        else:
            _export_path = pathlib.Path(_export_path)
        if _export_path.is_dir():
            _export_path = _export_path.joinpath(fname)

        _export_path = str(_export_path)
        if export_type == 'pickle':
            with open(_export_path, 'wb') as f:
                pickle.dump(_hvsr_data, f)
        else:
            with gzip.open(_export_path, 'wb') as f:
                f.write(pickle.dumps(_hvsr_data))

        if verbose:
            print('EXPORT COMPLETE')
            print(f"Processed data exported as pickled data to: {_export_path} [~{round(float(pathlib.Path(_export_path).stat().st_size)/2**20,1)} Mb]")

    # Validate the input before touching it so that bad input yields the error message below
    if isinstance(hvsr_data, HVSRBatch):
        siteObjects = [hvsr_data[sitename] for sitename in hvsr_data.keys()]
    elif isinstance(hvsr_data, HVSRData):
        siteObjects = [hvsr_data]
    else:
        print("Error in data export. Data must be either of type sprit.HVSRData or sprit.HVSRBatch")
        return

    for siteData in siteObjects:
        # Work on a copy so removing plot attributes is not applied to the caller's object
        hvData = siteData.copy()
        if export_plots is False:
            # Strip plots from each site (not from the batch container) before pickling
            for pk in PLOT_KEYS:
                if hasattr(hvData, pk):
                    delattr(hvData, pk)
        _hvsr_export(_hvsr_data=hvData, _export_path=hvsr_export_path, _ext=ext)
    return
# Function to export reports to disk in various formats
def export_report(hvsr_results, report_export_path=None, report_export_format=['pdf'], azimuth='HV', csv_handling='rename', show_report=True, verbose=False):
    """Function to export reports to disk. Exportable formats for report_export_format include:
        * 'table': saves a pandas DataFrame as a csv
        * 'plot': saves the matplotlib or plotly plot figure (depending on what is designated via plot_engine) as an image (png by default)
        * 'print': saves the print report as a .txt file
        * 'html': saves the html report as a .html file
        * 'pdf': saves the pdf report as a .pdf file

    Parameters
    ----------
    hvsr_results : HVSRData object
        HVSRData object containing the HVSR data
    report_export_path : path-like object, optional
        The path to where the report should be exported, by default None.
        If this is None or True, a filename is generated next to the input data
        (or in the current working directory / home directory if the input data is one of the sample files).
        If this is False, a filename is generated in the home directory.
        If this is a directory, generates a filename.
        If filename is specified and the extension does not match the report type, the extension is adjusted.
        For 'pdf', this value is passed on unchanged to the pdf report generator.
    report_export_format : str or list, optional
        The format (or a list of formats) to export the report, by default ['pdf'].
        Formats are matched case-insensitively; an unrecognized format falls back to 'pdf' with a warning.
    azimuth : str, optional
        Azimuth key used when generating reports and when storing the plot report under 'BestPeak', by default 'HV'
    csv_handling : {'rename', 'append', 'overwrite', 'keep'}, optional
        If table is the report type, this can prevent overwriting data, by default 'rename'.
        * "rename" (or "keep"): renames the new file to prevent overwrite, appends a two-digit counter to the end of filename
        * "append": appends the new data to the existing file
        * "overwrite": overwrites the existing file
    show_report : bool, optional
        Whether to show the designated reports that were chosen for export, by default True
    verbose : bool, optional
        Whether to print progress and other information to terminal, by default False

    Returns
    -------
    HVSRData
        An HVSRData object that is the same as hvsr_results, but with any additionally generated reports.
    """
    extByFormat = {'table': '.csv', 'plot': '.png', 'print': '.txt', 'html': '.html', 'pdf': '.pdf'}
    maxColWidth = 13

    def _next_free_path(path):
        """Return path, or the first 'stem_NN' sibling of it that does not exist yet."""
        match = re.fullmatch(r'(.*)_(\d{2})', path.stem)
        if match:
            base, counter = match.group(1), int(match.group(2))
        else:
            base, counter = path.stem, 0
        candidate = path
        while candidate.exists():
            counter += 1
            candidate = path.with_name(f"{base}_{str(counter).zfill(2)}{path.suffix}")
        return candidate

    def _fit(value):
        """Truncate the text of a table cell to the printed column width."""
        text = str(value)
        if len(text) > maxColWidth:
            return text[:maxColWidth-3]+'...'
        return text

    if isinstance(report_export_format, str):
        report_export_format = [report_export_format]

    for ref in report_export_format:
        ref = str(ref).lower()
        if ref not in extByFormat:
            # Unknown formats fall back to pdf (the original comparison `ref == 'pdf'` had no effect)
            warnings.warn(f"Unrecognized report_export_format '{ref}'; exporting as 'pdf' instead.", category=RuntimeWarning)
            ref = 'pdf'
        ext = extByFormat[ref]

        if report_export_path is None:
            # NOTE(review): this message says the home directory is used, but None is handled
            # like True below (input-data directory). Confirm which behavior is intended.
            print('The export_report(report_export_path) parameter was not specified.')
            print(f'The report will be saved the home directory: {pathlib.Path.home()}')

        sitename = hvsr_results['input_params']['site']
        fname = f"{sitename}_REPORT_{hvsr_results['hvsr_id']}_{datetime.date.today()}{ext}"
        fname = fname.replace(':', '')

        # Initialize output as file in home directory (if not updated)
        outFile = pathlib.Path().home().joinpath(fname)
        if report_export_path is True or report_export_path is None:
            inputPath = pathlib.Path(hvsr_results['input_params']['input_data'])
            # Check so we don't write in sample directory
            if inputPath in sampleFileKeyMap.values():
                if pathlib.Path(os.getcwd()) in sampleFileKeyMap.values():  # Just in case current working directory is also sample directory
                    inFile = pathlib.Path.home()  # Use the path to user's home if all else fails
                else:
                    inFile = pathlib.Path(os.getcwd())
            else:
                inFile = inputPath

            if inFile.is_dir():
                outFile = inFile.joinpath(fname)
            else:
                outFile = inFile.with_name(fname)
        elif report_export_path is False:
            pass
        elif pathlib.Path(report_export_path).is_dir():
            outFile = pathlib.Path(report_export_path).joinpath(fname)
        else:
            outFile = pathlib.Path(report_export_path)
            # Adjust the extension to the report type, as documented
            if outFile.suffix.lower() != ext:
                outFile = outFile.with_suffix(ext)

        if ref == 'table':
            if not hasattr(hvsr_results, 'Table_Report'):
                hvsr_results = _generate_table_report(hvsr_results, azimuth=azimuth, show_table_report=show_report, verbose=verbose)
            reportDF = hvsr_results['Table_Report']

            # Check if file already exists, and handle as specified in csv_handling
            if outFile.exists():
                handling = str(csv_handling).lower()
                if handling == 'append':
                    # Append report to existing report as new row
                    existFile = pd.read_csv(outFile)
                    reportDF = pd.concat([existFile, reportDF], ignore_index=True, join='inner')
                elif handling == 'overwrite':
                    # Overwrite existing report file
                    pass
                else:  # 'keep', 'rename', or other
                    # Rename new report so as not to modify existing report (default handling)
                    outFile = _next_free_path(outFile)

            # Export to csv using pandas to_csv method
            try:
                print(f'\nSaving table report to: {outFile}')
                reportDF.to_csv(outFile, index_label='ID')
            except Exception:
                warnings.warn("Table report not exported. \n\tDataframe to be exported as csv has been saved in hvsr_results['BestPeak']['Report']['Table_Report]", category=RuntimeWarning)

            if show_report or verbose:
                print('\nTable Report:\n')
                # Header row
                print(' ', end='')
                for col in reportDF.columns:
                    print(_fit(col).ljust(maxColWidth), end=' ')
                print()  # new line
                # Separator row
                for c in range(len(reportDF.columns) * (maxColWidth+2)):
                    if c % (maxColWidth+2) == 0:
                        print('|', end='')
                    else:
                        print('-', end='')
                print('|')  # new line
                # Data rows, each with the same small indent as the header
                for _, row in reportDF.iterrows():
                    print(' ', end='')
                    for val in row:
                        print(_fit(val).ljust(maxColWidth), end=' ')
                    print()
        elif ref == 'plot':
            if not hasattr(hvsr_results, 'Plot_Report') or hvsr_results['Plot_Report'] is None:
                fig = plot_hvsr(hvsr_results, return_fig=True)
                hvsr_results['BestPeak'][azimuth]['Report']['Plot_Report'] = hvsr_results['Plot_Report'] = fig
            else:
                fig = hvsr_results['Plot_Report']

            if verbose:
                print(f'\nSaving plot to: {outFile}')
            # Save the figure object itself rather than whatever pyplot considers current
            if hasattr(fig, 'savefig'):
                fig.savefig(outFile)
            elif hasattr(fig, 'write_image'):
                # plotly figures export via write_image
                fig.write_image(str(outFile))
            else:
                warnings.warn(f"Plot report not exported: object of type {type(fig)} cannot be saved as an image.", category=RuntimeWarning)
        elif ref == 'print':
            if not hasattr(hvsr_results, "Print_Report") or hvsr_results['Print_Report'] is None:
                hvsr_results = _generate_print_report(hvsr_results, azimuth=azimuth, show_print_report=show_report, verbose=verbose)
            with open(outFile, 'w', encoding='utf-8') as outF:
                outF.write(hvsr_results['Print_Report'])
                # Could write more details in the future
            if show_report or verbose:
                print(hvsr_results['Print_Report'])
        elif ref == "html":
            if not hasattr(hvsr_results, "HTML_Report") or hvsr_results['HTML_Report'] is None:
                hvsr_results = _generate_html_report(hvsr_results)
            with open(outFile, 'w', encoding='utf-8') as outF:
                outF.write(hvsr_results['HTML_Report'])
        elif ref == "pdf":
            hvsr_results = _generate_pdf_report(hvsr_results, pdf_report_filepath=report_export_path, show_pdf_report=show_report, verbose=verbose)

    return hvsr_results
# **WORKING ON THIS**
# Save default instrument and processing settings to json file(s)
def export_settings(hvsr_data, export_settings_path='default', export_settings_type='all', include_location=False, verbose=True):
    """Save processing settings to json file.

    Instrument settings are written with a '.inst' suffix and processing settings with a '.proc' suffix.
    If a settings file cannot be written to the selected directory, it is written to the home directory instead.

    Parameters
    ----------
    hvsr_data : HVSRData
        Object from which settings are read: the instrument keys (and, optionally, the location keys)
        are read directly, and the processing settings are read from its 'processing_parameters' entry.
    export_settings_path : str, default="default"
        Where to save the json file(s) containing the settings, by default 'default'.
        If "default," will save to sprit package resources. Otherwise, set a filepath location you would like for it to be saved to.
        If 'all' is selected, a directory should be supplied.
        Otherwise, it will save in the directory of the provided file, if it exists. Otherwise, defaults to the home directory.
    export_settings_type : str, {'all', 'instrument', 'processing'}
        What kind of settings to save.
        If 'all', saves all possible types in their respective json files.
        If 'instrument', save the instrument settings to their respective file.
        If 'processing', saves the processing settings to their respective file. By default 'all'
    include_location : bool, default=False
        Whether to include the location parameters in the exported settings document. This includes xcoord, ycoord, elevation, elev_unit, and input_crs
    verbose : bool, default=True
        Whether to print outputs and information to the terminal
    """
    def _settings_to_json(settings_dict, nested=False):
        """Serialize settings to JSON and apply the readability string replacements."""
        jsonString = json.dumps(settings_dict, indent=2)
        # Format output for readability
        jsonString = jsonString.replace('\n ', ' ')
        jsonString = jsonString.replace('[ ', '[')
        jsonString = jsonString.replace('\n ]', ']')
        if nested:
            # Additional replacements used for the nested processing settings
            jsonString = jsonString.replace('\n },','\n\t\t},\n')
            jsonString = jsonString.replace('{ "', '\n\t\t{\n\t\t"')
            jsonString = jsonString.replace(', "', ',\n\t\t"')
            jsonString = jsonString.replace('\n }', '\n\t\t}')
            jsonString = jsonString.replace(': {', ':\n\t\t\t{')
        return jsonString

    def _write_settings(jsonString, targetPath):
        """Write jsonString to targetPath, or to the home directory if that fails; return the path written."""
        try:
            with open(targetPath.as_posix(), 'w') as settingsFile:
                settingsFile.write(jsonString)
        except OSError:
            # e.g., package resources directory is not writable
            targetPath = pathlib.Path.home().joinpath(targetPath.name)
            with open(targetPath.as_posix(), 'w') as settingsFile:
                settingsFile.write(jsonString)
        return targetPath

    def _serializable(value):
        """Convert path objects (which json cannot serialize) to posix strings."""
        if isinstance(value, pathlib.PurePath):
            return value.as_posix()
        return value

    fnameDict = {}
    fnameDict['instrument'] = "instrument_settings.json"
    fnameDict['processing'] = "processing_settings.json"

    if export_settings_path == 'default' or export_settings_path is True:
        settingsPath = RESOURCE_DIR.joinpath('settings')
    else:
        export_settings_path = pathlib.Path(export_settings_path)
        if not export_settings_path.exists():
            if not export_settings_path.parent.exists():
                print(f'The provided value for export_settings_path ({export_settings_path}) does not exist. Saving settings to the home directory: {pathlib.Path.home()}')
                settingsPath = pathlib.Path.home()
            else:
                settingsPath = export_settings_path.parent

        if export_settings_path.is_dir():
            settingsPath = export_settings_path
        elif export_settings_path.is_file():
            settingsPath = export_settings_path.parent
            fnameDict['instrument'] = export_settings_path.name+"_instrumentSettings.json"
            fnameDict['processing'] = export_settings_path.name+"_processingSettings.json"

    # Get final filepaths (with the suffixes actually written, so messages report the real file)
    instSetFPath = settingsPath.joinpath(fnameDict['instrument']).with_suffix('.inst')
    procSetFPath = settingsPath.joinpath(fnameDict['processing']).with_suffix('.proc')

    # Get settings values
    instKeys = ["instrument", "net", "sta", "loc", "cha", "depth", "metadata", "hvsr_band"]
    inst_location_keys = ['xcoord', 'ycoord', 'elevation', 'elev_unit', 'input_crs']
    procFuncs = [fetch_data, remove_noise, generate_psds, process_hvsr, check_peaks, get_report]

    exportKeys = instKeys + inst_location_keys if include_location else instKeys
    instrument_settings_dict = {k: _serializable(hvsr_data[k]) for k in exportKeys}

    processing_settings_dict = {}
    procParams = hvsr_data['processing_parameters']
    for func in procFuncs:
        funcName = func.__name__
        processing_settings_dict[funcName] = {}
        try:
            funcParams = procParams[funcName]
        except KeyError:
            # This processing step has no recorded parameters; leave its entry empty
            continue
        for arg in funcParams:
            # HVSR data objects are inputs, not settings, so they are not exported
            if not isinstance(funcParams[arg], (HVSRBatch, HVSRData)):
                processing_settings_dict[funcName][arg] = funcParams[arg]

    if verbose:
        print("Exporting Settings")

    # Save settings files
    settingsType = str(export_settings_type).lower()
    if settingsType == 'instrument' or settingsType == 'all':
        # Serialize before opening the file so a serialization error cannot leave an empty file behind
        jsonString = _settings_to_json(instrument_settings_dict)
        instSetFPath = _write_settings(jsonString, instSetFPath)
        if verbose:
            print(f"Instrument settings exported to {instSetFPath}")
            print(f"{jsonString}")
            print()

    if settingsType == 'processing' or settingsType == 'all':
        jsonString = _settings_to_json(processing_settings_dict, nested=True)
        procSetFPath = _write_settings(jsonString, procSetFPath)
        if verbose:
            print(f"Processing settings exported to {procSetFPath}")
            print(f"{jsonString}")
            print()
# Reads in traces to obspy stream
def fetch_data(params, source='file', data_export_path=None, data_export_format='mseed',
               detrend='spline', detrend_options=2, filter_type=None, filter_options={},
               update_metadata=True,
               plot_input_stream=False, plot_engine='matplotlib', show_plot=True,
               verbose=False, **kwargs):
    """Fetch ambient seismic data from a source to read into obspy stream.

    The function reads the data referenced by params['input_data'], reconciles the
    site/network/station/location/channel/date/time parameters with the metadata found in the data
    (unless source='raw'), trims, optionally exports, detrends, optionally filters and plots the
    stream, and finally tries to attach the instrument response from params['inv'].

    Parameters
    ----------
    params : dict
        Dictionary containing all the necessary params to get data.
        Parameters defined using input_params() function.
    source : str, {'raw', 'dir', 'file', 'batch'}
        String indicating where/how data file was created. For example, if raw data, will need to find correct channels.
        'raw' finds raspberry shake data, from raw output copied using scp directly from Raspberry Shake, either in folder or subfolders;
        'dir' is used if the day's 3 component files (currently Raspberry Shake supported only) are all 3 contained in a directory by themselves.
        'file' is used if the params['input_data'] specified in input_params() is the direct filepath to a single file to be read directly into an obspy stream.
        'batch' is used to read a list or specified set of seismic files.
        Most commonly, a csv file can be read in with all the parameters. Each row in the csv is a separate file. Columns can be arranged by parameter.
        If params['input_data'] is already an obspy Stream or Trace (or an HVSRData object), it is used directly regardless of source.
    data_export_path : None or str or pathlib obj, default=None
        If None (or False, or the string 'None'), data is not exported by this function.
        Otherwise, this is the directory to save trimmed and exported data.
    data_export_format: str='mseed'
        If data_export_path is not None, this is the format in which to save the data
    detrend : str or bool, default='spline'
        If False, data is not detrended.
        Otherwise, this should be a string accepted by the type parameter of the obspy.core.trace.Trace.detrend method: https://docs.obspy.org/packages/autogen/obspy.core.trace.Trace.detrend.html
    detrend_options : int, default=2
        If detrend parameter is 'spline' or 'polynomial', this is passed directly to the order parameter of obspy.core.trace.Trace.detrend method.
    filter_type : None, str
        Type of filter to use on raw data.
        This should either be None or any of {'bandpass', 'bandstop', 'lowpass', 'highpass', 'lowpass_cheby_2', 'lowpass_fir', 'remez_fir'}.
        This passes `filter_type` to the `type` parameter and `**filter_options` to the `**options` parameter of the obspy.Stream filter() method.
        See here for more information: https://docs.obspy.org/packages/autogen/obspy.core.stream.Stream.filter.html
        If None, no filtering is done on the input seismic data.
    filter_options : dict
        Dictionary that will be unpacked into the `**options` parameter of the filter() method of the obspy.Stream class.
        This should fit the parameters of whichever filter type is specifed by filter_type.
        Example options for the 'bandpass' filter_type might be: `filter_options={'freqmin': 0.1, 'freqmax':50, 'df':100, 'corners':4, 'zerophase':True}`.
        See here for more information: https://docs.obspy.org/packages/autogen/obspy.core.stream.Stream.filter.html
    update_metadata : bool, default=True
        Whether to update the metadata file, used primarily with Raspberry Shake data which uses a generic inventory file.
    plot_input_stream : bool, default=False
        Whether to plot the raw input stream. This plot includes a spectrogram (Z component) and the raw (with decimation for speed) plots of each component signal.
    plot_engine : str, default='matplotlib'
        Which plotting library/engine to use for plotting the Input stream. Options are 'matplotlib', 'plotly', or 'obspy' (not case sensitive).
    show_plot : bool, default=True
        Only used if plot_input_stream is True. For the matplotlib and obspy engines, the figure is shown with plt.show() if True and closed with plt.close() otherwise.
        For the plotly engine, the value is passed on to sprit_plot.plot_input_stream().
    verbose : bool, default=False
        Whether to print outputs and inputs to the terminal
    **kwargs
        Keywords arguments, primarily for 'batch' and 'dir' sources.
        Keys matching obspy.read() arguments are forwarded to obspy.read(); 'clean_ends' (default True),
        'spectrogram_component' and 'obspy_ppsds' are also read by this function.

    Returns
    -------
    params : HVSRData or HVSRBatch object
        Same as params parameter, but with an additional "stream" attribute with an obspy data stream with 3 traces: Z (vertical), N (North-south), and E (East-west)

    Raises
    ------
    RuntimeError
        If raw data cannot be fetched, if the input cannot be read with obspy.read() for an unrecognized source,
        or if the data read does not contain the metadata needed to update the parameters.
    ValueError
        If a date tuple is out of range, if a Tromino file cannot be read either as Tromino data or by ObsPy,
        or if the instrument response cannot be matched while obspy_ppsds=True is requested.
    """
    # Get input parameters
    # NOTE: this must stay the first statement so that locals() contains only the call arguments
    orig_args = locals().copy()
    start_time = datetime.datetime.now()

    # Keep track of any updates made to raw input along the way
    update_msg = []

    # Update with processing parameters specified previously in input_params, if applicable
    if 'processing_parameters' in params.keys():
        if 'fetch_data' in params['processing_parameters'].keys():
            defaultVDict = dict(zip(inspect.getfullargspec(fetch_data).args[1:],
                                    inspect.getfullargspec(fetch_data).defaults))
            defaultVDict['kwargs'] = kwargs
            for k, v in params['processing_parameters']['fetch_data'].items():
                # Manual input to function overrides the imported parameter values
                if k != 'params' and k in orig_args.keys() and orig_args[k]==defaultVDict[k]:
                    update_msg.append(f'\t\t{k} = {v} (previously {orig_args[k]})')
                    orig_args[k] = v

    # Update local variables, in case of previously-specified parameters
    source = orig_args['source'].lower()
    data_export_path = orig_args['data_export_path']
    data_export_format = orig_args['data_export_format']
    detrend = orig_args['detrend']
    detrend_options = orig_args['detrend_options']
    filter_type = orig_args['filter_type']
    filter_options = orig_args['filter_options']
    update_metadata = orig_args['update_metadata']
    plot_input_stream = orig_args['plot_input_stream']
    plot_engine = orig_args['plot_engine']
    verbose = orig_args['verbose']
    kwargs = orig_args['kwargs']

    # Print inputs for verbose setting
    if verbose:
        print('\nFetching data (fetch_data())')
        for key, value in orig_args.items():
            if not isinstance(value, (HVSRData, HVSRBatch)):
                print('\t {}={}'.format(key, value))
        print()

        if 'processing_parameters' in params.keys() and 'fetch_data' in params['processing_parameters'].keys():
            if update_msg != []:
                update_msg.insert(0, '\tThe following parameters were updated using the processing_parameters attribute:')
                for msg_line in update_msg:
                    print(msg_line)
                print()

    raspShakeInstNameList = ['raspberry shake', 'shake', 'raspberry', 'rs', 'rs3d', 'rasp. shake', 'raspshake']
    trominoNameList = ['tromino', 'trom','tromino blue', 'tromino blu', 'tromino 3g', 'tromino 3g+', 'tr', 't']

    # Check if data is from tromino, and adjust parameters accordingly
    if 'trc' in pathlib.Path(str(params['input_data'])).suffix:
        if verbose and hasattr(params, 'instrument') and params['instrument'].lower() not in trominoNameList:
            print(f"\t Data from tromino detected. Changing instrument from {params['instrument']} to 'Tromino'")
        if 'tromino' not in str(params['instrument']).lower():
            params['instrument'] = 'Tromino'

    # Get metadata (inventory/response information)
    params = get_metadata(params, update_metadata=update_metadata, source=source, verbose=verbose)
    inv = params['inv']
    date = params['acq_date']

    # Cleanup for gui input
    if isinstance(params['input_data'], (obspy.Stream, obspy.Trace)):
        pass
    elif '}' in str(params['input_data']):  # This is how tkinter gui data comes in
        params['input_data'] = params['input_data'].as_posix().replace('{', '')
        params['input_data'] = params['input_data'].split('}')

    # Make sure input_data is pointing to an actual file
    if isinstance(params['input_data'], list):
        for i, d in enumerate(params['input_data']):
            params['input_data'][i] = sprit_utils._checkifpath(str(d).strip(), sample_list=SAMPLE_LIST)
        dPath = params['input_data']
    elif isinstance(params['input_data'], (obspy.Stream, obspy.Trace)):
        dPath = pathlib.Path()  # No file path is associated with in-memory data
    elif isinstance(params['input_data'], HVSRData):
        dPath = pathlib.Path(params['input_data']['input_data'])
        if not isinstance(params['input_data']['stream'], (obspy.Stream, obspy.Trace)):
            # Look through the other parameters for something that is (or can be read into) a stream
            try:
                for k, v in params.items():
                    if isinstance(v, (obspy.Trace, obspy.Stream)):
                        params['input_data']['stream'] = v
                    elif pathlib.Path(str(v)).exists():
                        try:
                            params['input_data']['stream'] = obspy.read(v)
                        except Exception:
                            # Best effort: most existing paths are not seismic files
                            pass
            except Exception as e:
                raise RuntimeError(f'The params["input_data"] parameter of fetch_data() was determined to be an HVSRData object, but no data in the "stream" attribute.') from e
        else:
            if verbose:
                print('\tThe params["input_data"] argument is already an HVSRData obect.')
                print("\tChecking metadata then moving on.")
    else:
        dPath = sprit_utils._checkifpath(params['input_data'], sample_list=SAMPLE_LIST)

    inst = params['instrument']

    # Need to put dates and times in right formats first
    # NOTE(review): doy/year are left undefined when a date string cannot be parsed (only a warning is issued)
    if type(date) is datetime.datetime:
        doy = date.timetuple().tm_yday
        year = date.year
    elif type(date) is datetime.date:
        date = datetime.datetime.combine(date, datetime.time(hour=0, minute=0, second=0))
        doy = date.timetuple().tm_yday
        year = date.year
    elif type(date) is tuple:
        if date[0]>366:
            raise ValueError('First item in date tuple must be day of year (0-366)', 0)
        elif date[1] > datetime.datetime.now().year:
            raise ValueError('Second item in date tuple should be year, but given item is in the future', 0)
        else:
            doy = date[0]
            year = date[1]
    elif type(date) is str:
        if '/' in date:
            dateSplit = date.split('/')
        elif '-' in date:
            dateSplit = date.split('-')
        else:
            dateSplit = date

        if int(dateSplit[0]) > 31:
            # First item is too large to be a day or month, so it is treated as the year
            date = datetime.datetime(int(dateSplit[0]), int(dateSplit[1]), int(dateSplit[2]))
            doy = date.timetuple().tm_yday
            year = date.year
        elif int(dateSplit[0])<=12 and int(dateSplit[2]) > 31:
            warnings.warn("Preferred date format is 'yyyy-mm-dd' or 'yyyy/mm/dd'. Will attempt to parse date.")
            date = datetime.datetime(int(dateSplit[2]), int(dateSplit[0]), int(dateSplit[1]))
            doy = date.timetuple().tm_yday
            year = date.year
        else:
            warnings.warn("Preferred date format is 'yyyy-mm-dd' or 'yyyy/mm/dd'. Cannot parse date.")
    elif type(date) is int:
        doy = date
        year = datetime.datetime.today().year
    else:
        date = datetime.datetime.now()
        doy = date.timetuple().tm_yday
        year = date.year
        warnings.warn("Did not recognize date, using year {} and day {}".format(year, doy))

    # Select which instrument we are reading from (requires different processes for each instrument)
    # Get any kwargs that are included in obspy.read
    obspyReadKwargs = {}
    for argName in inspect.getfullargspec(obspy.read)[0]:
        if argName in kwargs.keys():
            obspyReadKwargs[argName] = kwargs[argName]

    # Select how reading will be done
    if isinstance(params['input_data'], (obspy.Stream, obspy.Trace)):
        if isinstance(params['input_data'], obspy.Stream):
            rawDataIN = params['input_data'].copy()
            tr = params['input_data'][0]
        else:
            rawDataIN = obspy.Stream(params['input_data'])
            tr = params['input_data']
        # Replace the in-memory data with a descriptive identifier string
        params['input_data'] = '_'.join([tr.id, str(tr.stats.starttime)[:10],
                                         str(tr.stats.starttime)[11:19],
                                         str(tr.stats.endtime)[11:19]])
    elif isinstance(params['input_data'], HVSRData):
        rawDataIN = params['input_data']['stream']
    else:
        # dPath may be a list of paths, which has no suffix attribute
        dPathSuffix = getattr(dPath, 'suffix', '')

        if source == 'raw':
            try:
                if inst.lower() in trominoNameList:
                    params['instrument'] = 'Tromino'
                    params['params']['instrument'] = 'Tromino'

                    trominoKwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(read_tromino_files).parameters.keys())}
                    paramDict = {k: v for k, v in params.items()}
                    trominoKwargs.update(paramDict)
                    rawDataIN = read_tromino_files(params, verbose=verbose, **trominoKwargs)

                    if 'site' in rawDataIN[0].stats:
                        if hasattr(params, 'site'):
                            params['site'] = rawDataIN[0].stats.site
                        # Attribute name must be a string; mirror the 'file' branch, which keeps a copy in params['params']
                        if hasattr(params, 'params'):
                            params['params']['site'] = rawDataIN[0].stats.site
                else:
                    if inst.lower() not in raspShakeInstNameList:
                        print(f"Unrecognized value instrument={inst}. Defaulting to raw raspberry shake data.")
                    rawDataIN = __read_RS_file_struct(dPath, source, year, doy, inv, params, verbose=verbose)
            except Exception as e:
                raise RuntimeError(f"Data not fetched for {params['site']}. Check input parameters or the data file.\n\n{e}") from e
        elif source == 'stream' or isinstance(params, (obspy.Stream, obspy.Trace)):
            rawDataIN = params['input_data'].copy()
        elif source == 'dir':
            if inst.lower() in raspShakeInstNameList:
                rawDataIN = __read_RS_file_struct(dPath, source, year, doy, inv, params, verbose=verbose)
            else:
                # Read every file in the directory whose extension (case-insensitive) is an ObsPy format
                obspyFiles = {}
                obspySuffixes = {'.' + obForm.lower() for obForm in OBSPY_FORMATS}
                for f in sorted(pathlib.Path(dPath).iterdir()):
                    if not f.is_file() or f.suffix.lower() not in obspySuffixes:
                        continue
                    # NOTE(review): the same params object is reused (and mutated) for every file
                    params['input_data'] = f
                    curr_data = fetch_data(params, source='file',  # all the same as input, except just reading the one file using the source='file'
                                           data_export_path=data_export_path, data_export_format=data_export_format,
                                           detrend=detrend, detrend_options=detrend_options,
                                           update_metadata=update_metadata, verbose=verbose, **kwargs)
                    curr_data.merge()
                    obspyFiles[f.stem] = curr_data  # Add data to dict, with filepath's stem as the site name
                return HVSRBatch(obspyFiles)
        elif source == 'file' and str(params['input_data']).lower() not in SAMPLE_LIST:
            # Read the file specified by input_data
            # Automatically read tromino data
            if str(inst).lower() in trominoNameList or 'tromino' in str(inst).lower() or 'trc' in dPathSuffix:
                params['instrument'] = 'Tromino'
                params['params']['instrument'] = 'Tromino'

                if 'blu' in str(inst).lower():
                    params['instrument'] = 'Tromino Blue'
                    params['params']['instrument'] = 'Tromino Blue'

                try:
                    trominoKwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(read_tromino_files).parameters.keys())}
                    if 'input_data' in trominoKwargs:
                        del trominoKwargs['input_data']
                    if 'tromino_model' not in trominoKwargs:
                        trominoKwargs['tromino_model'] = params['instrument']

                    rawDataIN = read_tromino_files(input_data=params, verbose=verbose, **trominoKwargs)

                    if 'site' in rawDataIN[0].stats and params['site'] == 'HVSRSite':
                        if hasattr(params, 'site'):
                            params['site'] = rawDataIN[0].stats.site
                        if hasattr(params, 'params'):
                            params['params']['site'] = rawDataIN[0].stats.site

                    params['acq_date'] = rawDataIN[0].stats.starttime.date
                    params['starttime'] = rawDataIN[0].stats.starttime
                    params['endtime'] = rawDataIN[0].stats.endtime
                except Exception:
                    # Fall back to ObsPy if the file cannot be read as Tromino data
                    try:
                        rawDataIN = obspy.read(dPath)
                    except Exception:
                        raise ValueError(f"{dPathSuffix} is not a a filetype that can be read by SpRIT (via ObsPy)")
            else:
                if isinstance(dPath, list) or isinstance(dPath, tuple):
                    rawStreams = []
                    for datafile in dPath:
                        rawStream = obspy.read(datafile, **obspyReadKwargs)
                        rawStreams.append(rawStream)  # These are actually streams, not traces
                    for i, stream in enumerate(rawStreams):
                        if i == 0:
                            rawDataIN = obspy.Stream(stream)  # Just in case
                        else:
                            rawDataIN = rawDataIN + stream  # This adds a stream/trace to the current stream object
                elif str(dPath)[:6].lower() == 'sample':
                    pass
                else:
                    rawDataIN = obspy.read(dPath, **obspyReadKwargs)
        elif source == 'batch' and str(params['input_data']).lower() not in SAMPLE_LIST:
            if verbose:
                print('\nFetching data (fetch_data())')
            batch_data_read_kwargs = {k: v for k, v in kwargs.items() if k in tuple(inspect.signature(batch_data_read).parameters.keys())}
            params = batch_data_read(batch_data=params['input_data'], verbose=verbose, **batch_data_read_kwargs)
            params = HVSRBatch(params, df_as_read=params.input_df)
            return params
        elif str(params['input_data']).lower() in SAMPLE_LIST or f"sample{str(params['input_data']).lower()}" in SAMPLE_LIST:
            if source=='batch':
                params['input_data'] = SAMPLE_DATA_DIR.joinpath('Batch_SampleData.csv')
                params = batch_data_read(batch_data=params['input_data'], batch_type='sample', verbose=verbose)
                params = HVSRBatch(params, df_as_read=params.input_df)
                return params
            elif source=='dir':
                params['input_data'] = SAMPLE_DATA_DIR.joinpath('Batch_SampleData.csv')
                params = batch_data_read(batch_data=params['input_data'], batch_type='sample', verbose=verbose)
                params = HVSRBatch(params, df_as_read=params.input_df)
                return params
            elif source=='file':
                # Map the sample keyword to its file; fall back to the first sample file
                sampleKey = str(params['input_data']).lower()
                if sampleKey in sampleFileKeyMap.keys():
                    params['input_data'] = sampleFileKeyMap[sampleKey]
                else:
                    params['input_data'] = SAMPLE_DATA_DIR.joinpath('SampleHVSRSite1_AM.RAC84.00.2023.046_2023-02-15_1704-1734.MSEED')

                dPath = params['input_data']
                rawDataIN = obspy.read(dPath)
        else:
            # Last try if source cannot be read correctly
            try:
                rawDataIN = obspy.read(dPath)
            except Exception as e:
                raise RuntimeError(f'source={source} not recognized, and input_data cannot be read using obspy.read()') from e

    if verbose:
        print('\t Data as read in initially:')
        print(f'\t {len(rawDataIN)} trace(s) in Stream:')
        for i, trace in enumerate(rawDataIN):
            if i == 0:
                prevComponent = trace.stats.component
                print(f'\t\t{prevComponent} Component')

            currComponent = trace.stats.component
            if prevComponent != currComponent:
                print(f"\t\t{currComponent} Component")
            print("\t\t ", trace)
            prevComponent = trace.stats.component
        print()

    # Get metadata from the data itself, if not reading raw data
    try:
        # If the data already exists (not reading in raw from RS, for example), get the parameters from the data
        dataIN = rawDataIN.copy()
        if source != 'raw':
            # Use metadata from file for updating:
            updateMsg = []

            # site
            site_default = inspect.signature(input_params).parameters['site'].default
            # When several files were read, the first one provides the site name
            sitePath = dPath[0] if isinstance(dPath, (list, tuple)) else dPath
            if params['site'] == site_default and params['site'] != sitePath.stem:
                params['site'] = sitePath.stem
                params['params']['site'] = sitePath.stem
                if verbose:
                    updateMsg.append(f"\tSite name updated to {params['site']}")

            # network
            net_default = inspect.signature(input_params).parameters['network'].default
            if params['net'] == net_default and net_default != dataIN[0].stats.network:
                params['net'] = dataIN[0].stats.network
                params['params']['net'] = dataIN[0].stats.network
                if verbose:
                    updateMsg.append(f"\tNetwork name updated to {params['net']}")

            # station
            sta_default = inspect.signature(input_params).parameters['station'].default
            if str(params['sta']) == sta_default and str(params['sta']) != dataIN[0].stats.station:
                params['sta'] = dataIN[0].stats.station
                params['station'] = dataIN[0].stats.station
                params['params']['sta'] = dataIN[0].stats.station
                params['params']['station'] = dataIN[0].stats.station
                if verbose:
                    updateMsg.append(f"\tStation name updated to {params['sta']}")

            # location
            loc_default = inspect.signature(input_params).parameters['location'].default
            if params['location'] == loc_default and params['location'] != dataIN[0].stats.location:
                params['location'] = dataIN[0].stats.location
                params['params']['location'] = dataIN[0].stats.location
                if verbose:
                    updateMsg.append(f"\tLocation updated to {params['location']}")

            # channels
            channelList = []
            cha_default = inspect.signature(input_params).parameters['channels'].default
            if str(params['cha']) == cha_default:
                for tr in dataIN:
                    if tr.stats.channel not in channelList:
                        channelList.append(tr.stats.channel)
                channelList.sort(reverse=True)  # Just so z is first, just in case

                if set(params['cha']) != set(channelList):
                    params['cha'] = channelList
                    params['params']['cha'] = channelList
                    if verbose:
                        updateMsg.append(f"\tChannels updated to {params['cha']}")

            # Acquisition date
            acqdate_default = str(NOWTIME.date())

            # If input date is default date and does not match date in the data, update to match data
            if str(params['acq_date']) == acqdate_default and params['acq_date'] != dataIN[0].stats.starttime.date:
                params['acq_date'] = params['params']['acq_date'] = dataIN[0].stats.starttime.date
                if verbose:
                    updateMsg.append(f"\tAcquisition Date updated to {params['acq_date']}")
            elif params['acq_date'] != dataIN[0].stats.starttime.date:
                # If date has been input manually and does not match data date, update the data
                newStartDate = sprit_utils._format_time(params['acq_date'])
                params['acq_date'] = params['params']['acq_date'] = newStartDate.date()
                for tr in dataIN.merge():
                    tr.stats.starttime = obspy.UTCDateTime(newStartDate.year,
                                                           newStartDate.month,
                                                           newStartDate.day,
                                                           tr.stats.starttime.hour,
                                                           tr.stats.starttime.minute,
                                                           tr.stats.starttime.second,
                                                           tr.stats.starttime.microsecond)

            # starttime
            maxStarttime = datetime.datetime(year=params['acq_date'].year, month=params['acq_date'].month, day=params['acq_date'].day,
                                             hour=0, minute=0, second=0, microsecond=0, tzinfo=datetime.timezone.utc)

            stime_default = obspy.UTCDateTime(NOWTIME)
            sTimeIsDefault = params['starttime'] == stime_default

            # Check if stime is not the same as the data starttime (if it is, leave it alone!)
            if params['starttime'] != dataIN.merge()[0].stats.starttime:
                # Check if stime in params is the default value
                if sTimeIsDefault:
                    # We will update the params starttime to match the data if it is the default input
                    # Ensure we are getting the largest starttime from the data traces (assumes they all start at the same time, but may be slightly off)
                    for tr in dataIN.merge():
                        currTime = datetime.datetime(year=tr.stats.starttime.year, month=tr.stats.starttime.month, day=tr.stats.starttime.day,
                                                     hour=tr.stats.starttime.hour, minute=tr.stats.starttime.minute,
                                                     second=tr.stats.starttime.second, microsecond=tr.stats.starttime.microsecond, tzinfo=datetime.timezone.utc)
                        if currTime > maxStarttime:
                            maxStarttime = currTime

                    # Calculate new start time based data
                    dataDate = dataIN.merge()[0].stats.starttime.date
                    newStarttime = obspy.UTCDateTime(year=dataDate.year, month=dataDate.month,day=dataDate.day,
                                                     hour=maxStarttime.hour, minute=maxStarttime.minute,
                                                     second=maxStarttime.second, microsecond=maxStarttime.microsecond)

                    # Update parameters to match new starttime (this will be trimmed later if maxStarttime is different than trace starttimes)
                    params['starttime'] = newStarttime
                    params['params']['starttime'] = newStarttime
                    if verbose:
                        updateMsg.append(f"\tStarttime updated to {params['starttime']}")
                else:
                    # If we manually set a starttime in order to trim or otherwise update the data
                    # For trimming data (starttime within data time bounds)
                    sTimeInDataTime = params['starttime'] > dataIN.merge()[0].stats.starttime and params['starttime'] < dataIN.merge()[-1].stats.endtime
                    if sTimeInDataTime:
                        # Don't update anything, will use for trimming later
                        if verbose:
                            updateMsg.append(f"\tStart of data will be trimmed to {params['starttime']}")
                    else:
                        # If params['starttime'] is not in data time bounds, assume that the dataset timing should be updated
                        minStartTime = dataIN.merge()[-1].stats.starttime
                        for tr in dataIN.merge():
                            # Find the earliest trace starttime (in case traces aren't fully aligned)
                            if tr.stats.starttime < minStartTime:
                                minStartTime = tr.stats.starttime

                        # Calculate the offset between the earliest trace starttime and specified starttime
                        timeOffset = minStartTime - params['starttime']

                        # Update the startime for each trace based on offset
                        for tr in dataIN.merge():
                            tr.stats.starttime = tr.stats.starttime - timeOffset
                        if verbose:
                            updateMsg.append(f"\tStarttime updated to {params['starttime']}")

            # endttime
            # Endtime only matters if it is used to trim the data
            eTimeDefault = obspy.UTCDateTime(NOWTIME.year, NOWTIME.month, NOWTIME.day, 23, 59, 59, 999999)
            eTimeIsDefault = params['endtime'] == eTimeDefault

            minEndTime = dataIN.merge()[-1].stats.endtime
            for tr in dataIN.merge():
                if tr.stats.endtime < minEndTime:
                    minEndTime = tr.stats.endtime

            # Check if etime is anything other than default
            if not eTimeIsDefault:
                # If endtime is not default, change params['endtime'] to match data endtime unless it falls within the data time (in which case, will be used to trim later)
                eTimeInDataTimeBounds = (params['endtime'] > dataIN.merge()[0].stats.starttime) and (params['endtime'] < minEndTime)
                if not eTimeInDataTimeBounds:
                    params['endtime'] = params['params']['endtime'] = minEndTime
            else:
                params['endtime'] = params['params']['endtime'] = minEndTime

            # HVSR_ID (derived)
            project = params['project']
            if project is None:
                proj_id = ''
            else:
                proj_id = str(project)+'-'

            # Update HVSR_ID with new information
            params['hvsr_id'] = f"{proj_id}{params['acq_date'].strftime('%Y%m%d')}-{params['starttime'].strftime('%H%M')}-{params['station']}"
            params['params']['hvsr_id'] = f"{proj_id}{params['acq_date'].strftime('%Y%m%d')}-{params['starttime'].strftime('%H%M')}-{params['station']}"

            if verbose and len(updateMsg) > 0:
                updateMsg.insert(0, 'The following parameters have been updated directly from the data:')
                for msgLine in updateMsg:
                    print('\t', msgLine)
                print()

        # Clean up
        dataIN = dataIN.split()
        dataIN = dataIN.trim(starttime=params['starttime'], endtime=params['endtime'])
        dataIN.merge()
    except Exception as e:
        raise RuntimeError(f'Data as read by obspy does not contain the proper metadata. \n{e}.\nCheck your input parameters or the data file.') from e

    # Latitude, Longitude, Elevation
    # Maybe make this more comprehensive, like for all input_params
    if hasattr(dataIN[0].stats, 'latitude'):
        params['latitude'] = params['params']['latitude'] = dataIN[0].stats['latitude']
    if hasattr(dataIN[0].stats, 'longitude'):
        params['longitude'] = params['params']['longitude'] = dataIN[0].stats['longitude']
    if hasattr(dataIN[0].stats, 'elevation'):
        params['elevation'] = params['params']['elevation'] = dataIN[0].stats['elevation']
    if hasattr(dataIN[0].stats, 'elev_unit'):
        params['elev_unit'] = params['params']['elev_unit'] = dataIN[0].stats['elev_unit']
    if hasattr(dataIN[0].stats, 'input_crs'):
        params['input_crs'] = params['params']['input_crs'] = dataIN[0].stats['input_crs']

    # Get and update metadata after updating data from source
    params = get_metadata(params, update_metadata=update_metadata, source=source)
    inv = params['inv']

    # Trim and save data as specified
    if data_export_path == 'None':
        data_export_path = None

    if data_export_path and not isinstance(params, HVSRBatch):
        dataIN = _trim_data(input=params, stream=dataIN, export_dir=data_export_path, source=source, data_export_format=data_export_format)

    # Split data if masked array (if there are gaps)...detrending cannot be done without
    for tr in dataIN:
        if isinstance(tr.data, np.ma.masked_array):
            # Splits entire stream if any trace is masked_array
            dataIN = dataIN.split()
            break

    # Detrend data
    if not isinstance(params, HVSRBatch):
        dataIN = __detrend_data(input=dataIN, detrend=detrend, detrend_options=detrend_options, verbose=verbose, source=source)

    # Filter data (obspy's Stream method is filter(type, **options))
    if not isinstance(params, HVSRBatch) and filter_type is not None:
        dataIN.filter(type=filter_type, **filter_options)

    # Remerge data
    dataIN = dataIN.merge(method=1)

    # Plot the input stream?
    if plot_input_stream:
        if plot_engine.lower() in ['plotly', 'plty', 'p']:
            if 'spectrogram_component' in kwargs.keys():
                specComp = kwargs['spectrogram_component']
            else:
                specComp = 'Z'
            params['Input_Plot'] = sprit_plot.plot_input_stream(hv_data=params, stream=dataIN, spectrogram_component=specComp, show_plot=show_plot, return_fig=True)
        elif plot_engine.lower() in ['obspy', 'ospby', 'osbpy', 'opsby', 'opspy', 'o']:
            params['Input_Plot'] = dataIN.plot(method='full', linewidth=0.25, handle=True, show=False)
            if show_plot:
                plt.show()
            else:
                plt.close()
        else:
            try:
                params['Input_Plot'] = sprit_plot._plot_input_stream_mpl(stream=dataIN, hv_data=params, component='Z', stack_type='linear', detrend='mean', dbscale=True, fill_gaps=None, ylimstd=3, return_fig=True, fig=None, ax=None, show_plot=False)
                if show_plot:
                    plt.show()
                else:
                    plt.close()
            except Exception as e:
                print(f'Error with default plotting method: {e}.\n Falling back to internal obspy plotting method')
                params['Input_Plot'] = dataIN.plot(method='full', linewidth=0.25, handle=True, show=False)
                if show_plot:
                    plt.show()
                else:
                    plt.close()
    else:
        params['Input_Plot'] = None

    # Sort channels (make sure Z is first, makes things easier later)
    if not isinstance(params, HVSRBatch):
        dataIN = _sort_channels(input=dataIN, source=source, verbose=verbose)

    # Clean up the ends of the data unless explicitly specified to do otherwise (this is a kwarg, not a parameter)
    if 'clean_ends' not in kwargs.keys():
        clean_ends = True
    else:
        clean_ends = kwargs['clean_ends']

    if clean_ends:
        # Trim all traces to the window they have in common (latest start, earliest end)
        maxStarttime = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=36500)  # 100 years ago
        minEndtime = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=36500)  # 100 years from now

        for tr in dataIN:
            currStarttime = datetime.datetime(year=tr.stats.starttime.year, month=tr.stats.starttime.month, day=tr.stats.starttime.day,
                                              hour=tr.stats.starttime.hour, minute=tr.stats.starttime.minute,
                                              second=tr.stats.starttime.second, microsecond=tr.stats.starttime.microsecond, tzinfo=datetime.timezone.utc)
            if currStarttime > maxStarttime:
                maxStarttime = currStarttime

            currEndtime = datetime.datetime(year=tr.stats.endtime.year, month=tr.stats.endtime.month, day=tr.stats.endtime.day,
                                            hour=tr.stats.endtime.hour, minute=tr.stats.endtime.minute,
                                            second=tr.stats.endtime.second, microsecond=tr.stats.endtime.microsecond, tzinfo=datetime.timezone.utc)
            if currEndtime < minEndtime:
                minEndtime = currEndtime

        maxStarttime = obspy.UTCDateTime(maxStarttime)
        minEndtime = obspy.UTCDateTime(minEndtime)
        dataIN = dataIN.split()
        for tr in dataIN:
            tr.trim(starttime=maxStarttime, endtime=minEndtime)
        dataIN.merge()

    params['batch'] = False  # Set False by default, will get corrected later if batch
    params['input_stream'] = dataIN.copy()  # Original stream as read
    params['stream'] = dataIN.copy()  # Stream that may be modified later

    # Record the arguments used, so later runs can reuse them via processing_parameters
    if 'processing_parameters' not in params.keys():
        params['processing_parameters'] = {}
    params['processing_parameters']['fetch_data'] = {}
    exclude_params_list = ['params']
    for key, value in orig_args.items():
        if key not in exclude_params_list:
            params['processing_parameters']['fetch_data'][key] = value

    # Attach response data to stream and get paz (for PPSD later)
    # Check if response can be attached
    try:
        responseMatch = {}
        invStations = params['inv'].networks[0].stations  # Assumes only one network per inst
        for trace in params['stream']:
            k = trace.stats.component

            # Check if station, channel, location, and timing match
            responseMatch[k] = False  # Default to false until proven otherwise
            hasCha = hasLoc = hasSta = isStarted = notEnded = False
            for sta in invStations:
                hasCha = False  # all default to false until proven otherwise
                hasLoc = False
                hasSta = False
                isStarted = False
                notEnded = False

                # Check station
                if sta.code == params['stream'][0].stats.station:
                    hasSta = True
                else:
                    continue

                for cha in sta:
                    # Check Channel
                    if cha.code == trace.stats.channel:
                        hasCha = True

                    # Check location
                    if cha.location_code == trace.stats.location:
                        hasLoc = True

                    # Check time (against the trace currently being matched)
                    if (cha.start_date is None or cha.start_date <= trace.stats.starttime):
                        isStarted = True
                    if (cha.end_date is None or cha.end_date >= trace.stats.endtime):
                        notEnded = True

                    if all([hasSta, hasCha, hasLoc, isStarted, notEnded]):
                        responseMatch[k] = True

            if responseMatch[k] is not True:
                # Keep what did/did not match, alongside what is available in the inventory
                invChannels = [invCha for invSta in invStations for invCha in invSta]
                responseMatch[k] = {'Station': (hasSta, [invSta.code for invSta in invStations]),
                                    'Channel': (hasCha, [invCha.code for invCha in invChannels]),
                                    'Location': (hasLoc, [invCha.location_code for invCha in invChannels]),
                                    'Starttime':(isStarted, [invCha.start_date for invCha in invChannels]),
                                    'Endtime': (notEnded, [invCha.end_date for invCha in invChannels])}

        metadataMatchError = False
        errorMsg = 'The following items in your data need to be matched in the instrument response/metadata:'
        for comp, matchItems in responseMatch.items():
            if matchItems is not True:
                metadataMatchError = True
                for matchType, match in matchItems.items():
                    if match[0] is False:
                        errorMsg = errorMsg + f"\n\t{matchType} does not match {match[1]} correctly for component {comp}: {params['stream'].select(component=comp)[0].stats[matchType.lower()]}"

        if metadataMatchError:
            if verbose:
                print(errorMsg)
            raise ValueError('Instrument Response/Metadata does not match input data and cannot be used!!\n'+errorMsg)
        else:
            params['stream'].attach_response(params['inv'])
            for tr in params['stream']:
                cmpnt = tr.stats.component

                params['paz'][cmpnt]['poles'] = tr.stats.response.get_paz().poles
                params['paz'][cmpnt]['zeros'] = tr.stats.response.get_paz().zeros
                params['paz'][cmpnt]['sensitivity'] = tr.stats.response.get_paz().stage_gain
                params['paz'][cmpnt]['gain'] = tr.stats.response.get_paz().normalization_factor
    except Exception as e:
        # A usable response is only mandatory when obspy PPSDs were requested
        if 'obspy_ppsds' in kwargs and kwargs['obspy_ppsds']:
            errMsg = "Metadata missing, incomplete, or incorrect. Instrument response cannot be removed."
            errMsg += "if metadata cannot be matched, use obspy_ppsds=False to perform analysis on raw data (without instrument response removed)"
            raise ValueError(errMsg) from e
        else:
            if verbose:
                print("\tMetadata/instrument response does not match data.")
                print("\t Raw data (without the instrument response removed) will be used for processing.")

    params['processing_status']['fetch_data_status'] = True

    if verbose and not isinstance(params, HVSRBatch):
        print('\n')
        dataINStr = dataIN.__str__().split('\n')
        for line in dataINStr:
            print('\t\t', line)

    params = sprit_utils._check_processing_status(params, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose)
    return params
# For backwards compatibility (now generate_psds())
[docs]
def generate_ppsds(hvsr_data, **gen_psds_kwargs):
    """Deprecated alias of generate_psds(), kept for backwards compatibility.

    Emits a DeprecationWarning and then forwards every argument unchanged
    to generate_psds().

    Parameters
    ----------
    hvsr_data : object
        Passed through as the first positional argument of generate_psds().
    **gen_psds_kwargs
        Keyword arguments passed through unchanged to generate_psds().

    Returns
    -------
    object
        Whatever generate_psds() returns for the same arguments.

    See Also
    --------
    generate_psds
    """
    # stacklevel=2 attributes the warning to the caller's line rather than to this
    # wrapper, so Python's default filters (which only show DeprecationWarning
    # triggered from __main__) can actually surface it to the user.
    warnings.warn("generate_ppsds() is now deprecated, use generate_psds()", DeprecationWarning, stacklevel=2)
    return generate_psds(hvsr_data, **gen_psds_kwargs)
# Generate PSDs for each channel
def generate_psds(hvsr_data, window_length=30.0, overlap_pct=0.5, window_type='hann', window_length_method='length',
                  remove_response=False, skip_on_gaps=True, num_freq_bins=512, hvsr_band=DEFAULT_BAND,
                  obspy_ppsds=False, azimuthal_psds=False, verbose=False, plot_psds=False, **obspy_ppsd_kwargs):
    """Calculate Power Spectral Density (PSD) curves for each channel.

    Uses the [scipy.signal.welch()](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.welch.html) function
    to generate PSDs by default, or can use Obspy's PPSD class.

    Info on Obspy PPSD creation here (if obspy_ppsds=True): https://docs.obspy.org/packages/autogen/obspy.signal.spectral_estimation.PPSD.html

    Parameters
    ----------
    hvsr_data : dict, HVSRData object, or HVSRBatch object
        Data object containing all the parameters and other data of interest (stream and paz, for example)
    window_length : float
        Length of the window, in seconds, to use for each PSD calculation. Defaults to 30.0.
    overlap_pct : float
        Percentage (should be 0-1) for overlapping each window used for PSD calculation. Defaults to 0.5.
    window_type : str
        Type of window to use. This is passed to the window parameter of the scipy.signal.welch function
    window_length_method : str = {'length', 'number'}
        Whether the window length should be a measure of length in seconds or number of windows.
        If number of windows uses integer value.
    remove_response : bool, default=False
        Whether to remove the instrument response from the data traces before calculating PSD data.
        If True, the appropriate metadata (i.e., obspy.Inventory object) must be attached to the stream and should be stored in the 'inv' attribute of hvsr_data.
    skip_on_gaps : bool, default=True
        Whether to skip data gaps when processing windows.
        In the default (non-Obspy) path the value is recorded in the per-component 'psds' metadata.
        NOTE(review): this function does not itself insert skip_on_gaps into the keyword arguments given to Obspy's PPSD class.
    num_freq_bins : int, default=512
        Number of frequency bins to use. When using the default (i.e., scipy.signal.welch) PSD function, the frequency bins are created manually for processing.
    hvsr_band : list, default=DEFAULT_BAND
        Two-item [low, high] frequency band. Only used here to derive the PPSD period_limits default
        when hvsr_data carries no 'hvsr_band' entry (directly or under 'input_params').
    obspy_ppsds : bool, default=False
        Whether to use the Obspy PPSD class.
    azimuthal_psds : bool, default=False
        Whether to generate PPSDs for azimuthal data
    verbose : bool, default=False
        Whether to print inputs and results to terminal
    plot_psds : bool, default=False
        Whether to show a plot of the psds here.
    **obspy_ppsd_kwargs : dict
        Dictionary with keyword arguments that are passed directly to obspy.signal.PPSD.
        If the following keywords are not specified, their defaults are amended in this function from the obspy defaults for its PPSD function. Specifically:
            - ppsd_length defaults to 30 (seconds) here instead of 3600
            - period_step_octaves defaults to 0.03125 instead of 0.125
            - period_limits defaults to the periods corresponding to the hvsr_band

    Returns
    -------
    psds : HVSRData object
        Input data with the 'psds', 'ppsds_obspy', 'hvsr_windows_df' and 'tsteps_used' entries added,
        and the parameters used recorded under processing_parameters['generate_psds'].

    See Also
    --------
    [scipy.signal.welch](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.welch.html)
    [obspy.signal.spectral_estimation.PPSD](https://docs.obspy.org/packages/autogen/obspy.signal.spectral_estimation.PPSD.html)
    """
    # Snapshot of the call arguments. Must remain the first statement so that
    # locals() holds only the function parameters.
    orig_args = locals().copy()
    start_time = datetime.datetime.now()

    obspy_ppsd_kwargs_sprit_defaults = obspy_ppsd_kwargs.copy()
    # Set defaults here that are different than obspy defaults
    if 'ppsd_length' not in obspy_ppsd_kwargs.keys():
        obspy_ppsd_kwargs_sprit_defaults['ppsd_length'] = 30.0

    if 'period_step_octaves' not in obspy_ppsd_kwargs.keys():
        obspy_ppsd_kwargs_sprit_defaults['period_step_octaves'] = 0.03125

    if 'period_limits' not in obspy_ppsd_kwargs.keys():
        # Derive the period limits (1/frequency) from whichever hvsr_band is available
        if 'hvsr_band' in hvsr_data.keys():
            obspy_ppsd_kwargs_sprit_defaults['period_limits'] = [1/hvsr_data['hvsr_band'][1], 1/hvsr_data['hvsr_band'][0]]
        elif 'input_params' in hvsr_data.keys() and 'hvsr_band' in hvsr_data['input_params'].keys():
            obspy_ppsd_kwargs_sprit_defaults['period_limits'] = [1/hvsr_data['input_params']['hvsr_band'][1], 1/hvsr_data['input_params']['hvsr_band'][0]]
        else:
            obspy_ppsd_kwargs_sprit_defaults['period_limits'] = [1/hvsr_band[1], 1/hvsr_band[0]]
    else:
        # User specified period_limits: keep the stored hvsr_band consistent with them
        if verbose:
            print(f"\t\tUpdating hvsr_band to band specified by period_limits={obspy_ppsd_kwargs['period_limits']}")

        if 'hvsr_band' in hvsr_data.keys():
            if obspy_ppsd_kwargs['period_limits'] is None:
                obspy_ppsd_kwargs['period_limits'] = np.round([1/hvsr_data['hvsr_band'][1], 1/hvsr_data['hvsr_band'][0]], 3).tolist()
            else:
                hvsr_data['hvsr_band'] = np.round([1/obspy_ppsd_kwargs['period_limits'][1], 1/obspy_ppsd_kwargs['period_limits'][0]], 2).tolist()

        if 'input_params' in hvsr_data.keys() and 'hvsr_band' in hvsr_data['input_params'].keys():
            hvsr_data['input_params']['hvsr_band'] = np.round([1/obspy_ppsd_kwargs['period_limits'][1], 1/obspy_ppsd_kwargs['period_limits'][0]], 2).tolist()

    # Start from obspy's own PPSD defaults, then layer the sprit defaults / user input on top
    obspy_ppsd_kwargs = sprit_utils._get_default_args(PPSD)
    obspy_ppsd_kwargs.update(obspy_ppsd_kwargs_sprit_defaults)
    orig_args['obspy_ppsd_kwargs'] = obspy_ppsd_kwargs

    # Update with processing parameters specified previously in input_params, if applicable
    update_msg = []
    if 'processing_parameters' in hvsr_data.keys():
        if 'generate_psds' in hvsr_data['processing_parameters'].keys():
            defaultVDict = dict(zip(inspect.getfullargspec(generate_psds).args[1:],
                                    inspect.getfullargspec(generate_psds).defaults))
            defaultVDict['obspy_ppsd_kwargs'] = obspy_ppsd_kwargs
            for k, v in hvsr_data['processing_parameters']['generate_psds'].items():
                # Manual input to function overrides the imported parameter values
                if not isinstance(v, (HVSRData, HVSRBatch)) and (k in orig_args.keys()) and (orig_args[k] == defaultVDict[k]):
                    update_msg.append(f'\t\t{k} = {v} (previously {orig_args[k]})')
                    orig_args[k] = v

    # Re-bind EVERY parameter from orig_args. Previously only azimuthal_psds, verbose and
    # obspy_ppsd_kwargs were re-bound, so an override was printed and recorded as used
    # while the computation below still ran with the stale local value.
    window_length = orig_args['window_length']
    overlap_pct = orig_args['overlap_pct']
    window_type = orig_args['window_type']
    window_length_method = orig_args['window_length_method']
    remove_response = orig_args['remove_response']
    skip_on_gaps = orig_args['skip_on_gaps']
    num_freq_bins = orig_args['num_freq_bins']
    hvsr_band = orig_args['hvsr_band']
    obspy_ppsds = orig_args['obspy_ppsds']
    azimuthal_psds = orig_args['azimuthal_psds']
    verbose = orig_args['verbose']
    plot_psds = orig_args['plot_psds']
    obspy_ppsd_kwargs = orig_args['obspy_ppsd_kwargs']

    if verbose:
        print('\nGenerating Probabilistic Power Spectral Densities (generate_psds())')
        print('\tUsing the following parameters:')
        for key, value in orig_args.items():
            if key == 'hvsr_data':
                pass
            else:
                print('\t {}={}'.format(key, value))
        print()

        if 'processing_parameters' in hvsr_data.keys() and 'generate_psds' in hvsr_data['processing_parameters'].keys():
            if update_msg != []:
                update_msg.insert(0, '\tThe following parameters were updated using the processing_parameters attribute:')
                for msg_line in update_msg:
                    print(msg_line)
            print()

    if isinstance(hvsr_data, HVSRBatch):
        # If running batch, we'll loop through each one
        for site_name in hvsr_data.keys():
            args = orig_args.copy()  # Make a copy so we don't accidentally overwrite
            # Swap the batch object for the individual site's data
            args['hvsr_data'] = hvsr_data[site_name]
            if hvsr_data[site_name]['processing_status']['overall_status']:
                try:
                    hvsr_data[site_name] = __generate_ppsds_batch(**args)  # Call another function, that lets us run this function again
                except Exception:
                    # Best-effort per site: a failing site is flagged, the batch continues.
                    # (Exception rather than a bare except, so KeyboardInterrupt/SystemExit propagate.)
                    hvsr_data[site_name]['processing_status']['generate_psds_status'] = False
                    hvsr_data[site_name]['processing_status']['overall_status'] = False
            else:
                hvsr_data[site_name]['processing_status']['generate_psds_status'] = False
                hvsr_data[site_name]['processing_status']['overall_status'] = False

            try:
                # Optional GUI progress update; any failure here is deliberately ignored
                sprit_tkinter_ui.update_progress_bars(prog_percent=5)
            except Exception:
                pass
        return hvsr_data

    def _get_obspy_ppsds(hvsr_data, **obspy_ppsd_kwargs):
        """Build Obspy PPSD objects per component and collect per-window rows for the windows dataframe."""
        paz = hvsr_data['paz']
        stream = hvsr_data['stream']

        # Get ppsds of e component
        eStream = stream.select(component='E')
        ppsdE = PPSD(eStream.traces[0].stats, paz['E'], **obspy_ppsd_kwargs)
        ppsdE.add(eStream)

        # Get ppsds of n component
        nStream = stream.select(component='N')
        ppsdN = PPSD(nStream.traces[0].stats, paz['N'], **obspy_ppsd_kwargs)
        ppsdN.add(nStream)

        # Get ppsds of z component
        zStream = stream.select(component='Z')
        ppsdZ = PPSD(zStream.traces[0].stats, paz['Z'], **obspy_ppsd_kwargs)
        ppsdZ.add(zStream)

        # Get ppsds of R components (azimuthal data); these are keyed by the trace location code
        ppsds = {'Z': ppsdZ, 'E': ppsdE, 'N': ppsdN}
        rStream = stream.select(component='R')
        for curr_trace in stream:
            if 'R' in curr_trace.stats.channel:
                ppsd_curr = PPSD(curr_trace.stats, paz['E'], **obspy_ppsd_kwargs)
                ppsd_curr.add(rStream)
                ppsds[curr_trace.stats.location] = ppsd_curr

        # Add to the input dictionary, so that some items can be manipulated later on, and original can be saved
        hvsr_data['ppsds_obspy'] = ppsds
        hvsr_data['psds'] = {}
        anyKey = list(hvsr_data['ppsds_obspy'].keys())[0]

        # Get ppsd class members.
        # NOTE(review): callable() is applied to the attribute NAME (a str), which is never
        # callable, so public methods are copied too. Left as-is; confirm nothing downstream relies on it.
        members = [mems for mems in dir(hvsr_data['ppsds_obspy'][anyKey]) if not callable(mems) and not mems.startswith("_")]
        for k in ppsds.keys():
            hvsr_data['psds'][k] = {}

        # Things that need to be converted to np.array first, for consistency
        listList = ['times_data', 'times_gaps', 'times_processed', 'current_times_used', 'psd_values']
        timeKeys = ['times_processed', 'current_times_used', 'psd_values']
        timeDiffWarn = True
        dfList = []
        time_data = {}

        for m in members:
            for k in hvsr_data['psds'].keys():
                hvsr_data['psds'][k][m] = getattr(hvsr_data['ppsds_obspy'][k], m)
                if m in listList:
                    hvsr_data['psds'][k][m] = np.array(hvsr_data['psds'][k][m])

            if str(m) == 'times_processed':
                ZArr = hvsr_data['psds']['Z'][m]
                EArr = hvsr_data['psds']['E'][m]
                NArr = hvsr_data['psds']['N'][m]

                # concatenate (not np.array([Z, E, N])) so components with differing
                # numbers of windows do not form a ragged array
                unique_times = np.unique(np.concatenate([ZArr, EArr, NArr]))

                # Windows whose start time is present in all three components
                common_times = [currTime for currTime in unique_times
                                if currTime in ZArr and currTime in EArr and currTime in NArr]

                # Per common window: index of that window within the Z, E and N arrays
                cTimeIndList = [[int(np.where(ZArr == cTime)[0][0]),
                                 int(np.where(EArr == cTime)[0][0]),
                                 int(np.where(NArr == cTime)[0][0])]
                                for cTime in common_times]

            # Make sure number of time windows is the same between PPSDs (this can happen with just a few slightly different number of samples)
            if m in timeKeys:
                if str(m) != 'times_processed':
                    # Keep references to the untrimmed arrays; they are indexed with cTimeIndList below
                    time_data[str(m)] = (hvsr_data['psds']['Z'][m], hvsr_data['psds']['E'][m], hvsr_data['psds']['N'][m])

                tSteps_same = hvsr_data['psds']['Z'][m].shape[0] == hvsr_data['psds']['E'][m].shape[0] == hvsr_data['psds']['N'][m].shape[0]

                if not tSteps_same:
                    shortestTimeLength = min(hvsr_data['psds']['Z'][m].shape[0], hvsr_data['psds']['E'][m].shape[0], hvsr_data['psds']['N'][m].shape[0])

                    maxPctDiff = 0
                    for comp in hvsr_data['psds'].keys():
                        currCompTimeLength = hvsr_data['psds'][comp][m].shape[0]
                        percentageDiff = (currCompTimeLength - shortestTimeLength) / currCompTimeLength
                        if percentageDiff > maxPctDiff:
                            maxPctDiff = percentageDiff

                    # Trim trailing windows so every component has the same number
                    for comp in hvsr_data['psds'].keys():
                        if hvsr_data['psds'][comp][m].shape[0] > shortestTimeLength:
                            hvsr_data['psds'][comp][m] = hvsr_data['psds'][comp][m][:shortestTimeLength]

                    if maxPctDiff > 0.05 and timeDiffWarn:
                        warnings.warn(f"\t Number of ppsd time windows between different components is significantly different: {round(maxPctDiff*100,2)}% > 5%. Last windows will be trimmed.")
                    elif verbose and timeDiffWarn:
                        print(f"\t Number of ppsd time windows between different components is different by {round(maxPctDiff*100,2)}%. Last window(s) of components with larger number of ppsd windows will be trimmed.")
                    timeDiffWarn = False  # So we only do this warning once, even though there may be multiple arrays that need to be trimmed

        # One dataframe row per common window
        for tStepInd, currTStep in enumerate(cTimeIndList):
            colList = ['Use']
            currTStepList = [np.ones_like(common_times[tStepInd]).astype(bool)]
            for tk in time_data.keys():
                if 'current_times_used' not in tk:
                    # compInd gets its own name: it previously shadowed the outer window index
                    for compInd, k in enumerate(hvsr_data['psds'].keys()):
                        if k.lower() in ['z', 'e', 'n']:
                            colList.append(str(tk)+'_'+k)
                            currTStepList.append(time_data[tk][compInd][currTStep[compInd]])
            dfList.append(currTStepList)

        return hvsr_data, dfList, colList, common_times

    if obspy_ppsds:
        hvsr_data, dfList, colList, common_times = _get_obspy_ppsds(hvsr_data, **obspy_ppsd_kwargs)
    else:
        psdDict, times_bool = __single_psd_from_raw_data(hvsr_data, window_length=window_length, window_length_method=window_length_method, window_type=window_type,
                                                         num_freq_bins=num_freq_bins,
                                                         overlap=overlap_pct, remove_response=remove_response, do_azimuths=azimuthal_psds, show_psd_plot=False)

        common_times = [ct[0] for ct in times_bool]
        use_times = [ut[1] for ut in times_bool]

        # Log-spaced frequency bins over the hvsr_band, flipped to descending order
        x_freqs = np.flip(np.logspace(np.log10(hvsr_data['hvsr_band'][0]), np.log10(hvsr_data['hvsr_band'][1]), num_freq_bins))

        psdDictUpdate = {}
        hvsr_data['psds'] = {}
        for key, compdict in psdDict.items():
            psdDictUpdate[key] = np.array([list(np.flip(arr)) for time, arr in compdict.items()])
            hvsr_data['psds'][key] = {}

        # Bin-centre periods are identical for every component; np.diff is hoisted out of the comprehension
        freq_steps = np.diff(x_freqs)
        period_bin_centers = [round(1/float(f + freq_steps[i]/2), 4) for i, f in enumerate(x_freqs[:-1])]
        period_bin_centers.append(float(round(1/x_freqs[-1], 3)))

        # Populate the same keys that the Obspy PPSD path exposes
        for key in psdDict.keys():
            currSt = hvsr_data.stream.select(component=key).merge()
            compPsd = hvsr_data['psds'][key]

            compPsd['channel'] = currSt[0].stats.channel
            compPsd['current_times_used'] = common_times
            compPsd['delta'] = float(currSt[0].stats.delta)
            compPsd['id'] = currSt[0].id
            compPsd['len'] = int(window_length / compPsd['delta'])
            compPsd['location'] = currSt[0].stats.location
            compPsd['metadata'] = currSt[0].stats.response if hasattr(currSt[0].stats, 'response') else None
            compPsd['network'] = currSt[0].stats.network
            compPsd['nfft'] = int(window_length / compPsd['delta'])
            compPsd['nlap'] = int(overlap_pct*window_length / compPsd['delta'])
            compPsd['overlap'] = overlap_pct
            compPsd['period_bin_centers'] = np.array(period_bin_centers)
            compPsd['period_bin_left_edges'] = 1/x_freqs[:-1]
            compPsd['period_bin_right_edges'] = 1/x_freqs[1:]
            compPsd['period_xedges'] = 1/x_freqs
            compPsd['ppsd_length'] = window_length
            compPsd['psd_length'] = window_length
            compPsd['psd_frequencies'] = x_freqs
            compPsd['psd_periods'] = 1/x_freqs
            compPsd['psd_values'] = psdDictUpdate[key]
            compPsd['sampling_rate'] = currSt[0].stats.sampling_rate
            compPsd['skip_on_gaps'] = skip_on_gaps
            compPsd['station'] = currSt[0].stats.station
            compPsd['step'] = window_length * (1-overlap_pct)
            compPsd['times_data'] = common_times
            compPsd['times_gaps'] = [[None, None]]
            compPsd['times_processed'] = [[None, None]]

        hvsr_data['ppsds_obspy'] = {}

        dfList = []
        for i, w in enumerate(common_times):
            dfList.append([use_times[i], psdDictUpdate['Z'][i], psdDictUpdate['E'][i], psdDictUpdate['N'][i]])

        colList = ["Use", "psd_values_Z", "psd_values_E", "psd_values_N"]

    # dfList: list of rows fitting the columns in colList
    # common_times: window start times in common between all components
    hvsrDF = pd.DataFrame(dfList, columns=colList)
    if verbose:
        print(f"\t\t{hvsrDF.shape[0]} processing windows generated and psd values stored in hvsr_windows_df with columns: {', '.join(hvsrDF.columns)}")
    hvsrDF['Use'] = hvsrDF['Use'].astype(bool)

    # Add azimuthal psds values
    for k in hvsr_data['psds'].keys():
        if k.upper() not in ['Z', 'E', 'N']:
            hvsrDF['psd_values_'+k] = hvsr_data['psds'][k]['psd_values'].tolist()

    # NOTE(review): window end times below always use obspy_ppsd_kwargs['ppsd_length'],
    # including in the non-Obspy path where windows are defined by window_length - confirm intended.
    hvsrDF['TimesProcessed_Obspy'] = common_times
    hvsrDF['TimesProcessed_ObspyEnd'] = hvsrDF['TimesProcessed_Obspy'] + obspy_ppsd_kwargs['ppsd_length']

    # Create functions to be used in pandas .apply() for datetime conversions
    def convert_to_datetime(obspyUTCDateTime):
        return obspyUTCDateTime.datetime.replace(tzinfo=datetime.timezone.utc)

    def convert_to_mpl_dates(obspyUTCDateTime):
        return obspyUTCDateTime.matplotlib_date

    hvsrDF['TimesProcessed'] = hvsrDF['TimesProcessed_Obspy'].apply(convert_to_datetime)
    hvsrDF['TimesProcessed_End'] = hvsrDF['TimesProcessed'] + datetime.timedelta(days=0, seconds=obspy_ppsd_kwargs['ppsd_length'])
    hvsrDF['TimesProcessed_MPL'] = hvsrDF['TimesProcessed_Obspy'].apply(convert_to_mpl_dates)
    hvsrDF['TimesProcessed_MPLEnd'] = hvsrDF['TimesProcessed_MPL'] + (obspy_ppsd_kwargs['ppsd_length']/86400)

    # Take care of existing time gaps, in case not taken care of previously
    if obspy_ppsds:
        for gap in hvsr_data['psds']['Z']['times_gaps']:
            # A window is kept only if it lies entirely after or entirely before the gap.
            outside_gap = (hvsrDF['TimesProcessed_MPL'].gt(gap[1].matplotlib_date)
                           | hvsrDF['TimesProcessed_MPLEnd'].lt(gap[0].matplotlib_date)).astype(bool)
            # Accumulate across gaps: previously 'Use' was overwritten on every iteration,
            # so only the last gap had any effect.
            hvsrDF['Use'] = hvsrDF['Use'] & outside_gap

    hvsrDF.set_index('TimesProcessed', inplace=True)
    hvsr_data['hvsr_windows_df'] = hvsrDF

    # Remove data set for removal during remove_noise()
    if 'x_windows_out' in hvsr_data.keys():
        if verbose:
            print("\t\tRemoving Noisy windows from hvsr_windows_df.")
        hvsr_data = __remove_windows_from_df(hvsr_data, verbose=verbose)

    # Create dict entry to keep track of how many outlier hvsr curves are removed
    # This is a (2-item list with [0]=current number, [1]=original number of curves)
    hvsr_data['tsteps_used'] = [int(hvsrDF['Use'].sum()), hvsrDF['Use'].shape[0]]

    hvsr_data = sprit_utils._make_it_classy(hvsr_data)

    # Record the parameters actually used for this run
    if 'processing_parameters' not in hvsr_data.keys():
        hvsr_data['processing_parameters'] = {}
    hvsr_data['processing_parameters']['generate_psds'] = {}
    exclude_params_list = ['hvsr_data']
    for key, value in orig_args.items():
        if key not in exclude_params_list:
            hvsr_data['processing_parameters']['generate_psds'][key] = value

    hvsr_data['processing_status']['generate_psds_status'] = True
    hvsr_data = sprit_utils._check_processing_status(hvsr_data, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose)

    if plot_psds:
        # One thin line per window and component (Z black, E blue, N red)
        for _, r in hvsrDF.iterrows():
            plt.plot(r['psd_values_Z'], c='k', linewidth=0.5)
            plt.plot(r['psd_values_E'], c='b', linewidth=0.5)
            plt.plot(r['psd_values_N'], c='r', linewidth=0.5)
        plt.show()

    return hvsr_data
# Gets the metadata for Raspberry Shake, specifically for 3D v.7
# Get report (report generation and export)
[docs]
def get_report(hvsr_results, report_formats=['print', 'table', 'plot', 'html', 'pdf'], azimuth='HV',
               plot_type=DEFAULT_PLOT_STR, plot_engine='matplotlib',
               show_print_report=True, show_table_report=False, show_plot_report=False, show_html_report=False, show_pdf_report=True,
               suppress_report_outputs=False, show_report_outputs=False,
               csv_handling='append',
               report_export_format=None, report_export_path=None,
               verbose=False, **kwargs):
    """Generate and/or print and/or export a report of the HVSR analysis in a variety of formats.

    Formats include:
    * 'print': A (monospace) text summary of the HVSR results
    * 'table': A pandas.DataFrame summary of the HVSR Results.
            This is useful for copy/pasting directly into a larger worksheet.
    * 'plot': A plot summary of the HVSR results, generated using the plot_hvsr() function.
    * 'html': An HTML document/text of the HVSR results. This includes the table, print, and plot reports in one document.
    * 'pdf': A PDF document showing the summary of the HVSR Results.
            The PDF report is simply the HTML report saved to an A4-sized PDF document.

    Parameters
    ----------
    hvsr_results : dict
        Dictionary containing all the information about the processed hvsr data
    report_formats : str, list or tuple, default = ['print', 'table', 'plot', 'html', 'pdf']
        Format(s) in which to generate the report. A single string is accepted;
        ':' or 'all' selects every format. If 'print' is requested it is always generated first.
        The following report_formats return the following items in the following attributes:
            - 'print': hvsr_results['Print_Report'] as a str
            - 'plot': hvsr_results['Plot_Report'] - figure returned by plot_hvsr()
            - 'table': hvsr_results['Table_Report']- pandas.DataFrame object
            - 'html': hvsr_results['HTML_Report'] - a string containing the text for an HTML document
            - 'pdf': currently does not save to the HVSRData object itself, can only be saved to the disk directly
    azimuth : str, default = 'HV'
        Which azimuth to plot, by default "HV" which is the main "azimuth" combining the E and N components
    plot_type : str, default = DEFAULT_PLOT_STR
        What type of plot to plot, if 'plot' part of report_formats input
    plot_engine : str, default = 'matplotlib'
        Plotting engine, passed to plot_hvsr().
    show_print_report, show_table_report, show_plot_report, show_html_report, show_pdf_report : bool
        Whether to display the corresponding report when it is generated.
    suppress_report_outputs : bool, default=False
        If True, all show_*_report flags are set to False (reports are only read into the data object).
    show_report_outputs : bool, default=False
        If True (and suppress_report_outputs is False), all show_*_report flags are set to True.
    csv_handling : str, {'append', 'overwrite', 'keep/rename'}
        How to handle table report outputs if the designated csv output file already exists. By default, appends the new information to the end of the existing file.
    report_export_format : None, list or str, default=None
        A string or list of strings indicating which report formats should be exported to disk.
        ':' or 'all' selects every format.
    report_export_path : None, bool, or filepath, default = None
        If None or False, does not export; if True, will export to same directory as the input_data parameter in the input_params() function.
        Otherwise, it should be a string or path object indicating where to export results. May be a file or directory.
        If a directory is specified, the filename will be "<site_name>_<acq_date>_<UTC start time>-<UTC end time>".
        The extension/suffix defaults to png for report_formats="plot", csv for 'table', txt for 'print', html for 'html', and pdf for 'pdf.'
    verbose : bool, default=False
        Whether to print the parameters and progress to terminal.
    **kwargs
        Additional keyword arguments; recorded with the processing parameters.
        NOTE(review): they are currently not forwarded to plot_hvsr().

    Returns
    -------
    sprit.HVSRData
    """
    # Snapshot of the call arguments. Must remain the first statement so that
    # locals() holds only the function parameters.
    orig_args = locals().copy()

    # Lower-case the requested formats. A plain string is kept as a string (normalised to
    # a list further below); iterating it here would split it into single characters.
    if isinstance(orig_args['report_formats'], str):
        orig_args['report_formats'] = orig_args['report_formats'].lower()
    else:
        orig_args['report_formats'] = [str(f).lower() for f in orig_args['report_formats']]
    update_msg = []

    # Update with processing parameters specified previously in input_params, if applicable
    if 'processing_parameters' in hvsr_results.keys():
        if 'get_report' in hvsr_results['processing_parameters'].keys():
            defaultVDict = dict(zip(inspect.getfullargspec(get_report).args[1:],
                                    inspect.getfullargspec(get_report).defaults))
            defaultVDict['kwargs'] = {}
            for k, v in hvsr_results['processing_parameters']['get_report'].items():
                # Manual input to function overrides the imported parameter values
                if (not isinstance(v, (HVSRData, HVSRBatch))) and (k in orig_args.keys()) and (orig_args[k] == defaultVDict[k]):
                    update_msg.append(f'\t\t{k} = {v} (previously {orig_args[k]})')
                    orig_args[k] = v

    report_formats = orig_args['report_formats']
    azimuth = orig_args['azimuth']
    plot_type = orig_args['plot_type']
    plot_engine = orig_args['plot_engine']
    show_print_report = orig_args['show_print_report']
    show_table_report = orig_args['show_table_report']
    show_plot_report = orig_args['show_plot_report']
    show_html_report = orig_args['show_html_report']
    show_pdf_report = orig_args['show_pdf_report']
    suppress_report_outputs = orig_args['suppress_report_outputs']
    show_report_outputs = orig_args['show_report_outputs']
    report_export_format = orig_args['report_export_format']
    report_export_path = orig_args['report_export_path']
    csv_handling = orig_args['csv_handling']
    verbose = orig_args['verbose']
    kwargs = orig_args['kwargs']

    # Put Processing parameters in hvsr_results immediately (gets used later local function in get_report)
    hvsr_results['processing_parameters']['get_report'] = {}
    exclude_params_list = ['hvsr_results']
    for key, value in orig_args.items():
        if key not in exclude_params_list:
            hvsr_results['processing_parameters']['get_report'][key] = value

    if verbose:
        print('\nGetting HVSR Report: get_report()')
        print('\tUsing the following parameters:')
        for key, value in orig_args.items():
            if key == 'params' or isinstance(value, (HVSRData, HVSRBatch)):
                pass
            else:
                print('\t {}={}'.format(key, value))
        print()

    if update_msg != [] and verbose:
        update_msg.insert(0, '\tThe following parameters were updated using the processing_parameters attribute:')
        for msg_line in update_msg:
            print(msg_line)

    if isinstance(hvsr_results, HVSRBatch):
        if verbose:
            print('\nGetting Reports: Running in batch mode')
            print('\tUsing parameters:')
            for key, value in orig_args.items():
                print(f'\t {key}={value}')
            print()

        # If running batch, we'll loop through each site
        for site_name in hvsr_results.keys():
            args = orig_args.copy()  # Make a copy so we don't accidentally overwrite
            args['hvsr_results'] = hvsr_results[site_name]  # swap the batch for the individual site
            if hvsr_results[site_name]['processing_status']['overall_status']:
                try:
                    hvsr_results[site_name] = __get_report_batch(**args)  # Call another function, that lets us run this function again
                except Exception:
                    # Best-effort per site: on failure the site's data is left untouched.
                    # (Exception rather than a bare except, so KeyboardInterrupt/SystemExit propagate.)
                    pass

        # Stack every site's table report into one dataframe
        combined_csvReport = pd.DataFrame()
        for site_name in hvsr_results.keys():
            if 'Table_Report' in hvsr_results[site_name].keys():
                combined_csvReport = pd.concat([combined_csvReport, hvsr_results[site_name]['Table_Report']], ignore_index=True, join='inner')

        # None and False both mean "do not export" (False previously crashed in pathlib.Path)
        if report_export_path is not None and report_export_path is not False:
            if report_export_path is True:
                if pathlib.Path(hvsr_results['input_params']['input_data']) in sampleFileKeyMap.values():
                    csvExportPath = pathlib.Path(os.getcwd())
                else:
                    csvExportPath = pathlib.Path(hvsr_results['input_params']['input_data']).parent
            elif pathlib.Path(report_export_path).is_dir():
                csvExportPath = report_export_path
            elif pathlib.Path(report_export_path).is_file():
                # Wrapped in Path: report_export_path may be a plain str, which has no .parent
                csvExportPath = pathlib.Path(report_export_path).parent
            else:
                csvExportPath = pathlib.Path(hvsr_results[site_name].input_data)
                if not csvExportPath.is_dir():
                    csvExportPath = csvExportPath.parent

            combined_csvReport.to_csv(csvExportPath, index=False)
        return hvsr_results

    if suppress_report_outputs:
        show_print_report = show_plot_report = show_table_report = show_html_report = show_pdf_report = False
    elif show_report_outputs:
        show_print_report = show_plot_report = show_table_report = show_html_report = show_pdf_report = True

    try:
        # The sums double as a check that a BestPeak with a complete PassList exists
        # for this azimuth; any lookup failure is handled below.
        pass_list = hvsr_results['BestPeak'][azimuth]['PassList']
        pass_list['WinLen'] + pass_list['SigCycles'] + pass_list['LowCurveStD']
        (pass_list['ProminenceLow'] + pass_list['ProminenceHi'] + pass_list['AmpClarity']
         + pass_list['FreqStability'] + pass_list['LowStDev_Freq'] + pass_list['LowStDev_Amp'])
    except Exception as e:
        errMsg = 'No BestPeak identified. Check peak_freq_range or hvsr_band or try to remove bad noise windows using remove_noise() or change processing parameters in process_hvsr() or generate_psds(). Otherwise, data may not be usable for HVSR.'
        print(errMsg)
        print(e)

        # Without a BestPeak only a reduced plot is produced, then return early
        plotString_noBestPeak = 'HVSR t all C+ t SPEC'
        hvsr_results['Plot_Report'] = plot_hvsr(hvsr_results, plot_type=plotString_noBestPeak, azimuth=azimuth, return_fig=True)
        return hvsr_results

    allList = [':', 'all']
    allFormats = ['print', 'table', 'plot', 'html', 'pdf']

    # Figure out which reports will be used, and format them correctly
    if isinstance(report_formats, (list, tuple)):
        report_formats = [str(rf).lower() for rf in report_formats]
    elif report_formats.lower() in allList:
        report_formats = list(allFormats)
    else:
        # A loop is used below even for a single report type
        report_formats = [str(report_formats).lower()]

    # Format the export formats correctly
    if report_export_format is None:
        # Empty tuple: every "'<format>' in report_export_format" test below is False
        report_export_format = ()
    elif not isinstance(report_export_format, (list, tuple)):
        if report_export_format.lower() in allList:
            report_export_format = list(allFormats)
        else:
            report_export_format = [report_export_format]

    # Put print first to get results immediately while plots and others are created.
    # Only the order changes: previously the requested formats were replaced by all five here.
    if 'print' in report_formats and report_formats[0] != 'print':
        report_formats.pop(report_formats.index('print'))
        report_formats.insert(0, 'print')

    def _export_path_with_suffix(path_like, suffix):
        # None/True/False are passed through untouched (pathlib.Path would raise on them);
        # anything else becomes a Path, with its suffix replaced when one is given.
        if path_like is None or isinstance(path_like, bool):
            return path_like
        if suffix is None:
            return pathlib.Path(path_like)
        return pathlib.Path(path_like).with_suffix(suffix)

    for i, rep_form in enumerate(report_formats):
        # A list/tuple of export paths is matched to the report formats by position
        if isinstance(report_export_path, (list, tuple)):
            if len(report_formats) != len(report_export_path):
                warnings.warn('report_export_path and report_formats are both lists or tuples, but they are not the same length. This may result in unexpected behavior.')
            exp_path = report_export_path[i]
        else:
            exp_path = report_export_path

        # Print_Report
        if rep_form == 'print':
            verbose_print = True if show_print_report else verbose

            # Generates print report and saves to hvsr_results["Print_Report"]
            # NOTE(review): show_print_report is hard-coded to True in this call - confirm intended.
            hvsr_results = _generate_print_report(hvsr_results,
                                                  azimuth=azimuth,
                                                  show_print_report=True, verbose=verbose_print)

            if 'print' in report_export_format:
                export_report(hvsr_results, azimuth=azimuth,
                              report_export_format='print', report_export_path=_export_path_with_suffix(exp_path, '.txt'),
                              show_report=False,  # If report is to be shown, done in previous step
                              verbose=verbose_print)

        # Table_Report
        elif rep_form == 'table':
            verbose_table = True if show_table_report else verbose

            hvsr_results = _generate_table_report(hvsr_results,
                                                  azimuth=azimuth,
                                                  show_table_report=show_table_report,
                                                  verbose=verbose_table)

            if 'table' in report_export_format:
                export_report(hvsr_results, azimuth=azimuth,
                              report_export_format='table', report_export_path=_export_path_with_suffix(exp_path, '.csv'),
                              csv_handling=csv_handling,
                              show_report=False,  # If report is to be shown, done in previous step
                              verbose=verbose_table)

        # Plot_Report
        elif rep_form == 'plot':
            fig = plot_hvsr(hvsr_results, plot_type=plot_type, azimuth=azimuth, plot_engine=plot_engine, show_plot=show_plot_report, return_fig=True)

            # Store the figure before exporting so the export cannot pick up a stale or missing Plot_Report
            hvsr_results['Plot_Report'] = fig

            if 'plot' in report_export_format:
                # exp_path (not report_export_path) so a list of paths is indexed like the other formats
                export_report(hvsr_results=hvsr_results, report_export_path=exp_path, report_export_format='plot')

            if show_plot_report:
                if verbose:
                    print('\nPlot of data report:')
                # NOTE(review): this test is true for any non-empty plot_engine string, so
                # plt.show() is used for every engine. Behaviour kept; confirm intended check.
                if str(plot_engine).lower():
                    plt.show()
                else:
                    fig.show()
            else:
                if verbose:
                    print("\n\tPlot of data report created and saved in ['Plot_Report'] attribute")

        # HTML_Report
        elif rep_form == 'html':
            verbose_html = True if (verbose or show_html_report) else verbose

            hvsr_results = _generate_html_report(hvsr_results, show_html_report=show_html_report, verbose=verbose_html)

            if 'html' in report_export_format:
                export_report(hvsr_results, azimuth=azimuth,
                              report_export_format='html', report_export_path=_export_path_with_suffix(exp_path, '.html'),
                              show_report=False,  # If report is to be shown, done in previous step
                              verbose=verbose_html)

        # PDF_Report
        elif rep_form == 'pdf':
            # Don't repeat html printing, etc. if already done. A local flag is used so a
            # later 'html' iteration still sees the caller's show_html_report value.
            pdf_show_html = False if 'html' in report_formats else show_html_report

            hvsr_results = _generate_pdf_report(hvsr_results, pdf_report_filepath=_export_path_with_suffix(exp_path, None),
                                                show_pdf_report=show_pdf_report, show_html_report=pdf_show_html, verbose=verbose)

    return hvsr_results
# Import data
[docs]
def import_data(import_filepath, data_format='gzip', show_data=True):
    """Function to import .hvsr (or other extension) data exported using export_hvsr() function

    Parameters
    ----------
    import_filepath : str or path object
        Filepath of file created using export_hvsr() function.
        The shorthand strings 'sample', 'sampledata', or 's' read the file
        SampleHVSRSite01.hvsr from the sample_data folder of the package resources directory.
    data_format : str, default='gzip'
        Format the data is stored in (not case sensitive).
            - 'pickle': the file is read directly with pickle.
            - 'dataframe': the file is read as a csv file using pandas.read_csv().
            - any other value (including the default, 'gzip'): the file is first read as a
              gzip-compressed pickle; if that fails for any reason, it is read as an uncompressed pickle.
    show_data : bool, default=True
        Whether to print the imported object to the terminal.

    Returns
    -------
    object
        The object stored in the file (a pandas DataFrame if data_format='dataframe').
        For files written by the export function this is presumably an HVSRData or HVSRBatch object.

    Notes
    -----
    Unpickling executes code stored in the file: only import files from sources you trust.
    """
    sample_list = ['sample', 'sampledata', 's']
    if import_filepath in sample_list:
        import_filepath = RESOURCE_DIR.joinpath(r'sample_data')
        import_filepath = import_filepath.joinpath(r'SampleHVSRSite01.hvsr')

    # Normalize once so that every format name is matched case-insensitively
    # (and a non-string value simply falls through to the gzip/pickle default).
    requested_format = str(data_format).lower()

    if requested_format == 'pickle':
        with open(import_filepath, 'rb') as f:
            dataIN = pickle.load(f)
    elif requested_format == 'dataframe':
        dataIN = pd.read_csv(import_filepath)
    else:
        try:
            with gzip.open(import_filepath, 'rb') as f:
                dataIN = pickle.loads(f.read())
        except Exception:
            # Best-effort fallback: the file may be an uncompressed pickle.
            # If this also fails, the error from the plain read is what propagates.
            with open(import_filepath, 'rb') as f:
                dataIN = pickle.load(f)

    if show_data:
        print(dataIN)

    return dataIN
# Import settings
[docs]
def import_settings(settings_import_path, settings_import_type='instrument', verbose=False):
    """Function to import settings, intended for use with settings saved to disk using export_settings

    Parameters
    ----------
    settings_import_path : pathlike object, or list/tuple of pathlike objects
        Filepath to exported settings document (a json file).
        If settings_import_type is one of 'all', ':', 'both', or 'any', this must instead be either
        a list/tuple of settings filepaths, or the path to a directory containing
        *.inst and/or *.proc settings files.
    settings_import_type : str, optional
        What type of settings to import (can be 'instrument' or 'all'), by default 'instrument'.
        Any value other than 'all', ':', 'both', or 'any' reads settings_import_path as a single file.
    verbose : bool, optional
        Whether to print information to terminal, by default False

    Returns
    -------
    dict
        A dictionary containing the function names as keys of internal dictionaries,
        with key:value pairs for each parameter name:value in that function.
        When several files are read, they are merged in order: if the same function name appears
        in more than one file, its parameter dictionaries are combined, with later files taking precedence.

    Raises
    ------
    RuntimeError
        If multiple settings are requested, but settings_import_path is neither a list/tuple nor a directory.
    """
    allList = ['all', ':', 'both', 'any']

    def _read_settings_file(settings_file):
        # Each settings document is a json file
        with open(settings_file, 'r') as f:
            return json.load(f)

    if settings_import_type.lower() not in allList:
        # if just a single settings dict is desired
        return _read_settings_file(settings_import_path)

    # Either a directory or list
    if isinstance(settings_import_path, (list, tuple)):
        settings_files = list(settings_import_path)
    else:
        settings_import_path = sprit_utils._checkifpath(settings_import_path)
        if not settings_import_path.is_dir():
            raise RuntimeError(f'settings_import_type={settings_import_type}, but settings_import_path is not list/tuple or filepath to directory')
        # Sorted so the merge order (and therefore precedence) is deterministic;
        # instrument (*.inst) files are read before processing (*.proc) files.
        settings_files = sorted(settings_import_path.glob('*.inst')) + sorted(settings_import_path.glob('*.proc'))
        if not settings_files:
            warnings.warn(f'No *.inst or *.proc settings files found in {settings_import_path}')

    settingsDict = {}
    for settings_file in settings_files:
        for func_name, func_params in _read_settings_file(settings_file).items():
            # Merge (rather than replace) parameters when two files describe the same function
            if isinstance(func_params, dict) and isinstance(settingsDict.get(func_name), dict):
                settingsDict[func_name].update(func_params)
            else:
                settingsDict[func_name] = func_params
        if verbose:
            print(f'\tSettings imported from {settings_file}')

    return settingsDict
# Define input parameters
# Plot Azimuth data
[docs]
def plot_azimuth(hvsr_data, fig=None, ax=None, show_azimuth_peaks=False, interpolate_azimuths=True, show_azimuth_grid=False, show_plot=True, **plot_azimuth_kwargs):
    """Function to plot azimuths when azimuths are calculated

    Parameters
    ----------
    hvsr_data : HVSRData or HVSRBatch
        HVSRData for which azimuthal H/V curves have been calculated.
        This function reads the hvsr_az, x_freqs, and hvsr_band attributes and the 'site' item
        (and BestPeak, if peaks are shown).
    fig : matplotlib.Figure, optional
        Figure to plot on. If None, a new figure is created.
    ax : matplotlib.Axis, optional
        Axis to plot on (made the current axis). If None, a new polar subplot is created and titled with the site name.
    show_azimuth_peaks : bool, optional
        Whether to display the peak value at each azimuth calculated on the chart, by default False
    interpolate_azimuths : bool, optional
        Whether to interpolate the azimuth data to get a smoother plot.
        This is just for visualization, does not change underlying data.
        It takes a lot of time to process the data, but interpolation for vizualization can happen fairly fast. By default True.
    show_azimuth_grid : bool, optional
        Whether to display the grid on the chart, by default False
    show_plot : bool, optional
        Whether to call matplotlib.pyplot.show() once the plot is complete, by default True
    **plot_azimuth_kwargs : keyword arguments
        Only 'plot_type' is read here. It may be a space-separated string or a list/tuple of tokens:
            - 'i' turns interpolation on, '-i' turns interpolation off ('-i' takes precedence)
            - after an azimuth identifier ('azimuth', 'az', 'a', 'radial', 'r'),
              'p' turns on show_azimuth_peaks and 'g' turns on show_azimuth_grid

    Returns
    -------
    matplotlib.Figure, matplotlib.Axis
        Figure and axis of resulting azimuth plot.
        For HVSRBatch input (or input of an unsupported type), fig and ax are returned as they were passed in.
    """
    orig_args = locals().copy() #Get the initial arguments

    if isinstance(hvsr_data, HVSRBatch):
        #If running batch, we'll loop through each site
        for site_name in hvsr_data.keys():
            args = orig_args.copy() #Make a copy so we don't accidentally overwrite
            individual_params = hvsr_data[site_name] #Get what would normally be the "params" variable for each site
            args['hvsr_data'] = individual_params #reset the params parameter we originally read in to an individual site params
            if hvsr_data[site_name]['processing_status']['overall_status']:
                try:
                    # NOTE(review): result is stored on the batch object (overwritten for each site), not on the site
                    hvsr_data['Azimuth_Fig'] = __plot_azimuth_batch(**args) #Call another function, that lets us run this function again
                except Exception:
                    print(f"ERROR: {site_name} will not have azimuths plotted.")
    elif isinstance(hvsr_data, HVSRData):
        if fig is None:
            fig = plt.figure()

        hvsr_band = hvsr_data.hvsr_band

        # Split plot_type into lowercase tokens once, so option flags are matched as whole tokens
        # (a substring test would, e.g., find 'i' inside the word 'azimuth')
        ptList = []
        if 'plot_type' in plot_azimuth_kwargs.keys():
            plotTypeIn = plot_azimuth_kwargs['plot_type']
            if type(plotTypeIn) is str:
                ptList = plotTypeIn.lower().split()
            elif isinstance(plotTypeIn, (list, tuple)):
                ptList = [str(item).lower() for item in plotTypeIn]

        if 'i' in ptList:
            interpolate_azimuths = True
        if '-i' in ptList:
            interpolate_azimuths = False

        # Options listed after the azimuth identifier apply to this plot
        azList = ['azimuth', 'az', 'a', 'radial', 'r']
        azOpts = []
        for az in azList:
            if az in ptList:
                azOpts = ptList[ptList.index(az)+1:]

        if 'p' in azOpts:
            show_azimuth_peaks = True

        if 'g' in azOpts:
            show_azimuth_grid = True

        # The same curves are drawn twice: once at each azimuth, and once 180 degrees opposite
        sortedAzimuths = sorted(hvsr_data.hvsr_az.keys())
        azDataList = [hvsr_data.hvsr_az[k] for k in sortedAzimuths]

        freq = hvsr_data.x_freqs['Z'].tolist()[1:]
        a = np.deg2rad(np.array(sortedAzimuths).astype(float))
        b = a + np.pi

        z = np.array(azDataList)
        z2 = np.array(azDataList)

        def interp_along_theta(orig_array, orig_ind):
            newArrayList = []
            for a1 in orig_array.T:
                # Resample the array along the first dimension using numpy.interp
                newZ = np.interp(
                    np.linspace(np.pi/180, np.pi, 180),  # New indices
                    orig_ind,  # Original indices
                    a1)
                newArrayList.append(newZ)
            return np.array(newArrayList).T

        if interpolate_azimuths:
            z = interp_along_theta(z, a)
            z2 = interp_along_theta(z2, a)

            a = np.linspace(np.deg2rad(1), np.pi, 180)
            b = (a + np.pi).tolist()
            a = a.tolist()

        r, th = np.meshgrid(freq, a)
        r2, th2 = np.meshgrid(freq, b)

        # Set up plot
        if ax is None:
            ax = plt.subplot(polar=True)
            plt.title(hvsr_data['site'])
        else:
            plt.sca(ax)

        plt.semilogy()
        ax.set_theta_zero_location("N")
        ax.set_theta_direction(-1)
        plt.xlim([0, np.pi*2])
        plt.ylim([hvsr_band[1], hvsr_band[0]])

        # Plot data
        pmesh1 = plt.pcolormesh(th, r, z, cmap = 'jet')
        pmesh2 = plt.pcolormesh(th2, r2, z2, cmap = 'jet')

        if show_azimuth_peaks:
            peakVals = []
            peakThetas = []
            for k in sortedAzimuths:
                peakVals.append(hvsr_data.BestPeak[k]['f0'])
                peakThetas.append(int(k))
            # Mirror the peaks to the opposite (azimuth + 180 degrees) half of the plot
            peakThetas = peakThetas + (180 + np.array(peakThetas)).tolist()
            peakThetas = np.deg2rad(peakThetas).tolist()
            peakVals = peakVals + peakVals
            peakVals.append(peakVals[0])
            peakThetas.append(peakThetas[0]+(np.pi*2))
            peakThetas.append(peakThetas[1]+(np.pi*2))

            # Average each pair of consecutive angles, so points are drawn midway between them
            peakThetas = (np.convolve(peakThetas, np.ones(2), 'full')/2).tolist()[1:-1]

            if len(peakThetas) >= 20:
                alphaVal = 0.2
            else:
                # NOTE(review): constant (~0.22); possibly intended to scale with the number of points - confirm
                alphaVal = 0.9 - (19/28)
            plt.scatter(peakThetas, peakVals, marker='h', facecolors='none', edgecolors='k', alpha=alphaVal)

        if show_azimuth_grid:
            plt.grid(visible=show_azimuth_grid, which='both', alpha=0.5)
            plt.grid(visible=show_azimuth_grid, which='major', c='k', linewidth=1, alpha=1)

        if show_plot:
            plt.show()

        hvsr_data['AzimuthFig'] = fig
    else:
        warnings.warn(f'hvsr_data must be of type HVSRData or HVSRBatch, not {type(hvsr_data)}')
    return fig, ax
# Main function for plotting results
[docs]
def plot_hvsr(hvsr_data, plot_type=DEFAULT_PLOT_STR, azimuth='HV', use_subplots=True, fig=None, ax=None, return_fig=False, plot_engine='matplotlib', save_dir=None, save_suffix='', show_legend=False, show_plot=True, close_figs=False, clear_fig=True,**kwargs):
    """Function to plot HVSR data

    Parameters
    ----------
    hvsr_data : HVSRData or HVSRBatch
        Data object containing output from process_hvsr function.
        If HVSRBatch, each successfully processed site is plotted separately and nothing is returned.
    plot_type : str or list, default = 'HVSR ann p C+ ann p SPEC ann p'
        The plot_type of plot(s) to plot. If list, will plot all plots listed
        - 'HVSR' - Standard HVSR plot, including standard deviation. Options are included below:
            - 'p' shows a vertical dotted line at frequency of the "best" peak
            - 'ann' annotates the frequency value of of the "best" peak
            - 'all' shows all the peaks identified in check_peaks() (by default, only the max is identified)
            - 't' shows the H/V curve for all time windows
            - 'tp' shows all the peaks from the H/V curves of all the time windows
            - 'fr' shows the window within which SpRIT will search for peak frequencies, as set by peak_freq_range
            - 'test' shows a visualization of the results of the peak validity test(s). Examples:
                - 'tests' visualizes the results of all the peak tests (not the curve tests)
                - 'test12' shows the results of tests 1 and 2.
                    - Append any number 1-6 after 'test' to show a specific test result visualized
        - 'COMP' - plot of the PPSD curves for each individual component ("C" also works)
            - '+' (as a suffix in 'C+' or 'COMP+') plots C on a plot separate from HVSR (C+ is default, but without + will plot on the same plot as HVSR)
            - 'p' shows a vertical dotted line at frequency of the "best" peak
            - 'ann' annotates the frequency value of of the "best" peak
            - 'all' shows all the peaks identified in check_peaks() (by default, only the max is identified)
            - 't' shows the H/V curve for all time windows
        - 'SPEC' - spectrogram style plot of the H/V curve over time
            - 'p' shows a horizontal dotted line at the frequency of the "best" peak
            - 'ann' annotates the frequency value of the "best" peak
            - 'all' shows all the peaks identified in check_peaks()
            - 'tp' shows all the peaks of the H/V curve at all time windows
        - 'AZ' - circular plot of calculated azimuthal HV curves, similar in style to SPEC plot.
            - 'p' shows a point at each calculated (not interpolated) azimuth peak
            - 'g' shows grid lines at various angles
            - 'i' interpolates so that there is an interpolated azimuth at each degree interval (1 degree step)
                This is the default, so usually 'i' is not needed.
            - '-i' prohibits interpolation (only shows the calculated azimuths, as determined by azimuth_angle (default = 30))
    azimuth : str, default = 'HV'
        What 'azimuth' to plot, default being standard N E components combined
    use_subplots : bool, default = True
        Whether to output the plots as subplots (True) or as separate plots (False)
    fig : matplotlib.Figure, default = None
        If not None, matplotlib figure on which plot is plotted
    ax : matplotlib.Axis, default = None
        If not None, the axes on which plots are plotted. This is indexed by plot name
        ('hvsr', 'comp', 'spec', 'az'), as with the dictionary returned by matplotlib.pyplot.subplot_mosaic().
    return_fig : bool
        Whether to return the figure object
    plot_engine : str, default='Matplotlib'
        Which engine to use for plotting. Both "matplotlib" and "plotly" are acceptable. For shorthand, 'mpl', 'm' also work for matplotlib; 'plty' or 'p' also work for plotly. Not case sensitive.
        Any value that is not one of the plotly identifiers results in matplotlib being used.
    save_dir : str or None
        Directory in which to save figures (not referenced in the body of this function)
    save_suffix : str
        Suffix to add to end of figure filename(s), if save_dir is used (not referenced in the body of this function)
    show_legend : bool, default=False
        Whether to show legend in plot
    show_plot : bool
        Whether to show plot
    close_figs : bool, default=False
        Whether to close figures before plotting
    clear_fig : bool, default=True
        Whether to clear figures before plotting (only applies if both fig and ax are specified)
    **kwargs : keyword arguments
        Keyword arguments passed along to the plotting functions for each subplot

    Returns
    -------
    fig : figure object or None
        Returns the figure (matplotlib or plotly, depending on plot_engine) if return_fig=True,
        otherwise simply plots the figures and returns None.

    Raises
    ------
    ValueError
        If (using matplotlib) plot_type does not contain any recognized plot identifier.
    """
    orig_args = locals().copy() #Get the initial arguments

    if isinstance(hvsr_data, HVSRBatch):
        #If running batch, we'll loop through each site
        for site_name in hvsr_data.keys():
            args = orig_args.copy() #Make a copy so we don't accidentally overwrite
            # Replace the batch with the individual site under the parameter name this function uses
            # NOTE(review): assumes the batch helper forwards these keyword arguments to plot_hvsr() - confirm
            args['hvsr_data'] = hvsr_data[site_name]
            if hvsr_data[site_name]['processing_status']['overall_status']:
                try:
                    __hvsr_plot_batch(**args) #Call another function, that lets us run this function again
                except Exception:
                    print(f"{site_name} not able to be plotted.")

        return

    plotlyList = ['plotly', 'plty', 'p']

    if str(plot_engine).lower() in plotlyList:
        plotlyFigure = sprit_plot.plot_results_plotly(hvsr_data, plot_string=plot_type, azimuth=azimuth,
                                                      results_fig=fig, return_fig=return_fig, use_figure_widget=False,
                                                      show_results_plot=show_plot)
        if return_fig:
            return plotlyFigure
        return

    # Any plot_engine value not in the plotly list is plotted with matplotlib
    if clear_fig and fig is not None and ax is not None: #Intended use for tkinter
        #Clear everything (fig.clear() also removes any figure-level text)
        for key in ax:
            ax[key].clear()
        fig.clear()
    if close_figs:
        plt.close('all')

    # The possible identifiers in plot_type for the different kind of plots
    hvsrList = ['hvsr', 'hv', 'h']
    compList = ['c', 'comp', 'component', 'components']
    specgramList = ['spec', 'specgram', 'spectrogram']
    azList = ['azimuth', 'az', 'a', 'radial', 'r']

    # plot_type may be a list/tuple of tokens as well as a single string
    if isinstance(plot_type, (list, tuple)):
        plot_type = ' '.join(str(item) for item in plot_type)
    plot_type = plot_type.replace(',', '')
    kList = [k.lower() for k in plot_type.split(' ')]

    def _first_identifier_index(identifiers):
        # Position in kList of the first identifier (in order of preference) that was specified, or None
        for identifier in identifiers:
            if identifier in kList:
                return kList.index(identifier)
        return None

    # Get the plots in the right order, no matter how they were input (and ensure the right options go with the right plot)
    hvsrInd = _first_identifier_index(hvsrList)
    specInd = _first_identifier_index(specgramList)
    azInd = _first_identifier_index(azList)

    # Components only get their own plot if specified with '+' suffix (e.g., 'C+' or 'COMP+')
    compInd = None
    for tokenInd, token in enumerate(kList):
        if '+' in token and token[:-1] in compList:
            compInd = tokenInd
            break

    # Order the plots by where their identifiers appear in plot_type
    plotTypeList = ['hvsr', 'comp', 'spec', 'az']
    specifiedPlots = sorted((ind, name) for ind, name in zip([hvsrInd, compInd, specInd, azInd], plotTypeList)
                            if ind is not None)
    if not specifiedPlots:
        raise ValueError(f"No recognized plot type found in plot_type='{plot_type}'")

    plotTypeOrder = [name for _, name in specifiedPlots]
    # Start index of each plot's specifiers; final entry is the end index of the last plot's specifiers
    plotIndOrder = [ind for ind, _ in specifiedPlots] + [len(kList)]

    # set up subplots
    figLayout = 'constrained'
    figWidth = 6
    figHeight = 4
    figdpi = 220
    for i, p in enumerate(plotTypeOrder):
        pStartInd = plotIndOrder[i]
        pEndInd = plotIndOrder[i+1]
        plotComponents = kList[pStartInd:pEndInd]

        if use_subplots and i == 0 and fig is None and ax is None:
            # One row per plot; the azimuth plot is placed to the right of the existing row(s)
            mosaicPlots = []
            for pto in plotTypeOrder:
                if pto == 'az':
                    for rowInd, subp in enumerate(mosaicPlots):
                        if (subp[0].lower() == 'hvsr' or subp[0].lower() == 'comp') and len([item for item in plotTypeOrder if item != "hvsr"]) > 0:
                            mosaicPlots[rowInd].append(subp[0])
                            mosaicPlots[rowInd].append(subp[0])
                        else:
                            mosaicPlots[rowInd].append(subp[0])
                            mosaicPlots[rowInd].append(pto)
                else:
                    mosaicPlots.append([pto])
            perSubPDict = {}
            if 'az' in plotTypeOrder:
                perSubPDict['az'] = {'projection':'polar'}
            fig, ax = plt.subplot_mosaic(mosaicPlots, per_subplot_kw=perSubPDict,
                                         layout=figLayout, figsize=(figWidth, figHeight), dpi=figdpi)
            axis = ax[p]
        elif use_subplots:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore") #Often warns about xlim when it is not an issue
                if hasattr(ax, '__len__'):
                    ax[p].clear()
                    axis = ax[p]
        else:
            # NOTE(review): ax is not created here, so the ax[p] assignments below require ax to be passed in
            fig, axis = plt.subplots(figsize=(figWidth, figHeight), dpi=figdpi)

        if p == 'hvsr':
            kwargs['subplot'] = p
            fig, ax[p] = _plot_hvsr(hvsr_data, fig=fig, ax=axis, plot_type=plotComponents, azimuth=azimuth, xtype='x_freqs', show_legend=show_legend, axes=ax, **kwargs)
        elif p == 'comp':
            plotComponents[0] = plotComponents[0][:-1] # Remove the '+' suffix
            kwargs['subplot'] = p
            minY = 99999 # Start high
            maxY = -99999 # Start low
            for key in hvsr_data.psd_raw.keys():
                if min(hvsr_data.ppsd_std_vals_m[key]) < minY:
                    minY = min(hvsr_data.ppsd_std_vals_m[key])
                if max(hvsr_data.ppsd_std_vals_m[key]) > maxY:
                    maxY = max(hvsr_data.ppsd_std_vals_m[key])
            yRange = maxY - minY
            compYlim = [float(minY - (yRange*0.05)), float(maxY + (yRange * 0.05))]
            compYlim.reverse()
            compKwargs = {'ylim':compYlim}
            compKwargs.update(kwargs)
            # NOTE(review): compKwargs (with the ylim calculated above) is built but kwargs is what gets passed;
            # confirm whether **compKwargs was intended before changing, since it would alter the plot
            fig, ax[p] = _plot_hvsr(hvsr_data, fig=fig, ax=axis, plot_type=plotComponents, azimuth=azimuth, xtype='x_freqs', show_legend=show_legend, axes=ax, **kwargs)
        elif p == 'spec':
            # Each specifier is passed as a keyword argument set to True (these remain in kwargs for later plots)
            plottypeKwargs = {}
            for c in plotComponents:
                plottypeKwargs[c] = True
            kwargs.update(plottypeKwargs)
            _plot_specgram_hvsr(hvsr_data, fig=fig, ax=axis, azimuth=azimuth, colorbar=False, **kwargs)
        elif p == 'az':
            kwargs['plot_type'] = plotComponents
            hvsr_data['Azimuth_fig'] = plot_azimuth(hvsr_data, fig=fig, ax=axis, **kwargs)
        else:
            warnings.warn(f'Plot type {p} not recognized', UserWarning)

    # Note in bottom right corner of figure how many of the time windows were used
    windowsUsedStr = f"{hvsr_data['hvsr_windows_df']['Use'].astype(bool).sum()}/{hvsr_data['hvsr_windows_df'].shape[0]} windows used"
    winText = fig.text(x=1, y=0.0, s=windowsUsedStr, ha='right', va='bottom', fontsize='xx-small',
                       bbox=dict(facecolor='w', edgecolor=None, linewidth=0, alpha=1, pad=-1))
    winText.set_in_layout(False)

    if len(plotTypeOrder)>1:
        matplotlib.rcParams["figure.constrained_layout.h_pad"] = 0.075

    if show_plot:
        fig.canvas.draw()

    if return_fig:
        return fig
    return
# Main function for processing HVSR Curve
[docs]
def process_hvsr(hvsr_data, horizontal_method=None, smooth=True, freq_smooth='konno ohmachi',
f_smooth_width=40, resample=True,
outlier_curve_percentile_threshold=False, azimuth=None, verbose=False):
"""Process the input data and get HVSR data
This is the main function that uses other (private) functions to do
the bulk of processing of the HVSR data and the data quality checks.
Parameters
----------
hvsr_data : HVSRData or HVSRBatch
Data object containing all the parameters input and generated by the user (usually, during sprit.input_params(), sprit.fetch_data(), sprit.generate_psds() and/or sprit.remove_noise()).
horizontal_method : int or str, default=3
Method to use for combining the horizontal components. Default is 3) Geometric Mean
0) (not used)
1) 'Diffuse field assumption' H = √( (eie_E + eie_N) / eie_Z), eie = equal interval energy
2) 'Arithmetic Mean' H ≡ (HN + HE)/2
3) 'Geometric Mean' H ≡ √(HN · HE), recommended by the SESAME project (2004)
4) 'Vector Summation' H ≡ √(HN^2 + HE^2)
5) 'Quadratic Mean' H ≡ √(HN^2 + HE^2)/2
6) 'Maximum Horizontal Value' H ≡ max {HN, HE}
7) 'Minimum Horizontal Valey' H ≡ min {HN, HE}
8) 'Single Azimuth' H = H2·cos(az) + H1·sin(az)
smooth : bool, default=True
bool or int may be used.
If True, default to smooth H/V curve to using savgoy filter with window length of 51 (works well with default resample of 1000 pts)
If int, the length of the window in the savgoy filter.
freq_smooth : str {'konno ohmachi', 'constant', 'proportional'}
Which frequency smoothing method to use. By default, uses the 'konno ohmachi' method.
- The Konno & Ohmachi method uses the obspy.signal.konnoohmachismoothing.konno_ohmachi_smoothing() function: https://docs.obspy.org/packages/autogen/obspy.signal.konnoohmachismoothing.konno_ohmachi_smoothing.html
- The constant method uses a window of constant length f_smooth_width
- The proportional method uses a window the percentage length of the frequncy steps/range (f_smooth_width now refers to percentage)
See here for more information: https://www.geopsy.org/documentation/geopsy/hv-processing.html
f_smooth_width : int, default = 40
- For 'konno ohmachi': passed directly to the bandwidth parameter of the konno_ohmachi_smoothing() function, determines the width of the smoothing peak, with lower values resulting in broader peak. Must be > 0.
- For 'constant': the size of a triangular smoothing window in the number of frequency steps
- For 'proportional': the size of a triangular smoothing window in percentage of the number of frequency steps (e.g., if 1000 frequency steps/bins and f_smooth_width=40, window would be 400 steps wide)
resample : bool, default = True
bool or int.
If True, default to resample H/V data to include 1000 frequency values for the rest of the analysis
If int, the number of data points to interpolate/resample/smooth the component psd/HV curve data to.
outlier_curve_percentile_threshold : bool, float, default = False
If False, outlier curve removal is not carried out here.
If True, defaults to 98 (98th percentile).
Otherwise, float of percentile used as outlier_threshold of remove_outlier_curve().
azimuth : float, default = None
The azimuth angle to use when method is single azimuth.
verbose : bool, defualt=False
Whether to print output to terminal
Returns
-------
hvsr_out : dict
Dictionary containing all the information about the data, including input parameters
"""
orig_args = locals().copy() #Get the initial arguments
start_time = datetime.datetime.now()
# Update with processing parameters specified previously in input_params, if applicable
if 'processing_parameters' in hvsr_data.keys():
if 'process_hvsr' in hvsr_data['processing_parameters'].keys():
update_msg = []
for k, v in hvsr_data['processing_parameters']['process_hvsr'].items():
defaultVDict = dict(zip(inspect.getfullargspec(process_hvsr).args[1:],
inspect.getfullargspec(process_hvsr).defaults))
# Manual input to function overrides the imported parameter values
if (not isinstance(v, (HVSRData, HVSRBatch))) and (k in orig_args.keys()) and (orig_args[k]==defaultVDict[k]):
update_msg.append(f'\t\t{k} = {v} (previously {orig_args[k]})')
orig_args[k] = v
horizontal_method = orig_args['horizontal_method']
smooth = orig_args['smooth']
freq_smooth = orig_args['freq_smooth']
f_smooth_width = orig_args['f_smooth_width']
resample = orig_args['resample']
outlier_curve_percentile_threshold = orig_args['outlier_curve_percentile_threshold']
verbose = orig_args['verbose']
if (verbose and isinstance(hvsr_data, HVSRBatch)) or (verbose and not hvsr_data['batch']):
if isinstance(hvsr_data, HVSRData) and hvsr_data['batch']:
pass
else:
print('\nCalculating Horizontal/Vertical Ratios at all frequencies/time steps (process_hvsr())')
print('\tUsing the following parameters:')
for key, value in orig_args.items():
if key=='hvsr_data':
pass
else:
print('\t {}={}'.format(key, value))
print()
if 'processing_parameters' in hvsr_data.keys() and 'process_hvsr' in hvsr_data['processing_parameters'].keys():
if update_msg != []:
update_msg.insert(0, '\tThe following parameters were updated using the processing_parameters attribute:')
for msg_line in update_msg:
print(msg_line)
print()
# PROCESSING STARTS HERE (SEPARATE LOOP FOR BATCH)
if isinstance(hvsr_data, HVSRBatch):
#If running batch, we'll loop through each site
hvsr_out = {}
for site_name in hvsr_data.keys():
args = orig_args.copy() #Make a copy so we don't accidentally overwrite
args['hvsr_data'] = hvsr_data[site_name] #Get what would normally be the "hvsr_data" variable for each site
if hvsr_data[site_name]['processing_status']['overall_status']:
try:
hvsr_out[site_name] = __process_hvsr_batch(**args) #Call another function, that lets us run this function again
except:
hvsr_out = hvsr_data
hvsr_out[site_name]['processing_status']['process_hvsr_status']=False
hvsr_out[site_name]['processing_status']['overall_status'] = False
else:
hvsr_out = hvsr_data
hvsr_out[site_name]['processing_status']['process_hvsr_status']=False
hvsr_out[site_name]['processing_status']['overall_status'] = False
hvsr_out = HVSRBatch(hvsr_out, df_as_read=hvsr_data.input_df)
hvsr_out = sprit_utils._check_processing_status(hvsr_out, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose)
return hvsr_out
psds = hvsr_data['psds'].copy()#[k]['psd_values']
psds = sprit_utils._check_xvalues(psds)
methodList = ['<placeholder_0>', # 0
'Diffuse Field Assumption', # 1
'Arithmetic Mean', # 2
'Geometric Mean', # 3
'Vector Summation', # 4
'Quadratic Mean', # 5
'Maximum Horizontal Value', # 6
'Minimum Horizontal Value', # 7
'Single Azimuth' ] # 8
x_freqs = {}
x_periods = {}
psdValsTAvg = {}
stDev = {}
stDevValsP = {}
stDevValsM = {}
psdRaw={}
currTimesUsed={}
hvsr_data['hvsr_windows_df']['Use'] = hvsr_data['hvsr_windows_df']['Use'].astype(bool)
hvsrDF = hvsr_data['hvsr_windows_df']
def move_avg(y, box_pts):
#box = np.ones(box_pts)/box_pts
box = np.hanning(box_pts)
y_smooth = np.convolve(y, box, mode='same') / sum(box)
return y_smooth
resampleList = ['period_bin_centers', 'period_bin_left_edges', 'period_bin_right_edges', 'period_xedges',
'psd_frequencies', 'psd_periods']
for k in psds.keys():
#for ppsdk, ppsdv in psds[k].items():
#print(ppsdk, isinstance(ppsdv, np.ndarray))
#input_ppsds = psds[k]['psd_values'] #original, not used anymore
input_ppsds = np.stack(hvsrDF['psd_values_'+k].values)
#currPPSDs = hvsrDF['psd_values_'+k][hvsrDF['Use']].values
#used_ppsds = np.stack(currPPSDs)
xValMin_per = np.round(1/hvsr_data['hvsr_band'][1], 4)
xValMax_per = np.round(1/hvsr_data['hvsr_band'][0], 4)
# If resampling has been selected...
if resample is True or type(resample) is int or type(resample) is float:
if resample is True:
resample = 1000 #Default smooth value
# Resample period bin values
x_periods[k] = np.logspace(np.log10(xValMin_per), np.log10(xValMax_per), num=resample)
if smooth or isinstance(smooth, (int, float)):
if smooth:
smooth = 51 #Default smoothing window
padVal = 25
elif smooth % 2==0:
smooth + 1 #Otherwise, needs to be odd
padVal = smooth // 2
if padVal % 2 == 0:
padVal += 1
# Resample raw ppsd values
for i, ppsd_t in enumerate(input_ppsds):
if i==0:
psdRaw[k] = np.interp(x_periods[k], psds[k]['period_bin_centers'], ppsd_t)
if smooth is not False and smooth is not None:
padRawKPad = np.pad(psdRaw[k], [padVal, padVal], mode='reflect')
#padRawKPadSmooth = scipy.signal.savgol_filter(padRawKPad, smooth, 3)
padRawKPadSmooth = move_avg(padRawKPad, smooth)
psdRaw[k] = padRawKPadSmooth[padVal:-padVal]
else:
psdRaw[k] = np.vstack((psdRaw[k], np.interp(x_periods[k], psds[k]['period_bin_centers'], ppsd_t)))
if smooth is not False:
padRawKiPad = np.pad(psdRaw[k][i], [padVal, padVal], mode='reflect')
#padRawKiPadSmooth = scipy.signal.savgol_filter(padRawKiPad, smooth, 3)
padRawKiPadSmooth = move_avg(padRawKiPad, smooth)
psdRaw[k][i] = padRawKiPadSmooth[padVal:-padVal]
# Resample other values
for keys in resampleList:
if keys == 'period_bin_centers':
baseLength = len(psds[k][keys])
if psds[k][keys].ndim == 1:
if psds[k][keys].shape[-1] == baseLength:
psds[k][keys] = np.logspace(np.log10(min(psds[k][keys])), np.log10(max(psds[k][keys])), num=resample)
else:
psds[k][keys] = np.logspace(np.log10(min(psds[k][keys])), np.log10(max(psds[k][keys])), num=resample-1)
else:
arrList = []
for arr in psds[k][keys]:
arrList.append(np.logspace(np.log10(min(arr)), np.log10(max(arr)), num=resample))
psds[k][keys] = np.array(arrList)
else:
#If no resampling desired
x_periods[k] = np.array(psds[k]['period_bin_centers'])#[:-1]#np.round([1/p for p in hvsr_data['psds'][k]['period_xedges'][:-1]], 3)
# Clean up edge freq. values
x_periods[k][0] = 1/hvsr_data['hvsr_band'][1]
x_periods[k][-1] = 1/hvsr_data['hvsr_band'][0]
# If simple curve smooothing desired
if smooth or isinstance(smooth, (int, float)):
if smooth:
smooth = 51 #Default smoothing window
padVal = 25
elif smooth % 2==0:
smooth + 1 #Otherwise, needs to be odd
padVal = smooth // 2
if padVal % 2 == 0:
padVal += 1
for i, ppsd_t in enumerate(input_ppsds):
if i == 0:
psdRaw[k] = ppsd_t
padRawKPad = np.pad(psdRaw[k], [padVal, padVal], mode='reflect')
#padRawKPadSmooth = scipy.signal.savgol_filter(padRawKPad, smooth, 3)
padRawKPadSmooth = move_avg(padRawKPad, smooth)
psdRaw[k] = padRawKPadSmooth[padVal:-padVal]
else:
psdRaw[k] = np.vstack((psdRaw[k], ppsd_t))
padRawKiPad = np.pad(psdRaw[k][i], [padVal, padVal], mode='reflect')
#padRawKiPadSmooth = scipy.signal.savgol_filter(padRawKiPad, smooth, 3)
padRawKiPadSmooth = move_avg(padRawKiPad, smooth)
psdRaw[k][i] = padRawKiPadSmooth[padVal:-padVal]
else:
# If no simple curve smoothing
psdRaw[k] = np.array(input_ppsds)
hvsrDF['psd_values_'+k] = list(psdRaw[k])
use = hvsrDF['Use'].astype(bool)
#Get average psd value across time for each channel (used to calc main H/V curve)
psdValsTAvg[k] = np.nanmedian(np.stack(hvsrDF[use]['psd_values_'+k]), axis=0)
x_freqs[k] = np.array([1/p for p in x_periods[k]]) #np.divide(np.ones_like(x_periods[k]), x_periods[k])
stDev[k] = np.nanstd(np.stack(hvsrDF[use]['psd_values_'+k]), axis=0)
stDevValsM[k] = np.array(psdValsTAvg[k] - stDev[k])
stDevValsP[k] = np.array(psdValsTAvg[k] + stDev[k])
currTimesUsed[k] = np.stack(hvsrDF[use]['TimesProcessed_Obspy'])
#currTimesUsed[k] = psds[k]['current_times_used'] #original one
#print('XFREQS', x_freqs[k].shape)
#print('XPERs', x_periods[k].shape)
#print('PSDRAW', psdRaw[k].shape)
# Get string of horizontal_method type
# First, define default
if horizontal_method is None:
horizontal_method = 3 # Geometric mean is used as default if nothing is specified
# If an azimuth has been calculated and it's only one, automatically use the single azimuth method
if len(hvsr_data.stream.merge().select(component='R')) == 1:
horizontal_method = 8 # Single azimuth
# horizontal_method needs to be str or int
# First check if input is a string
if type(horizontal_method) is str:
if horizontal_method.isdigit():
horizontal_method = int(horizontal_method)
elif str(horizontal_method).title() in methodList:
horizontal_method = methodList.index(horizontal_method.title())
else:
print(f"\tHorizontal method {f} not recognized, reverting to default (geometric mean).\n\tMust be one of {methodList}")
horizontal_method = 3
# Now, horizontal_method is int no matter how it was entered
methodInt = horizontal_method
horizontal_method = methodList[horizontal_method]
hvsr_data['horizontal_method'] = horizontal_method
#This gets the main hvsr curve averaged from all time steps
anyK = list(x_freqs.keys())[0]
hvsr_curve, hvsr_az, hvsr_tSteps = __get_hvsr_curve(x=x_freqs[anyK], psd=psdValsTAvg, horizontal_method=methodInt, hvsr_data=hvsr_data, azimuth=azimuth, verbose=verbose)
origPPSD = hvsr_data['ppsds_obspy'].copy()
#print('hvcurv', np.array(hvsr_curve).shape)
#print('hvaz', np.array(hvsr_az).shape)
#Add some other variables to our output dictionary
hvsr_dataUpdate = {'input_params':hvsr_data,
'x_freqs':x_freqs,
'hvsr_curve':hvsr_curve,
'hvsr_az':hvsr_az,
'x_period':x_periods,
'psd_raw':psdRaw,
'current_times_used': currTimesUsed,
'psd_values_tavg':psdValsTAvg,
'ppsd_std':stDev,
'ppsd_std_vals_m':stDevValsM,
'ppsd_std_vals_p':stDevValsP,
'horizontal_method':horizontal_method,
'psds':psds,
'ppsds_obspy':origPPSD,
'tsteps_used': hvsr_data['tsteps_used'].copy(),
'hvsr_windows_df':hvsr_data['hvsr_windows_df']
}
hvsr_out = HVSRData(hvsr_dataUpdate)
#This is if manual editing was used (should probably be updated at some point to just use masks)
if 'x_windows_out' in hvsr_data.keys():
hvsr_out['x_windows_out'] = hvsr_data['x_windows_out']
else:
hvsr_out['x_windows_out'] = []
freq_smooth_ko = ['konno ohmachi', 'konno-ohmachi', 'konnoohmachi', 'konnohmachi', 'ko', 'k']
freq_smooth_constant = ['constant', 'const', 'c']
freq_smooth_proport = ['proportional', 'proportion', 'prop', 'p']
#Frequency Smoothing
if not freq_smooth:
if verbose:
warnings.warn('No frequency smoothing is being applied. This is not recommended for noisy datasets.')
elif freq_smooth is True or (freq_smooth.lower() in freq_smooth_ko and (not not f_smooth_width and not not freq_smooth)):
from obspy.signal import konnoohmachismoothing
for k in hvsr_out['psd_raw']:
colName = f'psd_values_{k}'
psd_data = np.stack(hvsr_out['hvsr_windows_df'][colName])
psd_data = hvsr_out['psd_raw'][k]
freqs = hvsr_out['x_freqs'][k]
padding_length = int(f_smooth_width)
padding_value_R = np.nanmean(psd_data[:,-1*padding_length:])
padding_value_L = np.nanmean(psd_data[:,:padding_length])
# Pad the data to prevent boundary anamolies
padded_ppsd_data = np.pad(psd_data, ((0, 0), (padding_length, padding_length)),
'constant', constant_values=(padding_value_L, padding_value_R))
# Pad the frequencies
ratio = freqs[1] / freqs[0]
# Generate new elements on either side and combine
left_padding = [freqs[0] / (ratio ** i) for i in range(padding_length, 0, -1)]
right_padding = [freqs[-1] * (ratio ** i) for i in range(1, padding_length + 1)]
padded_freqs = np.concatenate([left_padding, freqs, right_padding])
#Filter out UserWarning for just this method, since it throws up a UserWarning that doesn't really matter about dtypes often
with warnings.catch_warnings():
#warnings.simplefilter('ignore', category=UserWarning)
padded_ppsd_data = padded_ppsd_data.astype(padded_freqs.dtype) # Make them the same datatype
padded_ppsd_data = np.round(padded_ppsd_data, 12) # Prevent overflows
padded_freqs = np.round(padded_freqs, 9)
smoothed_ppsd_data = konnoohmachismoothing.konno_ohmachi_smoothing(padded_ppsd_data, padded_freqs,
bandwidth=f_smooth_width, normalize=True)
# Only use the original, non-padded data
smoothed_ppsd_data = smoothed_ppsd_data[:,padding_length:-1*padding_length]
hvsr_out['psd_raw'][k] = smoothed_ppsd_data
hvsr_out['hvsr_windows_df'][colName] = pd.Series(list(smoothed_ppsd_data), index=hvsr_out['hvsr_windows_df'].index)
elif freq_smooth.lower() in freq_smooth_constant:
hvsr_out = __freq_smooth_window(hvsr_out, f_smooth_width, kind_freq_smooth='constant')
elif freq_smooth.lower() in freq_smooth_proport:
hvsr_out = __freq_smooth_window(hvsr_out, f_smooth_width, kind_freq_smooth='proportional')
else:
if verbose:
warnings.warn(f'You indicated no frequency smoothing should be applied (freq_smooth = {freq_smooth}). This is not recommended for noisy datasets.')
#Get hvsr curve from three components at each time step
anyK = list(hvsr_out['psd_raw'].keys())[0]
if horizontal_method==1 or horizontal_method =='dfa' or horizontal_method =='Diffuse Field Assumption':
hvsr_tSteps_az = {}
else:
hvsr_tSteps = []
hvsr_tSteps_az = {}
for tStep in range(len(hvsr_out['psd_raw'][anyK])):
tStepDict = {}
for k in hvsr_out['psd_raw']:
tStepDict[k] = hvsr_out['psd_raw'][k][tStep]
hvsr_tstep, hvsr_az_tstep, _ = __get_hvsr_curve(x=hvsr_out['x_freqs'][anyK], psd=tStepDict, horizontal_method=methodInt, hvsr_data=hvsr_out, verbose=verbose)
hvsr_tSteps.append(np.float64(hvsr_tstep)) #Add hvsr curve for each time step to larger list of arrays with hvsr_curves
for k, v in hvsr_az_tstep.items():
if tStep == 0:
hvsr_tSteps_az[k] = [np.float32(v)]
else:
hvsr_tSteps_az[k].append(np.float32(v))
hvsr_out['hvsr_windows_df']['HV_Curves'] = hvsr_tSteps
# Add azimuth HV Curves to hvsr_windows_df, if applicable
for key, values in hvsr_tSteps_az.items():
hvsr_out['hvsr_windows_df']['HV_Curves_'+key] = values
hvsr_out['ind_hvsr_curves'] = {}
for col_name in hvsr_out['hvsr_windows_df']:
if "HV_Curves" in col_name:
if col_name == 'HV_Curves':
colID = 'HV'
else:
colID = col_name.split('_')[2]
hvsr_out['ind_hvsr_curves'][colID] = np.stack(hvsr_out['hvsr_windows_df'][hvsr_out['hvsr_windows_df']['Use']][col_name])
#Initialize array based only on the curves we are currently using
indHVCurvesArr = np.stack(hvsr_out['hvsr_windows_df']['HV_Curves'][hvsr_out['hvsr_windows_df']['Use']])
if outlier_curve_percentile_threshold:
if outlier_curve_percentile_threshold is True:
outlier_curve_percentile_threshold = 98
hvsr_out = remove_outlier_curves(hvsr_out, use_percentile=True, outlier_threshold=outlier_curve_percentile_threshold, use_hv_curves=True, verbose=verbose)
hvsr_out['ind_hvsr_stdDev'] = {}
for col_name in hvsr_out['hvsr_windows_df'].columns:
if "HV_Curves" in col_name:
if col_name == 'HV_Curves':
keyID = 'HV'
else:
keyID = col_name.split('_')[2]
curr_indHVCurvesArr = np.stack(hvsr_out['hvsr_windows_df'][col_name][hvsr_out['hvsr_windows_df']['Use']])
hvsr_out['ind_hvsr_stdDev'][keyID] = np.nanstd(curr_indHVCurvesArr, axis=0)
#Get peaks for each time step
hvsr_out['ind_hvsr_peak_indices'] = {}
tStepPFDict = {}
#hvsr_out['hvsr_windows_df']['CurvesPeakFreqs'] = {}
for col_name in hvsr_out['hvsr_windows_df'].columns:
if col_name.startswith("HV_Curves"):
tStepPeaks = []
if len(col_name.split('_')) > 2:
colSuffix = "_"+'_'.join(col_name.split('_')[2:])
else:
colSuffix = '_HV'
for tStepHVSR in hvsr_out['hvsr_windows_df'][col_name]:
tStepPeaks.append(__find_peaks(tStepHVSR))
hvsr_out['ind_hvsr_peak_indices']['CurvesPeakIndices'+colSuffix] = tStepPeaks
tStepPFList = []
for tPeaks in tStepPeaks:
tStepPFs = []
for pInd in tPeaks:
tStepPFs.append(np.float32(hvsr_out['x_freqs'][anyK][pInd]))
tStepPFList.append(tStepPFs)
tStepPFDict['CurvesPeakFreqs'+colSuffix] = tStepPFList
indHVPeakIndsDF = pd.DataFrame(hvsr_out['ind_hvsr_peak_indices'], index=hvsr_out['hvsr_windows_df'].index)
tStepPFDictDF = pd.DataFrame(tStepPFDict, index=hvsr_out['hvsr_windows_df'].index)
for col in indHVPeakIndsDF.columns:
hvsr_out['hvsr_windows_df'][col] = indHVPeakIndsDF.loc[:, col]
for col in tStepPFDictDF.columns:
hvsr_out['hvsr_windows_df'][col] = tStepPFDictDF.loc[:, col]
#Get peaks of main HV curve
hvsr_out['hvsr_peak_indices'] = {}
hvsr_out['hvsr_peak_indices']['HV'] = __find_peaks(hvsr_out['hvsr_curve'])
for k in hvsr_az.keys():
hvsr_out['hvsr_peak_indices'][k] = __find_peaks(hvsr_out['hvsr_az'][k])
#Get frequency values at HV peaks in main curve
hvsr_out['hvsr_peak_freqs'] = {}
for k in hvsr_out['hvsr_peak_indices'].keys():
hvsrPF = []
for p in hvsr_out['hvsr_peak_indices'][k]:
hvsrPF.append(hvsr_out['x_freqs'][anyK][p])
hvsr_out['hvsr_peak_freqs'][k] = np.array(hvsrPF)
#Get other HVSR parameters (i.e., standard deviations, etc.)
hvsr_out = __gethvsrparams(hvsr_out)
#Include the original obspy stream in the output
hvsr_out['input_stream'] = hvsr_dataUpdate['input_params']['input_stream'] #input_stream
hvsr_out = sprit_utils._make_it_classy(hvsr_out)
hvsr_out['processing_status']['process_hvsr_status'] = True
if 'processing_parameters' not in hvsr_out.keys():
hvsr_out['processing_parameters'] = {}
hvsr_out['processing_parameters']['process_hvsr'] = {}
exclude_params_list = ['hvsr_data']
for key, value in orig_args.items():
if key not in exclude_params_list:
hvsr_out['processing_parameters']['process_hvsr'][key] = value
if str(horizontal_method) == '8' or horizontal_method.lower() == 'single azimuth':
if azimuth is None:
azimuth = 90
hvsr_out['single_azimuth'] = azimuth
hvsr_out = sprit_utils._check_processing_status(hvsr_out, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose)
return hvsr_out
# Read data from Tromino
def read_tromino_files(input_data, struct_format='H', tromino_model=None, diagnose=False,
                       sampling_rate=None, set_record_duration=None, start_byte=24576, verbose=False, **kwargs):
    """Read Tromino data into an ObsPy Stream.

    This has been lightly tested on Tromino 3G+ and Blue machines. The input
    is resolved to a single filepath first, then handed to the model-specific
    reader (Blue reader if `tromino_model` indicates a Blue instrument,
    otherwise the Yellow/3G+ reader).

    Parameters
    ----------
    input_data : str, pathlib.Path, or HVSRData
        Filepath to a .trc file, or a directory (e.g., a Tromino partition
        folder) containing a .trc file. If an HVSRData object is passed, its
        'input_data' entry is used as the filepath.
    struct_format : str, optional
        This is the format used in the struct module.
        Usually should not be changed, by default 'H'.
        Only forwarded to the Yellow/3G+ reader.
    tromino_model : str, optional
        Which tromino model is being read.
        Any value containing "blue" (or one of 'blu', 'tromino blu') selects the Blue reader.
        Any other value, including None, selects the Yellow/3G+ reader, by default None.
    diagnose : bool, optional
        Forwarded unchanged to the Yellow/3G+ reader, by default False.
    sampling_rate : int, optional
        Sampling rate (samples per second) used during acquisition.
        Forwarded unchanged to the selected reader, by default None.
    set_record_duration : int, optional
        Duration of record to set manually in minutes, by default None.
        Only forwarded to the Yellow/3G+ reader.
    start_byte : int, optional
        Used internally, by default 24576.
        Only forwarded to the Yellow/3G+ reader.
    verbose : bool, optional
        Whether to print information to terminal, by default False
    **kwargs
        Extra keyword arguments. For the Blue reader, only those matching its
        signature are forwarded; for the Yellow/3G+ reader all are forwarded.

    Returns
    -------
    obspy.stream.Stream
        Obspy Stream object with Tromino data
    """
    blueModelList = ['blue', 'blu', 'tromino blu', 'tromino blue']

    # Resolve ONE filepath that both readers will receive
    input_filepath = input_data
    if isinstance(input_data, HVSRData):
        input_filepath = input_data['input_data']

    # Allow reading of tromino partition folders (and get the .trc file inside), not just .trc file
    if pathlib.Path(input_filepath).is_dir():
        trcFiles = list(pathlib.Path(input_filepath).glob('*trc'))
        if trcFiles:
            # If several files match, the last one yielded by glob is used (as before)
            input_filepath = trcFiles[-1]
            if verbose:
                print(f'\t Input file updated to {pathlib.Path(input_filepath).name} in specified directory.')
        else:
            warnings.warn(f"No .trc file found in directory {input_filepath}; passing the directory to the reader as-is.")

    modelStr = str(tromino_model).lower()
    if modelStr in blueModelList or 'blue' in modelStr:
        # Only forward the keyword arguments the Blue reader actually accepts
        blueParams = tuple(inspect.signature(__read_tromino_data_blue).parameters.keys())
        tBlueKwargs = {k: v for k, v in kwargs.items() if k in blueParams}
        if 'sampling_rate' not in tBlueKwargs:
            tBlueKwargs['sampling_rate'] = sampling_rate

        # NOTE(review): the Blue reader is always called with verbose=False;
        # confirm whether `verbose` should be forwarded instead.
        return __read_tromino_data_blue(input_filepath, verbose=False, **tBlueKwargs)

    return __read_tromino_data_yellow(input_data=input_filepath, sampling_rate=sampling_rate,
                                      struct_format=struct_format, tromino_model="3G+", diagnose=diagnose,
                                      set_record_duration=set_record_duration, start_byte=start_byte,
                                      return_dict=False, verbose=verbose, **kwargs)
# Function to remove noise windows from data
def remove_noise(hvsr_data, remove_method=None,
                 processing_window=None, sat_percent=0.995, noise_percent=0.80,
                 sta=2, lta=30, stalta_thresh=[8, 16],
                 std_ratio_thresh=2.0, std_window_size=20.0, min_std_win=5.0,
                 warmup_time=0, cooldown_time=0, min_win_size=1,
                 remove_raw_noise=False, show_stalta_plot=False, verbose=False):
    """Function to remove noisy windows from data, using various methods.

    Methods include

    - Manual window selection (by clicking on a chart with spectrogram and stream data),
    - Auto window selection, which runs the rolling standard deviation method followed by
      the saturation threshold method (and then stops; no further listed methods are run),
    - A sta/lta "antitrigger" method (using stalta values to automatically remove triggered windows where there appears to be too much noise),
    - A rolling (moving) standard deviation method,
    - A noise threshold method,
    - A saturation threshold method,
    - A warmup/cooldown buffer method,
    - A processing window method (keeps only the specified time windows).

    If any parameter belonging to a method is set to a non-default value, that method
    is added to `remove_method` automatically (unless 'auto' is already specified).

    Parameters
    ----------
    hvsr_data : HVSRData, HVSRBatch, dict, obspy.Stream, or obspy.Trace
        Object containing all the data and parameters for the HVSR analysis, or a bare stream/trace.
    remove_method : str, list, tuple, or None
        The method(s) for removing noise from the dataset. May be a single string, a comma-separated string,
        or a list/tuple of strings. Recognized names include 'manual', 'auto', 'stalta'/'antitrigger',
        'moving_std', 'saturation threshold', 'noise threshold', 'warmup'/'cooldown'/'buffer'/'warm_cool',
        and 'processing_window'. By default None (no method, unless a non-default parameter implies one).
    processing_window : list, tuple, or None
        A list/tuple of two items [s, e] or a list/tuple of two-item lists/tuples [[s0, e0], [s1,e1],...[sn, en]] with start and end time(s) for windows to *keep* for processing.
        Data outside of these times will be excluded from processing.
        Times should be obspy.UTCDateTime objects to ensure precision, but time strings ("13:05") will also work in most cases (exceptions may be when the data stream starts/ends on different UTC days)
    sat_percent : float, default=0.995
        Percentage (between 0 and 1), to use as the threshold at which to remove data. This is used in the saturation method.
    noise_percent : float, default=0.8
        Percentage (between 0 and 1), to use as the threshold at which to remove data. This is used in the noise threshold method.
    sta : int, optional
        Short term average (STA) window (in seconds), by default 2. For use with sta/lta antitrigger method.
    lta : int, optional
        Long term average (LTA) window (in seconds), by default 30. For use with sta/lta antitrigger method
        (also passed to the noise threshold method).
    stalta_thresh : list, default=[8, 16]
        Two-item list or tuple with the thresholds for the stalta antitrigger. The first value (index [0]) is the lower threshold, the second value (index [1]) is the upper threshold.
    std_ratio_thresh : float, optional
        The ratio to use as a threshold for removal of noise. The ratio represents the standard deviation value for a rolling window (the size of which is determined by the std_window_size parameter)
        divided by the standard deviation calculated for the entire trace.
    std_window_size : float, optional
        The length of the window (in seconds) to use for calculating the rolling/moving standard deviation of a trace for the rolling standard deviation method.
    min_std_win : float, optional
        The minimum size of "window" that will be removed using the rolling standard deviation method.
    warmup_time : int, default=0
        Time in seconds to allow for warmup of the instrument (or while operator is still near instrument). This will remove any data before this time.
    cooldown_time : int, default=0
        Time in seconds to allow for cooldown of the instrument (or for when operator is nearing instrument). This will remove any data after this time.
    min_win_size : float, default=1
        The minimum size a window must be over specified threshold (in seconds) for it to be removed
    remove_raw_noise : bool, default=False
        If remove_raw_noise=True, will perform operation on raw data ('input_stream'), rather than potentially already-modified data ('stream').
    show_stalta_plot : bool, default=False
        Passed to the sta/lta antitrigger method.
    verbose : bool, default=False
        Whether to print status of remove_noise

    Returns
    -------
    output : HVSRData, HVSRBatch, or stream-like
        For HVSRData/dict input, the same object with the edited stream stored under 'stream_edited'
        and the parameters used stored under processing_parameters['remove_noise'].
        For HVSRBatch input, a new HVSRBatch of the per-site results.
    """
    # Get input parameters (must be first, so locals() holds only the arguments)
    orig_args = locals().copy()
    start_time = datetime.datetime.now()

    # Default value of every keyword parameter, in signature order
    default_values = {param.name: param.default
                      for param in inspect.signature(remove_noise).parameters.values()
                      if param.default is not inspect.Parameter.empty}

    # obspy Stream/Trace inputs have no keys(); only key-based inputs can carry stored parameters
    has_keys = hasattr(hvsr_data, 'keys')

    # Update with processing parameters specified previously in input_params, if applicable
    update_msg = []
    if has_keys and 'processing_parameters' in hvsr_data.keys() \
            and 'remove_noise' in hvsr_data['processing_parameters'].keys():
        for k, v in hvsr_data['processing_parameters']['remove_noise'].items():
            if isinstance(v, (HVSRData, HVSRBatch)) or k not in default_values:
                continue
            # Manual input to function overrides the imported parameter values
            if orig_args[k] == default_values[k]:
                update_msg.append(f'\t\t{k} = {v} (previously {orig_args[k]})')
                orig_args[k] = v

    # Re-bind EVERY overridable parameter so imported values are actually used
    remove_method = orig_args['remove_method']
    processing_window = orig_args['processing_window']
    sat_percent = orig_args['sat_percent']
    noise_percent = orig_args['noise_percent']
    sta = orig_args['sta']
    lta = orig_args['lta']
    stalta_thresh = orig_args['stalta_thresh']
    std_ratio_thresh = orig_args['std_ratio_thresh']
    std_window_size = orig_args['std_window_size']
    min_std_win = orig_args['min_std_win']
    warmup_time = orig_args['warmup_time']
    cooldown_time = orig_args['cooldown_time']
    min_win_size = orig_args['min_win_size']
    remove_raw_noise = orig_args['remove_raw_noise']
    show_stalta_plot = orig_args['show_stalta_plot']
    verbose = orig_args['verbose']

    # Print parameters for batch objects, bare streams, and single sites that are not part of a batch
    if verbose and (isinstance(hvsr_data, HVSRBatch) or not has_keys or not hvsr_data['batch']):
        print('\nRemoving noisy data windows (remove_noise())')
        print('\tUsing the following parameters:')
        for key, value in orig_args.items():
            if key != 'hvsr_data':
                print('\t {}={}'.format(key, value))
        print()

        if update_msg:
            update_msg.insert(0, '\tThe following parameters were updated using the processing_parameters attribute:')
            for msg_line in update_msg:
                print(msg_line)
            print()

    # Set up lists
    manualList = ['manual', 'man', 'm', 'window', 'windows', 'w']
    autoList = ['auto', 'automatic', 'all', 'a']
    antitrigger = ['stalta', 'anti', 'antitrigger', 'trigger', 'at']
    movingstdList = ['moving_std', 'std', 'stdev', 'standard deviation', 'moving_stdev', 'movingstd', 'movingstdev']
    saturationThresh = ['saturation threshold', 'sat_thresh', 'sat thresh', 'saturation', 'sat', 's']
    noiseThresh = ['noise threshold', 'noise thresh', 'noise_thresh', 'noise', 'threshold', 'n']
    warmup_cooldown = ['warmup', 'cooldown', 'warm', 'cool', 'buffer', 'warmup-cooldown', 'warmup_cooldown', 'wc', 'warm_cool', 'warm-cool']
    procWinList = ['processing_window', 'processing window', 'windows', 'window', 'win', 'pw']

    # Do batch runs
    if isinstance(hvsr_data, HVSRBatch):
        # If running batch, we'll loop through each site
        hvsr_out = {}
        for site_name in hvsr_data.keys():
            site_data = hvsr_data[site_name]
            args = orig_args.copy()  # Make a copy so we don't accidentally overwrite
            args['hvsr_data'] = site_data  # Get what would normally be the "hvsr_data" variable for each site

            succeeded = False
            if site_data['processing_status']['overall_status']:
                try:
                    hvsr_out[site_name] = __remove_noise_batch(**args)  # Call another function, that lets us run this function again
                    succeeded = True
                except Exception as e:
                    if verbose:
                        print(e)

            if not succeeded:
                # Keep the unprocessed site in the output, flagged as failed,
                # without discarding results already collected for other sites
                site_data['processing_status']['remove_noise_status'] = False
                site_data['processing_status']['overall_status'] = False
                hvsr_out[site_name] = site_data

        output = HVSRBatch(hvsr_out, df_as_read=hvsr_data.input_df)
        return output

    if not isinstance(hvsr_data, (HVSRData, dict, obspy.Stream, obspy.Trace)):
        warnings.warn(f"Input of type type(hvsr_data)={type(hvsr_data)} cannot be used.")
        return hvsr_data

    # Which stream to use (input, or current)
    if isinstance(hvsr_data, (HVSRData, dict)):
        if remove_raw_noise:
            inStream = hvsr_data['input_stream'].copy()
        else:
            inStream = hvsr_data['stream'].copy()
        output = hvsr_data
    else:
        inStream = hvsr_data.copy()
        output = inStream.copy()
    outStream = inStream

    # Get remove_method into consistent format (a NEW list, so the caller's object is never mutated)
    if isinstance(remove_method, str):
        remove_method = [method.strip() for method in remove_method.split(',')]
    elif isinstance(remove_method, (list, tuple)):
        remove_method = list(remove_method)
    elif not remove_method:
        remove_method = [None]
    else:
        warnings.warn(f"Input value remove_method={remove_method} must be either string, list of strings, None, or False. No noise removal will be carried out. Please choose one of the following: 'manual', 'auto', 'antitrigger', 'noise threshold', 'warmup_cooldown'.")
        return output

    # Snapshot (not an alias), so later additions can be detected and reported
    orig_removeMeth = list(remove_method)

    # Check if any parameter values are different from default (if they are, automatically add that method to remove_method)
    methodDict = {'moving_std': ['std_ratio_thresh', 'std_window_size', 'min_std_win'],
                  'sat_thresh': ['sat_percent'],
                  'antitrigger': ['sta', 'lta', 'stalta_thresh', 'show_stalta_plot'],
                  'noise_thresh': ['noise_percent', 'min_win_size'],
                  'warmup_cooldown': ['warmup_time', 'cooldown_time'],
                  'processing_window': ['processing_window']}
    paramToMethod = {paramName: methodKey
                     for methodKey, methParamList in methodDict.items()
                     for paramName in methParamList}

    # If a non-default parameter is specified, add the method it corresponds to to remove_method
    for key, def_val in default_values.items():
        if key not in paramToMethod or key not in orig_args:
            continue
        if def_val == orig_args[key]:
            continue
        methodKey = paramToMethod[key]
        if (methodKey not in remove_method) and ('auto' not in remove_method):
            if remove_method == [None]:
                remove_method = [methodKey]
            else:
                remove_method.append(methodKey)

    # Reorder list so manual is always first, if it is specified
    do_manual = False
    manualSpecified = list(set(remove_method).intersection(manualList))
    if manualSpecified:
        do_manual = True
        manInd = manualSpecified[0]
        remove_method.remove(manInd)
        remove_method.insert(0, manInd)

    # Reorder list so auto is always first (if no manual) or second (if manual)
    # B/c if 'auto' is carried out, no other methods need to be carried out (repetitive)
    newAutoInd = 1 if do_manual else 0
    autoSpecified = list(set(remove_method).intersection(autoList))
    if autoSpecified:
        autoInd = autoSpecified[0]
        remove_method.remove(autoInd)
        remove_method.insert(newAutoInd, autoInd)

    if orig_removeMeth != remove_method:
        if verbose:
            print('\tThe remove_method parameter has been updated because non-default parameter values were detected.')
            print(f'\tThe remove_method parameter was entered as {orig_removeMeth}, but has been updated to {remove_method}')

    # REMOVE DATA FROM ANALYSIS
    # Go through each type of removal and remove
    for rem_kind in remove_method:
        try:
            if not rem_kind:
                break
            elif rem_kind.lower() in manualList:
                if isinstance(output, (HVSRData, dict)):
                    if 'x_windows_out' not in output.keys():
                        output = _select_windows(output)
                    window_list = output['x_windows_out']

                if isinstance(outStream, obspy.core.stream.Stream):
                    if window_list is not None:
                        output['stream'] = __remove_windows(inStream, window_list, warmup_time)
                    else:
                        output = _select_windows(output)
                elif isinstance(output, (HVSRData, dict)):
                    pass
                else:
                    # Raised (not just constructed) so the handler below reports it
                    raise RuntimeError("Only obspy.core.stream.Stream data type is currently supported for manual noise removal method.")
            elif rem_kind.lower() in autoList:
                outStream = __remove_moving_std(stream=outStream, std_ratio_thresh=std_ratio_thresh, std_window_s=std_window_size, min_win_size=min_std_win, verbose=verbose)
                outStream = __remove_noise_saturate(outStream, sat_percent=sat_percent, min_win_size=min_win_size, verbose=verbose)
                # Break for-loop, since all the rest are already done as part of auto
                break
            elif rem_kind.lower() in antitrigger:
                outStream = __remove_anti_stalta(outStream, sta=sta, lta=lta, thresh=stalta_thresh, show_stalta_plot=show_stalta_plot, verbose=verbose)
            elif rem_kind.lower() in movingstdList:
                outStream = __remove_moving_std(stream=outStream, std_ratio_thresh=std_ratio_thresh, std_window_s=std_window_size, min_win_size=min_std_win, verbose=verbose)
            elif rem_kind.lower() in saturationThresh:
                outStream = __remove_noise_saturate(outStream, sat_percent=sat_percent, min_win_size=min_win_size, verbose=verbose)
            elif rem_kind.lower() in noiseThresh:
                outStream = __remove_noise_thresh(outStream, noise_percent=noise_percent, lta=lta, min_win_size=min_win_size, verbose=verbose)
            elif rem_kind.lower() in warmup_cooldown:
                outStream = __remove_warmup_cooldown(stream=outStream, warmup_time=warmup_time, cooldown_time=cooldown_time, verbose=verbose)
            elif rem_kind.lower() in procWinList and str(processing_window).lower() != 'none':
                outStream = _keep_processing_windows(stream=outStream, processing_window=processing_window, verbose=verbose)
            else:
                if len(remove_method) == 1:
                    warnings.warn(f"Input value remove_method={remove_method} is not recognized. No noise removal will be carried out. Please choose one of the following: 'manual', 'auto', 'antitrigger', 'noise threshold', 'warmup_cooldown'.")
                    break
                warnings.warn(f"Input value remove_method={remove_method} is not recognized. Continuing with other noise removal methods.")
        except Exception as e:
            # Best effort: a failing method is reported and skipped, the others still run
            print(f'\t *Error with {rem_kind} method. Data was not removed using that method.')
            print(f'\t *{e}')

    # Add output
    if isinstance(output, (HVSRData, dict)):
        if isinstance(outStream, (obspy.Stream, obspy.Trace)):
            output['stream_edited'] = outStream
        else:
            output['stream_edited'] = outStream['stream']
        output['input_stream'] = hvsr_data['input_stream']

        if 'processing_parameters' not in output.keys():
            output['processing_parameters'] = {}
        output['processing_parameters']['remove_noise'] = {}
        for key, value in orig_args.items():
            output['processing_parameters']['remove_noise'][key] = value

        output['processing_status']['remove_noise_status'] = True
        # inspect.stack()[0][3] is this function's name; keep this call inside remove_noise itself
        output = sprit_utils._check_processing_status(output, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose)

        output = __remove_windows_from_df(output, verbose=verbose)
    elif isinstance(hvsr_data, (obspy.Stream, obspy.Trace)):
        output = outStream
    else:
        warnings.warn(f"Output of type {type(output)} for this function will likely result in errors in other processing steps. Returning hvsr_data data.")
        return hvsr_data

    output = sprit_utils._make_it_classy(output)

    # Only key-based outputs can hold the manual-window list
    if hasattr(output, 'keys') and 'x_windows_out' not in output.keys():
        output['x_windows_out'] = []

    return output
# Remove outlier psds
def remove_outlier_curves(hvsr_data, outlier_method='prototype',
outlier_threshold=50, use_percentile=True, min_pts=5,
use_hv_curves=False,
plot_engine='matplotlib', show_outlier_plot=False, generate_outlier_plot=True,
verbose=False, **kwargs):
"""Function used to remove outliers curves using a "prototype" or "dbscan" method.
Prototype method calculates a prototype curve (i.e., median) and calculates the distance of the H/V or PSD curve from each window from that prototype curve.
Currently, Root Mean Square Error is used to calculate the distance for each windowed H/V or PSD curve at each frequency step for all times.
It calculates the RMSE for the PPSD curves of each component individually. All curves are removed from analysis.
DBSCAN uses the DBSCAN method, outlier_threshold being by default the percentile value of distances of all curves from all other curves.
Distance is calculated using scipy.spatial.distance.pdist, by default with 'euclidean' distance.
The `min_pts` parameter specifies the minimum number of curves whose distance must be within the threshold distance percentile/value to be retained.
Some abberant curves often occur due to the remove_noise() function, so this should be run some time after remove_noise().
In general, the recommended workflow is to run this immediately following the `generate_psds()` function. or if use_hv_curves=True, after `process_hvsr()`.
Parameters
----------
hvsr_data : dict
Input dictionary containing all the values and parameters of interest
outlier_method : str, default='prototype'
The method to use for outlier detection. Currently, 'dbscan' and 'prototype' is supported.
outlier_threshold : float or int, default=98
The Root Mean Square Error value to use as a threshold for determining whether a curve is an outlier.
This averages over each individual entire curve so that curves with very abberant data (often occurs when using the remove_noise() method), can be identified.
Otherwise, specify a float or integer to use as the cutoff RMSE value (all curves with RMSE above will be removed)
use_percentile : float, default=True
Whether outlier_threshold should be interepreted as a raw RMSE value or as a percentile of the RMSE values.
min_pts : int, default=5
The minimum number of points to use for the outlier detection method.
This is only used if outlier_method='dbscan'
This is minimum number of points a point needs in its neighborhood to not be considered an outlier.
use_hv_curves : bool, default=False
Whether to use the calculated HV Curve or the individual components. This can only be True after process_hvsr() has been run.
show_plot : bool, default=False
Whether to show a plot of the removed data
verbose : bool, default=False
Whether to print output of function to terminal
Returns
-------
hvsr_data : dict
Input dictionary with values modified based on work of function.
SEE ALSO
--------
[scipy.spatial.distance.pdist](https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html#scipy.spatial.distance.pdist)
"""
# Setup function
#Get intput paramaters
orig_args = locals().copy()
start_time = datetime.datetime.now()
# Update with processing parameters specified previously in input_params, if applicable
if 'processing_parameters' in hvsr_data.keys():
if 'remove_outlier_curves' in hvsr_data['processing_parameters'].keys() and 'remove_noise' in hvsr_data['processing_parameters'].keys():
update_msg = []
for k, v in hvsr_data['processing_parameters']['remove_noise'].items():
defaultVDict = dict(zip(inspect.getfullargspec(remove_outlier_curves).args[1:],
inspect.getfullargspec(remove_outlier_curves).defaults))
# Manual input to function overrides the imported parameter values
if (not isinstance(v, (HVSRData, HVSRBatch))) and (k in orig_args.keys()) and (orig_args[k]==defaultVDict[k]):
update_msg.append(f'\t\t{k} = {v} (previously {orig_args[k]})')
orig_args[k] = v
# Reset parameters in case of manual override of imported parameters
outlier_method = orig_args['outlier_method']
outlier_threshold = orig_args['outlier_threshold']
use_percentile = orig_args['use_percentile']
min_pts = orig_args['min_pts']
use_hv_curves = orig_args['use_hv_curves']
plot_engine = orig_args['plot_engine']
show_outlier_plot = orig_args['show_outlier_plot']
generate_outlier_plot = orig_args['generate_outlier_plot']
verbose = orig_args['verbose']
# Allow skipping step if outlier_method specified as None (may help GUIs)
if str(outlier_method).lower() == 'none' or outlier_method is None:
return hvsr_data
#Print if verbose, which changes depending on if batch data or not
if (verbose and isinstance(hvsr_data, HVSRBatch)) or (verbose and not hvsr_data['batch']):
if isinstance(hvsr_data, HVSRData) and hvsr_data['batch']:
pass
else:
print('\nRemoving outlier curves from further analysis (remove_outlier_curves())')
print('\tUsing the following parameters:')
for key, value in orig_args.items():
if key == 'hvsr_data':
pass
else:
print('\t {}={}'.format(key, value))
print()
if 'processing_parameters' in hvsr_data.keys() and 'remove_outlier_curves' in hvsr_data['processing_parameters'].keys():
if update_msg != []:
update_msg.insert(0, '\tThe following parameters were updated using the processing_parameters attribute:')
for msg_line in update_msg:
print(msg_line)
print()
#First, divide up for batch or not
#Site is in the keys anytime it's not batch
if isinstance(hvsr_data, HVSRBatch):
#If running batch, we'll loop through each site
hvsr_out = {}
for site_name in hvsr_data.keys():
args = orig_args.copy() #Make a copy so we don't accidentally overwrite
args['hvsr_data'] = hvsr_data[site_name] #Get what would normally be the "hvsr_data" variable for each site
if hvsr_data[site_name]['processing_status']['overall_status']:
try:
hvsr_out[site_name] = __remove_outlier_curves(**args) #Call another function, that lets us run this function again
except:
hvsr_out = hvsr_data
hvsr_out[site_name]['processing_status']['remove_outlier_curves_status'] = False
hvsr_out[site_name]['processing_status']['overall_status'] = False
else:
hvsr_out = hvsr_data
hvsr_out[site_name]['processing_status']['remove_outlier_curves_status'] = False
hvsr_out[site_name]['processing_status']['overall_status'] = False
hvsr_out = HVSRBatch(hvsr_out, df_as_read=hvsr_data.input_df)
hvsr_out = sprit_utils._check_processing_status(hvsr_out, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose)
return hvsr_out
dbscanList = ['dbscan', 'distance', 'dist', 'dbs', 'db', 'd']
prototypeList = ['prototype', 'proto', 'ptype', 'p',
'root mean square', 'root mean square error',
'rms', 'rmse', 'r']
# Determine names of hvsr_windows_df columns to use
if not use_hv_curves:
compNames = ['Z', 'E', 'N']
for col_name in hvsr_data['hvsr_windows_df'].columns:
if 'psd_values' in col_name and 'RMSE' not in col_name:
cName = col_name.split('_')[2]
if cName not in compNames:
compNames.append(cName)
col_prefix = 'psd_values_'
colNames = [col_prefix+cn for cn in compNames]
else:
compNames = []
for col_name in hvsr_data['hvsr_windows_df'].columns:
if col_name.startswith('HV_Curves') and "Log10" not in col_name:
compNames.append(col_name)
colNames = compNames
col_prefix = 'HV_Curves'
# Remove outlier depending on method, prototype as default if nothing else specified
if str(outlier_method).lower() == 'none' or outlier_method is None:
# Skip all outlier removal
if 'processing_parameters' not in hvsr_out.keys():
hvsr_out['processing_parameters'] = {}
hvsr_out['processing_parameters']['remove_outlier_curves'] = {}
exclude_params_list = ['hvsr_data']
for key, value in orig_args.items():
if key not in exclude_params_list:
hvsr_out['processing_parameters']['remove_outlier_curves'][key] = value
hvsr_out['processing_status']['remove_outlier_curves_status'] = None
hvsr_out = sprit_utils._check_processing_status(hvsr_out, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose)
return hvsr_out
elif str(outlier_method).lower() in dbscanList:
hvsr_out = __dbscan_outlier_detect(hvsr_data=hvsr_data, use_hv_curves=use_hv_curves,
use_percentile=use_percentile,
neighborhood_size=outlier_threshold,
dist_metric='euclidean',
min_neighborhood_pts=min_pts,
col_names=colNames,
comp_names=compNames,
col_prefix=col_prefix,
verbose=verbose)
elif str(outlier_method).lower() in prototypeList:
hvsr_out = __prototype_outlier_detect(hvsr_data, use_hv_curves=use_hv_curves,
use_percentile=use_percentile,
outlier_threshold=outlier_threshold,
col_names=colNames,
comp_names=compNames,
col_prefix=col_prefix,
verbose=verbose)
else:
hvsr_out = __prototype_outlier_detect(hvsr_data, use_hv_curves=use_hv_curves,
use_percentile=use_percentile,
outlier_threshold=outlier_threshold,
col_names=colNames,
comp_names=compNames,
col_prefix=col_prefix,
verbose=verbose)
# Show plot of removed/retained data
if plot_engine.lower() == 'matplotlib' and (generate_outlier_plot or show_outlier_plot):
hvsr_data['Outlier_Plot'] = sprit_plot.plot_outlier_curves(hvsr_data, outlier_threshold=outlier_threshold, use_percentile=use_percentile, use_hv_curves=use_hv_curves, plot_engine='matplotlib', show_plot=show_outlier_plot, verbose=verbose)
elif plot_engine.lower() == 'plotly' and (generate_outlier_plot or show_outlier_plot):
hvsr_data['Outlier_Plot'] = sprit_plot.plot_outlier_curves(hvsr_data, outlier_threshold=outlier_threshold, use_percentile=use_percentile, use_hv_curves=use_hv_curves, plot_engine='plotly', from_roc=True, show_plot=show_outlier_plot, verbose=verbose)
else:
pass
if 'processing_parameters' not in hvsr_out.keys():
hvsr_out['processing_parameters'] = {}
hvsr_out['processing_parameters']['remove_outlier_curves'] = {}
exclude_params_list = ['hvsr_data']
for key, value in orig_args.items():
if key not in exclude_params_list:
hvsr_out['processing_parameters']['remove_outlier_curves'][key] = value
hvsr_out['processing_status']['remove_outlier_curves_status'] = True
hvsr_out = sprit_utils._check_processing_status(hvsr_out, start_time=start_time, func_name=inspect.stack()[0][3], verbose=verbose)
return hvsr_out
# Just for testing
[docs]
def test_function():
    """Print a fixed message; a smoke test that the module imported and is callable."""
    message = 'is this working?'
    print(message)
# Update all elevation-related attributes
[docs]
def update_elevation(hvsr_data, updated_surface_elevation, updated_elevation_unit):
    """Function to quickly update all attributes associated with elevation of an HVSRData object

    Parameters
    ----------
    hvsr_data : HVSRData or HVSRBatch
        HVSRData or HVSRBatch object containing attributes related to elevation.
        If HVSRBatch, updated_surface_elevation should be list or tuple and
        updated_elevation_unit may either be str or list/tuple of strings.
    updated_surface_elevation : numbers.Number
        Number (float or int) with the updated elevation.
        Meters is the preferred unit. If feet are used instead, it will be converted to meters.
    updated_elevation_unit : str
        Unit used for updated_surface_elevation. If 'feet', it will be converted to meters.

    Returns
    -------
    HVSRData
        HVSRData object with all attributes related to elevation updated.
        For an HVSRBatch whose inputs are inconsistent (length mismatch or an
        unsupported unit type) a warning is issued and the batch is returned unchanged.
    """
    # Break out for HVSRBatch
    if isinstance(hvsr_data, HVSRBatch):
        n_sites = len(hvsr_data.sites)
        if len(updated_surface_elevation) != n_sites:
            warnings.warn('Elevations for HVSRBatch object could not be updated. '
                          f'Length of updated_surface_elevation ({len(updated_surface_elevation)}) must equal '
                          f'the number of sites ({n_sites}) in hvsr_data')
            return hvsr_data

        if isinstance(updated_elevation_unit, (list, tuple)):
            if len(updated_elevation_unit) != n_sites:
                warnings.warn('Elevations for HVSRBatch object could not be updated. '
                              f'Length of updated_elevation_unit ({len(updated_elevation_unit)}) must equal '
                              f'the number of sites ({n_sites}) in hvsr_data')
                return hvsr_data
        elif isinstance(updated_elevation_unit, str):
            # One unit for every site
            updated_elevation_unit = [updated_elevation_unit] * n_sites
        else:
            warnings.warn(f"updated_elevation_unit must be list, tuple, or str, not {type(updated_elevation_unit)}")
            # Nothing sensible can be indexed per site, so leave the batch untouched
            return hvsr_data

        # NOTE(review): _update_elevation is not defined in the visible part of this module;
        # confirm it exists (or whether a recursive call to update_elevation was intended).
        for i, sitename in enumerate(hvsr_data):
            hvsr_data[sitename] = _update_elevation(hvsr_data[sitename],
                                                    updated_surface_elevation[i],
                                                    updated_elevation_unit[i])
        return hvsr_data

    fList = ['feet', 'ft', 'f', 'foot', 'american', 'imperial', 'imp']

    # Convert to meters BEFORE computing the shift: the difference below is applied to
    # x_elev_m, so it has to be expressed in meters as well.
    if str(updated_elevation_unit).lower() in fList:
        updated_surface_elevation = updated_surface_elevation * 0.3048

    # Shift between the previously stored surface elevation and the new one
    # (assumes the stored 'elevation' attribute is in meters -- TODO confirm)
    if hasattr(hvsr_data, 'elevation'):
        elev_diff = hvsr_data['elevation'] - updated_surface_elevation
    else:
        elev_diff = -1 * updated_surface_elevation

    # Update parameters with elevations in them
    hvsr_data['elevation'] = updated_surface_elevation
    hvsr_data['elev_unit'] = 'meters'

    if hasattr(hvsr_data, 'x_elev_m'):
        for comp in ('Z', 'E', 'N'):
            hvsr_data['x_elev_m'][comp] = hvsr_data['x_elev_m'][comp] - elev_diff
            hvsr_data['x_elev_ft'][comp] = hvsr_data['x_elev_m'][comp] / 0.3048

    # Update elevations in Table_Report
    if hasattr(hvsr_data, 'Table_Report'):
        hvsr_data.Table_Report['Elevation'] = updated_surface_elevation
        if 'BedrockDepth' in hvsr_data.Table_Report.columns:
            hvsr_data.Table_Report['BedrockElevation'] = updated_surface_elevation - hvsr_data.Table_Report['BedrockDepth']

    # Update elevations in Print_Report
    if hasattr(hvsr_data, "Print_Report"):
        hvsr_data['Print_Report'] = re.sub(r"Elevation:\s*[\d.]+",
                                           f"Elevation: {updated_surface_elevation}",
                                           hvsr_data['Print_Report'])

    # Update elevations in HTML_Report
    if hasattr(hvsr_data, "HTML_Report"):
        hvsr_data['HTML_Report'] = re.sub(r"Elevation:\s*[\d.]+",
                                          f"Elevation: {updated_surface_elevation}",
                                          hvsr_data['HTML_Report'])

    # Update elevations in PeakReport attributes
    azList = ['HV']
    azList.extend(list(hvsr_data.hvsr_az.keys()))
    for az in azList:
        for peakReport in hvsr_data.PeakReport[az]:
            if 'Table_Report' in peakReport['Report']:  # This is a dict
                peakReport['Report']['Table_Report']['Elevation'] = updated_surface_elevation
                if 'BedrockDepth' in peakReport['Report']['Table_Report'].columns:
                    peakReport['Report']['Table_Report']['BedrockElevation'] = updated_surface_elevation - peakReport['Report']['Table_Report']['BedrockDepth']

            if 'Print_Report' in peakReport['Report']:  # This is a dict
                peakReport['Report']['Print_Report'] = re.sub(r"Elevation:\s*[\d.]+",
                                                              f"Elevation: {updated_surface_elevation}",
                                                              peakReport['Report']['Print_Report'])

    # Update processing_parameters to reflect new elevations
    hvsr_data['processing_parameters']['fetch_data']['params']['elevation'] = updated_surface_elevation
    hvsr_data['processing_parameters']['fetch_data']['params']['elev_unit'] = 'meters'
    hvsr_data['processing_parameters']['fetch_data']['params']['params']['elevation'] = updated_surface_elevation
    hvsr_data['processing_parameters']['fetch_data']['params']['params']['elev_unit'] = 'meters'

    return hvsr_data
# Update instrument response file headers in .resp format
[docs]
def update_resp_file(resp_file, new_network, new_station,
                     return_inv=True, new_channels='CHZ', new_location="",
                     starttime_new=None, endtime_new=None, new_resp_file=None,
                     existing_starttime='2015,001,00:00:00.0000', existing_endtime="No Ending Time",
                     existing_network='XX', existing_station='NS124', existing_channel='CHZ', existing_location='??'):
    """Function to update headers in .RESP instrument response files for easy copying.

    It is recommended to read this into a variable and set it as the metadata parameter of input_params
    if it is desired to correct for instrument response, for example.

    All header updates are plain text substitutions of the ``existing_*`` values
    by the corresponding new values throughout the file.

    Parameters
    ----------
    resp_file : str
        Filepath to input response file
    new_network : str
        Name of network to update header to.
    new_station : str
        Name of station to update header to.
    return_inv : bool, optional
        Whether to return an obspy inventory object.
        If False, a .RESP file will be saved in the same directory as resp_file, by default True
    new_channels : str, optional
        Name or list of channels to update the header to.
        If list, multiple inventory objects will be created/saved, by default 'CHZ'
    new_location : str, optional
        New instrument location attribute to update header to, by default ""
    starttime_new : obspy.UTCDateTime, optional
        Input to update starttime. Must be readable by obspy.UTCDateTime(), by default None
    endtime_new : obspy.UTCDateTime, optional
        Input to update endtime. Must be readable by obspy.UTCDateTime(), by default None
    new_resp_file : str, optional
        Filepath to designate for .RESP file output, if desired (and return_inv=False)
        If None, uses same directory as resp_file, by default None.
        When several channels are written to an explicit path, the channel code is
        appended to the file stem (e.g. ``out_EHZ.resp``) so each channel gets its own file.
    existing_starttime : str, optional
        Start time text as it appears in the input file, by default '2015,001,00:00:00.0000'
    existing_endtime : str, optional
        End time text as it appears in the input file, by default "No Ending Time"
    existing_network : str, optional
        Name of network as specified in input file, by default 'XX'
    existing_station : str, optional
        name of station as specified in input file, by default 'NS124'
    existing_channel : str, optional
        Name of channel as specified in input file, by default 'CHZ'
    existing_location : str, optional
        Name of location as specified in input file, by default '??'

    Returns
    -------
    obspy.Inventory
        Only returned if return_inv = True (None if new_channels is empty).
        Inventories of the individual channels are added together.
    """
    with open(resp_file) as inFile:
        respTextIN = inFile.read()

    respText = respTextIN.replace(existing_network, new_network)
    respText = respText.replace(existing_station, new_station)
    respTextNoChann = respText.replace(existing_location, new_location)

    if not isinstance(new_channels, (list, tuple)):
        new_channels = [new_channels]

    # Build the replacement time stamps directly (year,julian-day,time) instead of patching
    # pieces of the default template, which only worked for the default existing_starttime
    # and mangled years that themselves contain "001,".
    if starttime_new is not None:
        sTime = obspy.UTCDateTime(starttime_new)
        sTimeText = f"{sTime.year},{sTime.julday},{sTime.strftime('%H:%M:%S.%f')}"
        respTextNoChann = respTextNoChann.replace(existing_starttime, sTimeText)

    if endtime_new is not None:
        eTime = obspy.UTCDateTime(endtime_new)
        respTextNoChann = respTextNoChann.replace(existing_endtime,
                                                  f"{eTime.year},{eTime.julday},{eTime.strftime('%H:%M:%S.%f')}")

    explicit_output = None if new_resp_file is None else pathlib.Path(new_resp_file)
    several_channels = len(new_channels) > 1

    invList = []
    for newcha in new_channels:
        channelText = respTextNoChann.replace(existing_channel, newcha)

        if return_inv:
            invList.append(obspy.read_inventory(io.StringIO(channelText)))
            continue

        # Work out this channel's own output path; the user-supplied path is never
        # overwritten so that later channels do not clobber the first one.
        if explicit_output is None:
            # NOTE(review): the station code appears twice in this name; the third field
            # may have been meant to be the location code. Kept as-is for compatibility.
            outPath = pathlib.Path(resp_file).parent.joinpath(
                f"RESP_{new_network}.{new_station}.{new_station}.{newcha}.resp")
        elif several_channels:
            outPath = explicit_output.with_name(f"{explicit_output.stem}_{newcha}{explicit_output.suffix}")
        else:
            outPath = explicit_output

        with open(outPath, 'w') as outFile:
            # Write the updated response text (not the file path)
            outFile.write(channelText)

    if return_inv:
        inv = None
        for r in invList:
            inv = r if inv is None else inv + r
        return inv
# BATCH FUNCTIONS: various functions that are used to help the regular functions handle batch data
# Helper function for batch processing of check_peaks
def __check_peaks_batch(**check_peaks_kwargs):
    """Run check_peaks() for a single site of a batch.

    On any error a RuntimeWarning naming the site is issued and the
    unmodified ``hvsr_data`` keyword argument is returned instead.
    """
    try:
        site_result = check_peaks(**check_peaks_kwargs)
        if check_peaks_kwargs['verbose']:
            site_label = site_result['input_params']['site']
            print('\t{} succesfully completed check_peaks()'.format(site_label))
        return site_result
    except:
        failed_site = check_peaks_kwargs['hvsr_data']['input_params']['site']
        warnings.warn(f"Error in check_peaks({failed_site}, **check_peaks_kwargs)", RuntimeWarning)
        return check_peaks_kwargs['hvsr_data']
# Support function for running batch
def __generate_ppsds_batch(**generate_psds_kwargs):
    """Run generate_psds() for a single site of a batch.

    If generate_psds() (or the verbose status message) raises an Exception,
    the error is printed, a RuntimeWarning naming the site is issued, and the
    ``params`` keyword argument is returned unchanged.
    """
    try:
        processed = generate_psds(**generate_psds_kwargs)
        if generate_psds_kwargs['verbose']:
            print('\t{} successfully completed generate_psds()'.format(processed['site']))
        return processed
    except Exception as err:
        print(err)
        fallback = generate_psds_kwargs['params']
        warnings.warn(f"Error in generate_psds({fallback['site']}, **generate_psds_kwargs)", RuntimeWarning)
        return fallback
# Helper function for batch processing of get_report
def __get_report_batch(**get_report_kwargs):
    """Run get_report() for a single site of a batch.

    When ``verbose`` is set and 'print' is not among the requested
    ``report_formats``, get_report() is called a second time with
    ``report_formats='print'`` so the report is also shown in the terminal.
    On any error the message is printed (verbose) or issued as a RuntimeWarning,
    and the ``hvsr_results`` keyword argument is returned unchanged.
    """
    try:
        report_output = get_report(**get_report_kwargs)

        print('\n\n\n')  # add some 'whitespace'
        # Print if verbose, but selected report_formats was not print
        needs_print_pass = get_report_kwargs['verbose'] and 'print' not in get_report_kwargs['report_formats']
        if needs_print_pass:
            get_report_kwargs['report_formats'] = 'print'
            get_report(**get_report_kwargs)
        return report_output
    except:
        failed_site = get_report_kwargs['hvsr_results']['input_params']['site']
        warnMsg = f"Error in get_report({failed_site}, **get_report_kwargs)"
        if not get_report_kwargs['verbose']:
            warnings.warn(warnMsg, RuntimeWarning)
        else:
            print('\t'+warnMsg)
        return get_report_kwargs['hvsr_results']
# Helper function for batch processing of azimuth
def __azimuth_batch(**azimuth_kwargs):
    """Run calculate_azimuth() for a single site of a batch.

    Returns the output of calculate_azimuth(). If it fails, a RuntimeWarning
    naming the site (when it can be determined) is issued and the original
    exception is re-raised so the caller can flag the site as failed.

    Previously the error handler looked up a non-existent 'input' keyword and
    then returned a variable that was never assigned, so the real error was
    hidden behind a KeyError/UnboundLocalError.
    """
    try:
        hvsr_data = calculate_azimuth(**azimuth_kwargs)

        if azimuth_kwargs.get('verbose', False):
            if 'input_params' in hvsr_data.keys():
                print('\t{} successfully completed calculate_azimuth()'.format(hvsr_data['input_params']['site']))
            elif 'site' in hvsr_data.keys():
                print('\t{} successfully completed calculate_azimuth()'.format(hvsr_data['site']))
    except Exception:
        # Best-effort site label: the input object may have been passed under any of these names
        site = 'unknown site'
        for input_key in ('hvsr_data', 'input', 'params'):
            try:
                site = azimuth_kwargs[input_key]['site']
                break
            except Exception:
                continue  # label is only cosmetic; try the next candidate
        warnings.warn(f"Error in calculate_azimuth({site}, **azimuth_kwargs)", RuntimeWarning)
        raise

    return hvsr_data
# Helper function for batch processing of remove_noise
def __remove_noise_batch(**remove_noise_kwargs):
    """Run remove_noise() for a single site of a batch.

    Returns the output of remove_noise(). If it fails, a RuntimeWarning naming
    the site (when it can be determined) is issued and the original exception
    is re-raised so the caller can flag the site as failed.

    Previously the error handler looked up a non-existent 'input' keyword and
    then returned a variable that was never assigned, so the real error was
    hidden behind a KeyError/UnboundLocalError.
    """
    try:
        hvsr_data = remove_noise(**remove_noise_kwargs)

        if remove_noise_kwargs.get('verbose', False):
            if 'input_params' in hvsr_data.keys():
                print('\t{} successfully completed remove_noise()'.format(hvsr_data['input_params']['site']))
            elif 'site' in hvsr_data.keys():
                print('\t{} successfully completed remove_noise()'.format(hvsr_data['site']))
    except Exception:
        # Best-effort site label: the input object may have been passed under any of these names
        site = 'unknown site'
        for input_key in ('hvsr_data', 'input', 'params'):
            try:
                site = remove_noise_kwargs[input_key]['site']
                break
            except Exception:
                continue  # label is only cosmetic; try the next candidate
        warnings.warn(f"Error in remove_noise({site}, **remove_noise_kwargs)", RuntimeWarning)
        raise

    return hvsr_data
# Helper function batch processing of remove_outlier_curves
def __remove_outlier_curves(**remove_outlier_curves_kwargs):
    """Run remove_outlier_curves() for a single site of a batch.

    Returns the output of remove_outlier_curves(). If it fails, a RuntimeWarning
    naming the site (when it can be determined) is issued and the original
    exception is re-raised; the batch branch of remove_outlier_curves() catches
    it and marks the site's processing status as failed.

    Previously the error handler looked up an 'input' keyword, although the
    batch caller passes the site data as 'hvsr_data', so the handler itself
    raised KeyError and the warning was never emitted.
    """
    try:
        hvsr_data = remove_outlier_curves(**remove_outlier_curves_kwargs)

        if remove_outlier_curves_kwargs.get('verbose', False):
            if 'input_params' in hvsr_data.keys():
                print('\t{} successfully completed remove_outlier_curves()'.format(hvsr_data['input_params']['site']))
            elif 'site' in hvsr_data.keys():
                print('\t{} successfully completed remove_outlier_curves()'.format(hvsr_data['site']))
    except Exception:
        # Best-effort site label: the input object may have been passed under any of these names
        site = 'unknown site'
        for input_key in ('hvsr_data', 'input', 'params'):
            try:
                site = remove_outlier_curves_kwargs[input_key]['site']
                break
            except Exception:
                continue  # label is only cosmetic; try the next candidate
        warnings.warn(f"Error in remove_outlier_curves({site}, **remove_outlier_curves_kwargs)", RuntimeWarning)
        raise

    return hvsr_data
# Batch function for plot_hvsr()
def __hvsr_plot_batch(**hvsr_plot_kwargs):
    """Run plot_hvsr() for a single site of a batch.

    Returns whatever plot_hvsr() returns; on any error a RuntimeWarning naming
    the site is issued and the ``hvsr_data`` keyword argument is returned instead.
    """
    try:
        return plot_hvsr(**hvsr_plot_kwargs)
    except:
        failed_site = hvsr_plot_kwargs['hvsr_data']['input_params']['site']
        warnings.warn(f"Error in plotting ({failed_site}, **hvsr_plot_kwargs)", RuntimeWarning)
        return hvsr_plot_kwargs['hvsr_data']
# Support function for batch of plot_azimuth()
def __plot_azimuth_batch(**plot_azimuth_kwargs):
    """Run plot_azimuth() for a single site of a batch and store its output on the site data.

    The site data object is taken from the keyword arguments ('hvsr_data',
    falling back to 'params' or 'input'); the value returned by plot_azimuth()
    is stored under its 'Azimuth_Fig' key. If plotting fails, the error is
    printed (verbose) or issued as a RuntimeWarning and the site data is
    returned without the figure.

    Previously the site data variable was subscripted before ever being
    assigned, so the figure was never stored and every call took the error path.

    Raises
    ------
    KeyError
        If the keyword arguments do not contain the site data.
    """
    for input_key in ('hvsr_data', 'params', 'input'):
        if input_key in plot_azimuth_kwargs:
            hvsr_data = plot_azimuth_kwargs[input_key]
            break
    else:
        raise KeyError("plot_azimuth keyword arguments must contain the site data ('hvsr_data' or 'params')")

    verbose = plot_azimuth_kwargs.get('verbose', False)

    try:
        hvsr_data['Azimuth_Fig'] = plot_azimuth(**plot_azimuth_kwargs)
        if verbose:
            print('\t{} successfully completed plot_azimuth()'.format(hvsr_data['input_params']['site']))
    except Exception:
        # Best-effort site label for the message
        site = 'unknown site'
        try:
            if 'site' in hvsr_data.keys():
                site = hvsr_data['site']
            elif 'input_params' in hvsr_data.keys():
                site = hvsr_data['input_params']['site']
        except Exception:
            pass  # label is only cosmetic

        errMsg = f"Error in plot_azimuth({site}, **plot_azimuth_kwargs)"
        if verbose:
            print('\t'+errMsg)
        else:
            warnings.warn(errMsg, RuntimeWarning)

    return hvsr_data
# Helper function for batch version of process_hvsr()
def __process_hvsr_batch(**process_hvsr_kwargs):
    """Run process_hvsr() for a single site of a batch.

    On any error the message is printed (verbose) or issued as a
    RuntimeWarning, and the ``params`` keyword argument is returned unchanged.
    """
    try:
        processed = process_hvsr(**process_hvsr_kwargs)
        if process_hvsr_kwargs['verbose']:
            site_label = processed['input_params']['site']
            print('\t{} successfully completed process_hvsr()'.format(site_label))
        return processed
    except:
        failed_site = process_hvsr_kwargs['params']['site']
        errMsg = f"Error in process_hvsr({failed_site}, **process_hvsr_kwargs)"
        if not process_hvsr_kwargs['verbose']:
            warnings.warn(errMsg, RuntimeWarning)
        else:
            print('\t'+errMsg)
        return process_hvsr_kwargs['params']
# OTHER HELPER FUNCTIONS
# HELPER functions for fetch_data() and get_metadata()
# Read in metadata .inv file, specifically for RaspShake
def _update_shake_metadata(filepath, params, write_path='', verbose=False):
    """Reads static metadata file provided for Rasp Shake and updates with input parameters. Used primarily in the get_metadata() function.

    Parameters
    ----------
    filepath : str or pathlib.Path object
        Filepath to metadata file. Should be a file format supported by obspy.read_inventory().
    params : dict
        Dictionary containing necessary keys/values for updating, currently only supported for STATIONXML with Raspberry Shakes.
            Necessary keys: 'net', 'sta', 'params' (the nested dict that also receives the inventory)
            Optional keys: 'longitude', 'latitude', 'elevation', 'depth' (set to '0' in params if absent);
            'site' is used for the output file name when write_path is a directory.
    write_path : str, default=''
        If specified, filepath to write to updated inventory file to.
        If it is a directory, the file is named <net>_<sta>_<site>_response.xml.
    verbose : bool, default=False
        Whether to print a status line.

    Returns
    -------
    params : dict
        Updated params dict with new key:value pair with updated updated obspy.inventory object (key="inv")
    """
    if verbose:
        print("\tUpdating Metadata for Raspberry Shake Instrument Type")

    network = params['net']
    station = params['sta']

    optKeys = ['longitude', 'latitude', 'elevation', 'depth']
    for k in optKeys:
        if k not in params.keys():
            params[k] = '0'

    # NOTE(review): a pyproj Transformer (input_crs -> EPSG:4326) used to be built here but was
    # never applied; coordinates are written exactly as given. Confirm they are already WGS84.
    xcoord = str(params['longitude'])
    ycoord = str(params['latitude'])
    elevation = str(params['elevation'])
    depth = str(params['depth'])

    startdate = str(datetime.datetime(year=2023, month=2, day=15))  # First day sprit code worked :)
    enddate = str(datetime.datetime.today())

    filepath = sprit_utils._checkifpath(filepath)
    tree = ET.parse(str(filepath))
    root = tree.getroot()

    prefix = "{http://www.fdsn.org/xml/station/1}"

    # Attribute updates
    for item in root.iter(prefix+'Channel'):
        item.attrib['startDate'] = startdate
        item.attrib['endDate'] = enddate

    for item in root.iter(prefix+'Station'):
        item.attrib['code'] = station
        item.attrib['startDate'] = startdate
        item.attrib['endDate'] = enddate

    for item in root.iter(prefix+'Network'):
        item.attrib['code'] = network

    # Element text updates
    newText = {'Latitude': ycoord, 'Longitude': xcoord, 'Elevation': elevation, 'Depth': depth}
    for tag, value in newText.items():
        for item in root.iter(prefix+tag):
            item.text = value

    for item in root.iter(prefix+'Created'):
        item.text = str(datetime.datetime.now())

    # Set up (and) export
    inv = None
    if write_path != '':
        try:
            write_path = pathlib.Path(write_path)
            if write_path.is_dir():
                # Use the same net/sta values written into the XML above
                fname = network+'_'+station+'_'+params['site']
                fname = fname + '_response.xml'
                write_file = write_path.joinpath(fname)
            else:
                write_file = write_path
            tree.write(write_file, xml_declaration=True, method='xml', encoding='UTF-8')
            inv = obspy.read_inventory(write_file, format='STATIONXML', level='response')
        except Exception:
            warnings.warn(f'write_path={write_path} is not recognized as a filepath, updated metadata file will not be written')
            inv = None

    if inv is None:
        # Either no write_path was given or writing failed: build the inventory from a
        # temporary copy so that 'inv' is always defined.
        try:
            stringRoot = ET.tostring(root, encoding='UTF-8', method='xml')
            with tempfile.NamedTemporaryFile(delete=False) as tpf:
                tpf.write(stringRoot)
            # File is closed (and therefore flushed) before obspy reopens it by name
            try:
                inv = obspy.read_inventory(tpf.name, format='STATIONXML', level='response')
            finally:
                os.remove(tpf.name)
        except Exception:
            # Last resort: write next to this module, read, then clean up
            write_file = pathlib.Path(__file__).with_name('metadata.xml')
            tree.write(write_file, xml_declaration=True, method='xml', encoding='UTF-8')
            try:
                inv = obspy.read_inventory(write_file.as_posix(), format='STATIONXML', level='response')
            finally:
                os.remove(write_file.as_posix())

    params['inv'] = inv
    params['params']['inv'] = inv
    return params
# Support function for get_metadata()
def _read_RS_Metadata(params, source=None):
    """Function to read the metadata from Raspberry Shake using the StationXML file provided by the company.

    Intended to be used within the get_metadata() function.

    For every channel in params['cha'], the instrument sensitivity, the stage-1
    gain, poles (numbered 0-3) and zeros (numbered 4-6) are read from the
    StationXML and stored in a dictionary keyed by channel. Raspberry Shake
    channel codes EHZ/EHN/EHE are keyed by their last letter (Z/N/E).

    Parameters
    ----------
    params : dict
        The parameter dictionary output from input_params() and read into get_metadata().
        Uses keys 'inv' (obspy Inventory or path to a StationXML file) or 'metadata'
        (path, used when 'inv' is absent), 'sta', 'net', 'cha', and the nested 'params' dict.
    source : optional
        Not used by this function.

    Returns
    -------
    params : dict
        Further modified parameter dictionary ('inv' and 'paz' keys set; 'paz' also
        stored in params['params'])
    """
    if 'inv' in params.keys():
        inv = params['inv']
    else:
        sprit_utils._checkifpath(params['metadata'])
        inv = obspy.read_inventory(params['metadata'], format='STATIONXML', level='response')
        params['inv'] = inv

    station = params['sta']
    network = params['net']
    channels = params['cha']

    if isinstance(inv, obspy.core.inventory.inventory.Inventory):
        # Serialize the inventory to a temporary StationXML file and parse that
        tpf = tempfile.NamedTemporaryFile(delete=False)
        tpf.close()  # closed first so the file can be reopened by name on every platform
        try:
            inv.write(tpf.name, format='STATIONXML')
            root = ET.parse(tpf.name).getroot()
        finally:
            os.remove(tpf.name)
    else:
        # 'inv' holds a path: parse the file itself (ET.parse cannot take an Inventory object)
        invPath = sprit_utils._checkifpath(inv)
        inv = obspy.read_inventory(str(invPath), format='STATIONXML', level='response')
        params['inv'] = inv
        root = ET.parse(str(invPath)).getroot()

    # This is specific to RaspShake
    prefix = "{http://www.fdsn.org/xml/station/1}"
    poleNumbers = [str(n) for n in range(4)]
    zeroNumbers = [str(n) for n in range(4, 7)]
    rsCList = ['EHZ', 'EHN', 'EHE']

    def _read_complex(pzPath, tag, numbers):
        # Collect Real/Imaginary pairs of the numbered <Pole>/<Zero> elements as complex values
        values = []
        for num in numbers:
            entryPath = pzPath + prefix + tag + "[@number='" + num + "']/"
            realItems = root.findall(entryPath + prefix + "Real")
            imagItems = root.findall(entryPath + prefix + "Imaginary")
            for realItem, imagItem in zip(realItems, imagItems):
                values.append(complex(float(realItem.text), float(imagItem.text)))
        return values

    paz = {}
    for c in channels:
        # Paths are built per channel so each channel gets its own sensitivity/gain
        responsePath = ("./"+prefix+"Network[@code='"+network+"']/"
                        + prefix+"Station[@code='"+station+"']/"
                        + prefix+"Channel[@code='"+c+"']/"
                        + prefix+"Response/")
        stagePath = responsePath + prefix + "Stage[@number='1']/"
        sensitivityPath = responsePath + prefix + "InstrumentSensitivity/" + prefix + "Value"
        gainPath = stagePath + prefix + "StageGain/" + prefix + "Value"
        pzPath = stagePath + prefix + "PolesZeros/"

        channelPaz = {}
        for item in root.findall(sensitivityPath):
            channelPaz['sensitivity'] = float(item.text)

        for item in root.findall(gainPath):
            channelPaz['gain'] = float(item.text)

        channelPaz['poles'] = _read_complex(pzPath, 'Pole', poleNumbers)
        channelPaz['zeros'] = _read_complex(pzPath, 'Zero', zeroNumbers)

        if str(c).upper() in rsCList:
            pazKey = str(c)[-1].upper()
        else:
            pazKey = str(c)
        paz[pazKey] = channelPaz

    params['paz'] = paz
    params['params']['paz'] = paz
    return params
# Helper function to sort channels
def _sort_channels(input, source, verbose):
if source!='batch':
input = {'SITENAME': {'stream':input}} #Make same structure as batch
for site in input.keys():
rawDataIN = input[site]['stream']
if rawDataIN is None:
if verbose:
raise RuntimeError("No data was read using specified parameters {}".format(input[site]))
else:
raise RuntimeError("No data was read using specified parameters")
elif isinstance(rawDataIN, obspy.core.stream.Stream):
#Make sure z component is first
dataIN = rawDataIN.sort(['channel'], reverse=True) #z, n, e order
else:
#Not usually used anymore, retained just in case
dataIN = []
for i, st in enumerate(rawDataIN):
if 'Z' in st[0].stats['channel']:#).split('.')[3]:#[12:15]:
dataIN.append(rawDataIN[i])
else:
dataIN.append(rawDataIN[i].sort(['channel'], reverse=True)) #z, n, e order
input[site]['stream'] = dataIN
if source=='batch':
#Return a dict
output = input
else:
#Return a stream otherwise
output = input[site]['stream']
return output
# Trim data
def _trim_data(input, stream=None, export_dir=None, data_export_format=None, source=None, **kwargs):
    """Function to trim data to start and end time

    Trim data to start and end times so that stream being analyzed only contains wanted data.
    Can also export data to specified directory using a specified site name and/or data_export_format

    Parameters
    ----------
    input : HVSRData
        HVSR Data class containing input parameters for trimming
        ('starttime', 'endtime', 'site' and, if stream is None, 'stream').
    stream : obspy.stream object
        Obspy stream to be trimmed
    export_dir: str or pathlib obj
        Output filepath to export trimmed data to. If not specified, does not export.
    data_export_format : str or None, default=None
        If None (or a bool), and export_dir is specified, format defaults to .mseed. Otherwise, the value is used
        as the file extension of the exported file written with the obspy.core.stream.Stream.write() method.
        https://docs.obspy.org/packages/autogen/obspy.core.stream.Stream.write.html#obspy.core.stream.Stream.write
    source : optional
        Not used by this function.
    **kwargs
        'starttime', 'endtime' and 'site' override the corresponding values of input.
        All other keyword arguments are passed directly to obspy.core.stream.Stream.trim() method.

    Returns
    -------
    st_trimmed : obspy.stream object
        Obpsy Stream trimmed to start and end times
    """
    # Values given as keyword arguments take precedence over those stored on input
    start = kwargs['starttime'] if 'starttime' in kwargs else input['starttime']
    end = kwargs['endtime'] if 'endtime' in kwargs else input['endtime']
    site = kwargs['site'] if 'site' in kwargs else input['site']

    # These are consumed here; forwarding them to Stream.trim() would raise TypeError
    # (duplicate starttime/endtime, unknown 'site').
    trim_kwargs = {k: v for k, v in kwargs.items() if k not in ('starttime', 'endtime', 'site')}

    if stream is not None:
        st_trimmed = stream.copy()
    elif 'stream' in input.keys():
        st_trimmed = input['stream'].copy()
    else:
        raise UnboundLocalError("stream not specified. Must either be specified using stream parameter or as a key in the input parameters (input['stream'])")

    trimStart = obspy.UTCDateTime(start)
    trimEnd = obspy.UTCDateTime(end)

    # If data is contained in a masked array, split to undo masked array
    if isinstance(st_trimmed[0].data, np.ma.masked_array):
        st_trimmed = st_trimmed.split()
        # This split is undone with the .merge() method a few lines down

    # Only trim if at least one trace overlaps the requested window
    # (trimming the whole stream once is equivalent to repeating it per trace)
    overlaps = any(not (trimStart > tr.stats.endtime or trimEnd < tr.stats.starttime)
                   for tr in st_trimmed)
    if overlaps:
        st_trimmed.trim(starttime=trimStart, endtime=trimEnd, **trim_kwargs)

    st_trimmed.merge(method=1)

    # Format export filepath, if exporting
    if export_dir is not None:
        sitePrefix = '' if site is None else site+'_'

        if data_export_format is None or isinstance(data_export_format, bool):
            extension = '.mseed'
        elif '.' not in data_export_format:
            extension = '.'+data_export_format
        else:
            extension = data_export_format

        firstStats = st_trimmed[0].stats
        net = firstStats.network
        sta = firstStats.station
        loc = firstStats.location

        yr = str(firstStats.starttime.year)
        strtD = str(firstStats.starttime.date)
        startClock = str(firstStats.starttime.time)
        endClock = str(firstStats.endtime.time)
        strtT = startClock[0:2] + startClock[3:5]  # HHMM
        endT = endClock[0:2] + endClock[3:5]  # HHMM
        doy = str(firstStats.starttime.utctimetuple().tm_yday).zfill(3)

        export_dir = sprit_utils._checkifpath(export_dir)
        export_dir = str(export_dir).replace('\\', '/')
        if export_dir[-1] == '/':
            export_dir = export_dir[:-1]

        filename = sitePrefix+net+'.'+sta+'.'+loc+'.'+yr+'.'+doy+'_'+strtD+'_'+strtT+'-'+endT+extension
        exportFile = export_dir+'/'+filename

        # Take care of masked arrays for writing purposes
        if 'fill_value' in kwargs.keys():
            for tr in st_trimmed:
                if isinstance(tr.data, np.ma.masked_array):
                    tr.data = tr.data.filled(kwargs['fill_value'])
        else:
            st_trimmed = st_trimmed.split()

        st_trimmed.write(filename=exportFile)

    return st_trimmed
# Helper function to detrend data
def __detrend_data(input, detrend, detrend_options, verbose, source):
    """Helper function to detrend data, specifically formatted for the HVSRData and HVSRBatch objects

    ``detrend`` may be False (no detrending), True (spline detrend of order
    ``detrend_options``), or one of the names 'simple', 'linear', 'constant',
    'demean', 'polynomial', 'spline' (passed to each trace's detrend() method).
    If the requested detrend raises, a 'constant' detrend is attempted; if that
    fails as well the un-detrended copy is restored. Unrecognized names leave
    the data untouched.

    For source='batch', input is a dict of per-site dicts each with a 'stream'
    key and the dict is returned; otherwise input is the stream and the stream
    is returned.
    """
    if source != 'batch':
        input = {'SITENAME': {'stream': input}}  # Make same structure as batch

    # Keyword arguments (besides type) for each recognized detrend name; built lazily
    # per trace so option conversion errors only occur when a trace is processed
    optionBuilders = {
        'simple': lambda: {},
        'linear': lambda: {},
        'constant': lambda: {},
        'polynomial': lambda: {'order': detrend_options},
        'spline': lambda: {'order': int(detrend_options), 'dspline': 1000},
    }

    for key in input.keys():
        traces = input[key]['stream']

        if detrend == False:
            pass
        elif detrend == True:
            # By default, do a spline removal
            for tr in traces:
                tr.detrend(type='spline', order=detrend_options, dspline=1000)
        else:
            undetrended = traces.copy()
            if detrend == 'demean':
                buildOptions = optionBuilders['constant']
            else:
                buildOptions = optionBuilders.get(str(detrend).lower())

            try:
                if buildOptions is not None:
                    for tr in traces:
                        tr.detrend(type=detrend, **buildOptions())
            except Exception as e:
                try:
                    for tr in traces:
                        tr.detrend(type='constant')
                    print(f'\tDetrend type {detrend} could not be carried out, using "constant" detrend instead.\n')
                except Exception as e2:
                    traces = undetrended
                    if verbose:
                        warnings.warn(f"\tDetrend error, data not detrended. \nDetrend Error Report below. Carrying on processing with non-detrended data: {e}", UserWarning)

        input[key]['stream'] = traces

    if source == 'batch':
        # Return a dict
        return input
    # Return a stream otherwise
    return input[key]['stream']
def __read_tromino_data_yellow(input_data, sampling_rate=None,
                               struct_format='H', tromino_model='3G',
                               start_byte=24576, diagnose=False,
                               return_dict=True,
                               verbose=False, **kwargs):
    """Read a Tromino "yellow" binary file into a three-trace obspy Stream.

    Header text is scanned for the file type and the site name. Acquisition
    metadata (start time, channel count, sampling rate) is read with
    ``__get_tromino_yellow_metadata`` and the samples with
    ``__extract_tromino_yellow_data``.

    Parameters
    ----------
    input_data : path-like or HVSRData
        File to read. For an HVSRData object the path is taken from its
        ``'input_data'`` entry.
    sampling_rate : int or float, optional
        Used only when the metadata cannot be read from the file (values read
        from the file take precedence). Falls back to 128 if still None.
    struct_format : str, default 'H'
        Accepted for backward compatibility; not used by this function.
    tromino_model : str, default '3G'
        Accepted for backward compatibility; not used by this function.
    start_byte : int, default 24576
        Forwarded to ``__extract_tromino_yellow_data``.
    diagnose : bool, default False
        If True, print the file size in bytes and plot the three extracted
        components.
    return_dict : bool, default True
        If True return the result dict, otherwise only the Stream.
    verbose : bool, default False
        Print header text and metadata while reading.
    **kwargs
        ``acq_date`` (date-like) and ``starttime`` (time-like) are used to
        build the start time when the file metadata cannot be read.

    Returns
    -------
    dict or obspy.Stream
        Dict with keys ``'site_name'``, ``'header'``, ``'gps_data'``,
        ``'seismometer_data'`` and ``'stream'``, or the Stream (Z, E, N) alone.
    """
    input_filepath = input_data
    if isinstance(input_data, HVSRData):
        input_filepath = input_data['input_data']

    # Byte-swapped copy of the file, used for the header text search
    swapped = __read_and_swap_bytes(input_filepath)
    header_text = __extract_text_sections(swapped.copy())

    result = {
        'site_name': None,
        'header': {},
        'gps_data': None,
        'seismometer_data': None,  # Will be replaced with a (3, n) numpy array
        'stream': None
    }

    if verbose:
        print("\n\t Tromino Header Information")
    for text in header_text:
        decoded = text.decode('ascii', errors='ignore')
        if verbose and len(re.findall(r'\w+', decoded)) > 0:
            print('\t\t ', decoded)
        if b'NAKAGRILLA FLASHCARD HEADER' in text:
            result['header']['file_type'] = decoded.strip('\x00')
            # Add more header parsing as needed
        if ' DATA' in decoded[:6]:
            # The site name is the second word of the " DATA ..." section
            words = re.findall(r'\w+', decoded)
            if len(words) > 1:
                result['site_name'] = words[1].strip()
                if verbose:
                    print(f"\t Site name identified from data as {result['site_name']}\n")
    station = result['site_name']

    try:
        if verbose:
            print("\t\tExtracting metadata from tromino yellow instrument:")
        metaDict = __get_tromino_yellow_metadata(input_filepath)
        if verbose:
            print(f"\t\t  Starttime: {metaDict['starttime']}\n\t\t  Number of Data Channels: {metaDict['no_data_channels']} \n\t\t  Sampling Rate: {metaDict['sampling_rate']}\n")
        inst_sTime = metaDict['starttime']
        no_channels = metaDict['no_data_channels']
        sampling_rate = metaDict['sampling_rate']
    except Exception as e:
        # Best effort: fall back to user-supplied / default acquisition values
        if verbose:
            print(e)
        acq_date = datetime.date.today()
        if 'acq_date' in kwargs:
            acq_date = kwargs['acq_date']

        starttime = datetime.time(0, 0)
        if 'starttime' in kwargs:
            starttime = kwargs['starttime']

        inst_sTime = obspy.UTCDateTime(acq_date.year, acq_date.month, acq_date.day,
                                       starttime.hour, starttime.minute,
                                       starttime.second, starttime.microsecond)
        no_channels = 3  # default to 3 channels (maybe programmatically check this at some point)

        if sampling_rate is None:
            if verbose:
                print("\t  `sampling_rate` not specified. Setting as 128 samples/second")
            sampling_rate = 128  # default value

    if station is None:
        station = 'HVSRSite'

    # Get the actual data from the tromino yellow
    dataArr = __extract_tromino_yellow_data(input_data=input_filepath, start_byte=start_byte,
                                            swapped_bytes=__read_and_swap_bytes(input_filepath, return_unswapped=True),
                                            no_channels=no_channels,
                                            data_start=0xC000)

    # Get geophone data from each channel
    compN = dataArr[0]
    compE = dataArr[1]
    compZ = dataArr[2]

    if diagnose:
        # Plot after extraction so the component arrays actually exist
        print("Total file bytes: ", len(swapped))
        fig, ax = plt.subplots(3, sharex=True, sharey=True)
        ax[0].plot(compN, linewidth=0.1, c='k')
        ax[1].plot(compE, linewidth=0.1, c='k')
        ax[2].plot(compZ, linewidth=0.1, c='k')
        plt.show()

    # An all-digit site name is also used as the location code
    loc = ''
    if station is not None and (type(station) is int or station.isdigit()):
        loc = str(station)

    # Prefer a start time carried by the input object over the instrument's
    sTime = inst_sTime
    if hasattr(input_data, 'starttime') and input_data['starttime'].datetime != NOWTIME:
        sTime = input_data['starttime']

    traceHeaderN = {'sampling_rate': sampling_rate,
                    'calib': 1,
                    'npts': len(compN),
                    'network': 'TR',
                    'location': loc,
                    'station': station,
                    'channel': '?HN',
                    'starttime': sTime,
                    'starttime_instrument': inst_sTime,
                    'site': station}
    traceHeaderE = traceHeaderN.copy()
    traceHeaderZ = traceHeaderN.copy()
    traceHeaderE['channel'] = '?HE'
    traceHeaderZ['channel'] = '?HZ'

    traceZ = obspy.Trace(data=compZ, header=traceHeaderZ)
    traceE = obspy.Trace(data=compE, header=traceHeaderE)
    traceN = obspy.Trace(data=compN, header=traceHeaderN)

    st = obspy.Stream([traceZ, traceE, traceN])
    result['stream'] = st

    if return_dict:
        return result
    return st
# Helper function to read data from Tromino Blue instruments
def __read_tromino_data_blue(input_data, sampling_rate=None,
                             channel_map={'Z':6, 'E':4, 'N':2}, data_start_buffer=113,
                             return_dict=False, verbose=False):
    """Read a Tromino "blue" binary file into a three-trace obspy Stream.

    The file header and the embedded GPS (NMEA) sentences are parsed from a
    byte-swapped copy of the file. The samples are read from the file itself
    as little-endian signed 16-bit values interleaved over 7 channels.

    Parameters
    ----------
    input_data : path-like
        File to read.
    sampling_rate : int, optional
        Samples per second. When None it is parsed from the header section
        containing "PER SECOND".
    channel_map : dict, default {'Z': 6, 'E': 4, 'N': 2}
        Column index (0-6) of each component in the 7-channel sample table.
        The default dict is shared between calls and is only read here.
    data_start_buffer : int, default 113
        Bytes to skip past the position returned by
        ``__locate_data_start_blue`` before the samples begin.
    return_dict : bool, default False
        If True return the result dict, otherwise only the Stream.
    verbose : bool, default False
        Print the detected sampling rate and zero-sample statistics, and plot
        the first 1000 samples of every channel.

    Returns
    -------
    dict or obspy.Stream
        Dict with keys ``'header'``, ``'gps_data'``, ``'seismometer_data'``
        and ``'stream'``, or the Stream (Z, E, N) alone.
    """
    # Reconfigure data for some of the analysis
    swapped = __read_and_swap_bytes(input_data)

    result = {
        'header': {},
        'gps_data': [],
        'seismometer_data': None,  # Will be replaced with a (7, n) numpy array
        'stream': None
    }

    # Extract header information (text sections)
    header_text = __extract_text_sections(swapped)
    for text in header_text:
        if b'NAKAGRILLA FLASHCARD HEADER' in text:
            result['header']['file_type'] = text.decode('ascii', errors='ignore').strip('\x00')
            # Add more header parsing as needed

    # Extract GPS NMEA sentences
    gps_data = __extract_gps_data(swapped)
    for sentence in gps_data:
        if sentence.startswith('$GPGGA'):
            # Parse GPGGA sentence (position data)
            parts = sentence.split(',')
            if len(parts) >= 15:
                try:
                    timestamp = parts[1]
                    # ddmm.mmmm / dddmm.mmmm -> decimal degrees
                    lat = float(parts[2][:2]) + float(parts[2][2:]) / 60 if parts[2] else None
                    lat_dir = parts[3]
                    lon = float(parts[4][:3]) + float(parts[4][3:]) / 60 if parts[4] else None
                    lon_dir = parts[5]

                    # Empty coordinate fields stay None instead of being negated
                    if lat is not None and lat_dir == 'S':
                        lat = -lat
                    if lon is not None and lon_dir == 'W':
                        lon = -lon

                    result['gps_data'].append({
                        'type': 'GPGGA',
                        'timestamp': timestamp,
                        'latitude': lat,
                        'longitude': lon,
                        'raw': sentence
                    })
                except (ValueError, IndexError):
                    result['gps_data'].append({'type': 'GPGGA', 'raw': sentence, 'parse_error': True})
        elif sentence.startswith('$GPZDA'):
            # Parse GPZDA sentence (date & time)
            parts = sentence.split(',')
            if len(parts) >= 5:
                try:
                    timestamp = parts[1]
                    day = parts[2]
                    month = parts[3]
                    year = parts[4]

                    result['gps_data'].append({
                        'type': 'GPZDA',
                        'timestamp': timestamp,
                        'date': f"{year}-{month}-{day}",
                        'raw': sentence
                    })
                except (ValueError, IndexError):
                    result['gps_data'].append({'type': 'GPZDA', 'raw': sentence, 'parse_error': True})

    # Find the start of the seismometer data section (after GPS data)
    seis_data_start = __locate_data_start_blue(swapped)

    # The offset past that position is always the caller-supplied buffer
    data_buffer = data_start_buffer

    # Get sampling rate
    if sampling_rate is None:
        for item in header_text:
            if "PER SECOND" in str(item):
                sampling_rate = int(str(item).split('-')[1].split("BYTE ")[1].split('PER')[0])
                if verbose:
                    print('\tSampling rate detected as:', sampling_rate)

    # Read the sample section of the file as raw bytes
    with open(input_data, 'rb') as f:
        f.seek(seis_data_start + data_buffer)
        raw_bytes = f.read()

    bytes_per_sample = 2  # 16-bit
    num_channels = 7  # 3x accel, 3x seism, 1x trigger
    total_samples = len(raw_bytes) // bytes_per_sample

    # Decode every complete sample as little-endian signed 16-bit
    data = np.frombuffer(raw_bytes[:total_samples * bytes_per_sample], dtype='<i2').astype(np.int32)

    # Ensure we have complete sets of channel data
    usable_samples = (len(data) // num_channels) * num_channels
    channel_data = data[:usable_samples].reshape(-1, num_channels)

    if verbose:
        # Analyze the data
        zero_percent = np.sum(channel_data == 0) / channel_data.size * 100
        print(f"Zero percentage: {zero_percent:.2f}%")

        # Check zeros by channel
        zeros_by_channel = np.sum(channel_data == 0, axis=0)
        samples_per_channel = channel_data.shape[0]
        print("Zero percentage by channel:")
        for i in range(num_channels):
            channel_zero_percent = zeros_by_channel[i] / samples_per_channel * 100
            print(f"Channel {i+1}: {channel_zero_percent:.2f}%")

        # Plot the first 1000 samples of each channel
        plt.figure(figsize=(15, 12))
        for i in range(num_channels):
            plt.subplot(num_channels, 1, i+1)
            plt.plot(channel_data[:1000, i])
            plt.title(f"Channel {i}")
            plt.grid(True)
        plt.tight_layout()
        plt.show()

    # Extract acquisition time and position from the GPS sentences
    acq_date = obspy.UTCDateTime().now()
    sTime = datetime.time()
    latPts = []
    lonPts = []
    elevPts = []
    for gpsPt in result['gps_data']:
        if 'ZDA' in gpsPt['type']:
            # Skip empty time/date fields (e.g. sentences logged without a fix)
            stamp = gpsPt.get('timestamp', '')
            if len(stamp) >= 6 and stamp[:6].isdigit():
                sTime = datetime.time(int(stamp[:2]), int(stamp[2:4]), int(stamp[4:6]))
            date_str = gpsPt.get('date', '')
            if date_str and all(piece.isdigit() for piece in date_str.split('-')):
                acq_date = obspy.UTCDateTime(date_str)
        if 'GGA' in gpsPt['type']:
            # Entries with parse errors or empty fields carry no usable position
            if gpsPt.get('latitude') is not None:
                latPts.append(gpsPt['latitude'])
            if gpsPt.get('longitude') is not None:
                lonPts.append(gpsPt['longitude'])
            try:
                elevPts.append(float(gpsPt['raw'].split(',')[9]))
            except (ValueError, IndexError):
                pass

    acq_date = acq_date + (sTime.hour * 60*60 + sTime.minute*60 + sTime.second)

    def _rounded_median(values):
        # NaN (without a numpy warning) when no GPS value was available
        return round(float(np.nanmedian(values)), 7) if values else float('nan')

    stats = {'network': 'TR',
             'station': 'BLUE',
             'sampling_rate': sampling_rate,
             'starttime': acq_date,
             'longitude': _rounded_median(lonPts),
             'latitude': _rounded_median(latPts),
             'input_crs': 'EPSG:4326',
             'elevation': _rounded_median(elevPts),
             'elev_unit': 'm',
             'instrument': 'Tromino Blue'
             }

    stats['channel'] = 'EHN'
    nTrace = obspy.Trace(data=channel_data.T[channel_map['N']], header=stats)
    stats['channel'] = 'EHE'
    eTrace = obspy.Trace(data=channel_data.T[channel_map['E']], header=stats)
    stats['channel'] = 'EHZ'
    zTrace = obspy.Trace(data=channel_data.T[channel_map['Z']], header=stats)

    st = obspy.Stream([zTrace, eTrace, nTrace])
    result['stream'] = st

    if return_dict:
        return result
    return st
# Get the actual data from the tromino yellow
def __extract_tromino_yellow_data(input_data, swapped_bytes, no_channels, struct_format='H', start_byte=24576, data_start=0xC000):
    """Split the sample section of a Tromino yellow file into three components.

    The bytes from ``data_start`` onward are interpreted as 16-bit samples
    (unsigned first; re-read as signed if the unsigned values have a standard
    deviation above 20000). Samples are de-interleaved with a stride of
    ``no_channels`` and each component has its median removed.

    Parameters
    ----------
    input_data : path-like
        Accepted for backward compatibility; the file is not read here.
    swapped_bytes : bytes-like
        Raw file content to take the samples from.
    no_channels : int
        Interleave stride. Components start at offsets 0,
        ``int(no_channels/3)`` and ``int(no_channels*(2/3))``.
    struct_format : str, default 'H'
        Accepted for backward compatibility; not used.
    start_byte : int, default 24576
        Accepted for backward compatibility; not used.
    data_start : int, default 0xC000
        Byte offset of the first sample in ``swapped_bytes``.

    Returns
    -------
    numpy.ndarray
        Float array of shape (3, n) holding the three median-removed
        components in the order of the offsets above.
    """
    data_bytes = swapped_bytes[data_start:]
    # Samples are 2 bytes wide: drop a dangling odd byte so frombuffer cannot fail
    data_bytes = data_bytes[:len(data_bytes) - (len(data_bytes) % 2)]

    # Try first with unsigned integers
    data_array = np.frombuffer(data_bytes, dtype=np.uint16)
    if np.std(data_array) > 20000:
        # A very large spread suggests the values wrap around: read as signed
        data_array = np.frombuffer(data_bytes, dtype=np.int16)

    channel_jump = no_channels
    startPt = 0
    channel_starts = (startPt,
                      startPt + int(channel_jump/3),
                      startPt + int(channel_jump*(2/3)))
    components = [data_array[offset::channel_jump] for offset in channel_starts]

    # A trailing partial frame leaves the components with unequal lengths,
    # which cannot be stacked; trim them all to the shortest one
    n_samples = min(len(comp) for comp in components)
    components = [comp[:n_samples] for comp in components]

    return np.array([list(comp - np.median(comp)) for comp in components])
# Read starttime, number of channels, and sampling rate
def __get_tromino_yellow_metadata(input_data, start_hex='00004020', end_hex='00004030'):
    """Read start time, channel count and sampling rate from a Tromino yellow file.

    The file is byte-swapped in pairs and the bytes in ``[start_hex, end_hex)``
    of the swapped content are decoded:

    * the bytes at odd offsets 1, 3, 5, 7, 9 and 11 give second, minute, hour,
      day, month and two-digit year; each byte's two hex digits are read as a
      decimal number (the year is offset by 2000),
    * the last two big-endian 16-bit words give the number of data channels
      and the sampling rate.

    Parameters
    ----------
    input_data : str
        Path to the binary .trc file.
    start_hex : int or str
        Start offset (int, or hex string). Default = '00004020'.
    end_hex : int or str
        End offset, exclusive (int, or hex string). Default = '00004030'.

    Returns
    -------
    dict
        ``{'starttime': obspy.UTCDateTime, 'no_data_channels': int,
        'sampling_rate': int}``

    Raises
    ------
    ValueError
        If the requested range holds fewer than 12 bytes, or a date/time byte
        does not read as a decimal number.
    """
    # normalize inputs
    start = int(start_hex, 16) if isinstance(start_hex, str) else start_hex
    end = int(end_hex, 16) if isinstance(end_hex, str) else end_hex

    with open(input_data, 'rb') as f:
        alldata = f.read()

    # Swap every byte pair; a trailing unpaired byte is left in place
    swapped = bytearray(alldata)
    paired = len(alldata) - (len(alldata) % 2)
    swapped[0:paired:2] = alldata[1:paired:2]
    swapped[1:paired:2] = alldata[0:paired:2]

    data = swapped[start:end]
    if len(data) < 12:
        raise ValueError(
            f"Metadata range {start:#x}-{end:#x} holds only {len(data)} bytes; "
            "at least 12 are needed to decode the start time."
        )

    hexList = [f"{b:02X}" for b in data]
    ints16List = [word[0] for word in struct.iter_unpack(">H", data[:len(data)//2*2])]

    starttime = obspy.UTCDateTime(year=2000+int(hexList[11]),
                                  month=int(hexList[9]),
                                  day=int(hexList[7]),
                                  hour=int(hexList[5]),
                                  minute=int(hexList[3]),
                                  second=int(hexList[1])
                                  )

    noChannels = ints16List[-2]
    sampling_rate = ints16List[-1]

    return {'starttime': starttime, 'no_data_channels': noChannels, 'sampling_rate': sampling_rate}
def __read_and_swap_bytes(input_file, return_unswapped=False):
    """Read a binary file and return its content with every byte pair swapped.

    Private function (not meant to be called except by internal functions).
    Odd-length files are handled: the final unpaired byte is kept in place.

    Parameters
    ----------
    input_file : path-like
        File to read.
    return_unswapped : bool, default False
        If True, return the file content exactly as read (no swapping).

    Returns
    -------
    bytearray or bytes
        The pair-swapped content as a ``bytearray``, or the raw ``bytes`` when
        ``return_unswapped`` is True.
    """
    with open(input_file, 'rb') as f:
        data = f.read()

    if return_unswapped:
        # Nothing to swap: skip the work entirely
        return data

    swapped = bytearray(data)
    # Exchange the even- and odd-indexed bytes of every complete pair
    paired = len(data) - (len(data) % 2)
    swapped[0:paired:2] = data[1:paired:2]
    swapped[1:paired:2] = data[0:paired:2]

    return swapped
def __extract_text_sections(data):
    """Return the runs of text found in binary data.

    A run is six or more consecutive bytes drawn from letters, digits, space,
    tab, CR, LF and the punctuation ``. , _ - + * / $``.

    Parameters
    ----------
    data : bytes-like
        Binary content to search.

    Returns
    -------
    list
        The matched byte runs, in order of appearance.
    """
    text_run = rb'[A-Za-z0-9 \t\r\n\.,_\-\+\*\/\$]{6,}'
    return [found.group(0) for found in re.finditer(text_run, data)]
def __extract_gps_data(data):
    """Return the GPS NMEA sentences embedded in binary data.

    The data is decoded as ASCII (undecodable bytes are dropped) and searched
    for sentences that start with ``$GP`` plus three capital letters and a
    comma, and end with CR LF.

    Parameters
    ----------
    data : bytes-like
        Binary content to search.

    Returns
    -------
    list of str
        Each full sentence, including the leading ``$`` and trailing CR LF.
    """
    decoded = data.decode('ascii', errors='ignore')
    sentence_pattern = r'\$(GP[A-Z]{3},.+?)\r\n'
    return [hit.group(0) for hit in re.finditer(sentence_pattern, decoded)]
def __locate_data_start_blue(data):
    """Estimate where seismometer data starts, just after the last GPS sentence.

    The data is decoded as ASCII and searched for the last ``$GP`` marker and
    the CR LF that terminates that sentence.

    Parameters
    ----------
    data : bytes-like
        Binary content to search.

    Returns
    -------
    int
        Index of the terminating CR LF plus 8 (a small skip ahead), or -1 when
        no ``$GP`` marker or no terminating CR LF is found.
    """
    # NOTE(review): decoding with errors='ignore' drops non-ASCII bytes, so the
    # positions below index the decoded string and are not necessarily byte
    # offsets in the original data - confirm callers account for this.
    data_str = data.decode('ascii', errors='ignore')
    last_nmea_pos = data_str.rfind('$GP')

    # -1 is the "not found" result, also used when there is no GPS sentence
    end_GPS_marker = -1
    if last_nmea_pos >= 0:
        # Find the end of the last sentence
        end_GPS_marker = data_str.find('\r\n', last_nmea_pos)
        if end_GPS_marker > 0:
            # Skip a bit further to be safe
            return end_GPS_marker + 8

    return end_GPS_marker
# Read data from raspberry shake
def __read_RS_file_struct(input_data, source, year, doy, inv, params, verbose=False):
    """Private function used by fetch_data() to read in Raspberry Shake data.

    Parameters
    ----------
    input_data : path-like
        File or directory to read; normalised with ``sprit_utils._checkifpath``.
    source : str
        ``'raw'``: one file per channel, found in ``input_data`` itself or in
        its sub-directories whose names start with 'EH'; file names must start
        with 'AM' and contain ``year`` and the zero-padded ``doy``.
        ``'dir'``: every file in the directory whose extension is an obspy
        format name or a number in [0, 367) is read and combined.
        ``'file'``: a single file.
    year : int or str
        Acquisition year used to match raw file names.
    doy : int or str
        Day of year used to match raw file names.
    inv : object
        Not used by this function.
    params : dict-like
        Must provide ``'starttime'`` and ``'endtime'`` (and ``'acq_date'`` for
        a warning message) for the 'raw' and 'file' sources.
    verbose : bool, default False
        Print the names of the raw files being read.

    Returns
    -------
    obspy.Stream or None
        The data read, or None when no matching raw files exist or the source
        is not supported.
    """
    from obspy.core import UTCDateTime

    fileList = []
    folderPathList = []
    filesinfolder = False
    # Stays None when the source is not one of the supported kinds
    rawDataIN = None
    input_data = sprit_utils._checkifpath(input_data)

    # Read RS files
    if source == 'raw':  # raw data with individual files per trace
        if input_data.is_dir():
            for child in input_data.iterdir():
                if child.is_file() and child.name.startswith('AM') and str(doy).zfill(3) in child.name and str(year) in child.name:
                    filesinfolder = True
                    folderPathList.append(input_data)
                    fileList.append(child)
                elif child.is_dir() and child.name.startswith('EH') and not filesinfolder:
                    folderPathList.append(child)
                    for c in child.iterdir():
                        if c.is_file() and c.name.startswith('AM') and c.name.endswith(str(doy).zfill(3)) and str(year) in c.name:
                            fileList.append(c)

            if len(fileList) == 0:
                # Nothing for the requested day: list the days that do exist
                doyList = []
                printList = []
                for j, folder in enumerate(folderPathList):
                    for i, file in enumerate(folder.iterdir()):
                        if j == 0:
                            doyList.append(str(year) + ' ' + str(file.name[-3:]))
                            printList.append(f"{datetime.datetime.strptime(doyList[i], '%Y %j').strftime('%b %d')} | Day of year: {file.name[-3:]}")
                if len(printList) == 0:
                    warnings.warn('No files found matching Raspberry Shake data structure or files in specified directory.')
                else:
                    warnings.warn(f'No file found for specified date: {params["acq_date"]}. The following days/files exist for specified year in this directory')
                    for p in printList:
                        print('\t', p)
                return None
            elif len(fileList) != 3:
                # Report the number of channel files actually found
                warnings.warn('3 channels needed! {} found.'.format(len(fileList)), UserWarning)
            else:
                fileList.sort(reverse=True)  # Puts z channel first
                folderPathList.sort(reverse=True)
                if verbose:
                    print('\n\tReading files: \n\t{}\n\t{}\n\t{}'.format(fileList[0].name, fileList[1].name, fileList[2].name))

            traceList = []
            for f in fileList:
                with warnings.catch_warnings():
                    warnings.filterwarnings(action='ignore', message='^readMSEEDBuffer()')
                    st = obspy.read(str(f))
                    st = st.split()
                    st.trim(starttime=UTCDateTime(params['starttime']), endtime=UTCDateTime(params['endtime']), nearest_sample=False)
                    st.merge()
                    traceList.append(st[0])
            rawDataIN = obspy.Stream(traceList)
        else:
            # `endtime` was previously misspelled, so the end bound was ignored
            rawDataIN = obspy.read(str(input_data), starttime=UTCDateTime(params['starttime']), endtime=UTCDateTime(params['endtime']), nearest_sample=True)

    elif source == 'dir':  # files with 3 traces, but may be several in a directory or only directory name provided
        OBSPY_FORMATS = ['AH','ALSEP_PSE','ALSEP_WTH','ALSEP_WTN','CSS','DMX','GCF','GSE1','GSE2','KINEMETRICS_EVT','MSEED','NNSA_KB_CORE','PDAS','PICKLE','Q','REFTEK130','RG16','SAC','SACXY','SEG2','SEGY','SEISAN','SH_ASC','SLIST','SU','TSPAIR','WAV','WIN','Y']
        for file in input_data.iterdir():
            ext = file.suffix[1:]
            # Numeric extensions in [0, 367) are also accepted
            rawFormat = False
            if ext.isnumeric():
                if float(ext) >= 0 and float(ext) < 367:
                    rawFormat = True

            if ext.upper() in OBSPY_FORMATS or rawFormat:
                filesinfolder = True
                folderPathList.append(input_data)
                fileList.append(file.name)

        rawDataIN = obspy.Stream()
        for i, f in enumerate(fileList):
            currData = obspy.read(folderPathList[i].joinpath(f))
            currData.merge()
            if isinstance(currData, obspy.core.stream.Stream):
                rawDataIN += currData.copy()

    elif source == 'file':
        # `endtime` / `nearest_sample` were previously misspelled and ignored
        rawDataIN = obspy.read(str(input_data), starttime=UTCDateTime(params['starttime']), endtime=UTCDateTime(params['endtime']), nearest_sample=True)
        rawDataIN.merge()
    elif isinstance(source, (list, tuple)):
        print('List of sources not currently supported')

    return rawDataIN
# Helper functions for remove_noise()
# Helper function for removing gaps
def __remove_gaps(stream, window_gaps_obspy):
    """Helper function for removing gaps (windows of unwanted data) from a stream.

    Parameters
    ----------
    stream : stream-like
        Stream to cut; must support ``copy()``, ``trim()``, ``+`` and
        ``merge()``.
    window_gaps_obspy : list of [start, end]
        Windows to remove, in time order. The first and last entries act as
        bounds: data is kept from the end of each window to the start of the
        next one. Overlapping neighbours are merged IN PLACE in this list.

    Returns
    -------
    stream-like
        A new stream made of the kept segments, or a plain copy of ``stream``
        when there is no window between the first and last entries.
    """
    # Combine overlapping windows (the final bounding entry is never merged)
    overlapList = []
    for i in range(len(window_gaps_obspy)-2):
        if window_gaps_obspy[i][1] > window_gaps_obspy[i+1][0]:
            overlapList.append(i)

    # Merge each flagged window with its successor. Going from the back keeps
    # the recorded indices valid while entries are popped.
    for t in reversed(overlapList):
        window_gaps_obspy[t][1] = max(window_gaps_obspy[t][1], window_gaps_obspy[t+1][1])
        window_gaps_obspy.pop(t+1)

    # Length of every window except the first and last (bounding) entries
    window_gaps_s = [win[1] - win[0] for win in window_gaps_obspy[1:-1]]

    if len(window_gaps_s) > 0:
        # One kept segment per consecutive pair of windows
        stream_windows = []
        for i in range(len(window_gaps_s) + 1):
            newSt = stream.copy()
            stream_windows.append(newSt.trim(starttime=window_gaps_obspy[i][1], endtime=window_gaps_obspy[i+1][0]))

        for i, st in enumerate(stream_windows):
            if i == 0:
                outStream = st.copy()
            else:
                newSt = st.copy()
                gap = window_gaps_s[i-1]
                # Pad each later segment back over the removed window;
                # presumably this leaves the gap masked after merge() - TODO confirm
                outStream = outStream + newSt.trim(starttime=st[0].stats.starttime - gap, pad=True, fill_value=None)
        outStream.merge()
    else:
        outStream = stream.copy()

    return outStream
# Helper function for getting windows to remove noise using stalta antitrigger method
def __remove_anti_stalta(stream, sta, lta, thresh, show_stalta_plot=False, verbose=False):
    """Helper function for getting windows to remove noise using stalta antitrigger method

    Parameters
    ----------
    stream : obspy.core.stream.Stream object
        Input stream on which to perform noise removal
    sta : int
        Number of seconds to use as short term window, reads from remove_noise() function.
    lta : int
        Number of seconds to use as long term window, reads from remove_noise() function.
    thresh : list
        Two-item list or tuple with the thresholds for the stalta antitrigger.
        Reads from remove_noise() function. The first value (index [0]) is the lower threshold (below which trigger is deactivated),
        the second value (index [1]) is the upper threshold (above which trigger is activated).
    show_stalta_plot : bool or int
        If True, plot the trigger and stalta values of the first trace; if an int, of the trace at that index.
        Reads from remove_noise() function, by default False.
    verbose : bool
        If True, print the parameters in use, by default False.

    Returns
    -------
    outStream : obspy.core.stream.Stream object
        Stream returned by __remove_gaps() for the triggered windows
    """
    from obspy.signal.trigger import classic_sta_lta, plot_trigger, trigger_onset

    if verbose:
        print(f'\tRemoving noise using sta/lta antitrigger method: sta={sta}, lta={lta}, stalta_thresh={thresh}')

    # NOTE: despite its name this is the sample spacing (stats.delta), in seconds
    sampleRate = float(stream[0].stats.delta)

    sta_samples = sta / sampleRate  # Convert to samples
    lta_samples = lta / sampleRate  # Convert to samples
    staltaStream = stream.copy()

    # Characteristic function of every trace
    cFunList = []
    for tr in staltaStream:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=UserWarning)
            cFunList.append(classic_sta_lta(tr, nsta=sta_samples, nlta=lta_samples))

    # Plot a trace together with ITS OWN characteristic function
    plot_index = None
    if show_stalta_plot is True:
        plot_index = 0
    elif type(show_stalta_plot) is int:
        plot_index = show_stalta_plot
    if plot_index is not None:
        plot_trigger(staltaStream[plot_index], cFunList[plot_index], thresh[1], thresh[0])

    # Triggered [on, off] sample windows from all traces
    windows_samples = []
    for cf in cFunList:
        onsets = trigger_onset(cf, thresh[1], thresh[0])
        if len(onsets) > 0:
            windows_samples.extend(onsets.tolist())

    def condense_window_samples(win_samples):
        """Sort the windows by start and merge those that overlap."""
        sorted_list = sorted(win_samples, key=lambda x: x[0])
        result = []
        if len(win_samples) == 0:
            return result

        start, end = sorted_list[0]
        for current_start, current_end in sorted_list[1:]:
            if current_start <= end:
                # Overlaps the running window: extend it
                end = max(end, current_end)
            else:
                # Disjoint: close the running window and start a new one
                result.append([start, end])
                start, end = current_start, current_end

        result.append([start, end])
        return result

    windows_samples = condense_window_samples(windows_samples)

    startT = stream[0].stats.starttime
    endT = stream[0].stats.endtime

    # Windows in UTC, bracketed by zero-length bounds at the stream start/end
    window_UTC = [[startT, startT]]
    for win in windows_samples:
        utc_window = []
        for t in win:
            # Shift back by the sta length, unless that would precede the start
            trigShift = sta
            if trigShift > t * sampleRate:
                trigShift = 0
            tSec = t * sampleRate - trigShift
            utc_window.append(startT + tSec)
        window_UTC.append(utc_window)
    window_UTC.append([endT, endT])

    outStream = __remove_gaps(stream, window_UTC)
    return outStream
# Helper function for getting windows to remove noise using moving stdev
def __remove_moving_std(stream, std_ratio_thresh=2, std_window_s=20, min_win_size=5, verbose=False):
    """Helper function for removing noisy data due to high local standard deviation.
    This is similar to the default noise removal method used in Grilla software.

    Parameters
    ----------
    stream : obspy.Stream
        Obspy stream that should be analyzed and segmented for noise removal
    std_ratio_thresh : float, optional
        Threshold ratio value to use for removing data.
        Ratio is calculated as the moving/local standard deviation (over rolling window
        specified by std_window_s) over the total standard deviation (of entire trace), by default 2
    std_window_s : float, optional
        Size of the rolling window in seconds to use to calculate the local/moving/rolling standard deviation, by default 20
    min_win_size : float, optional
        The minimum size of window in seconds for data removal (where all points in that window exceed std_ratio_thresh), by default 5
    verbose : bool, optional
        If True, print a message when no window is removed, by default False

    Returns
    -------
    obspy.Stream
        Obspy Stream object with "noisy" windows calculated by remove_moving_std removed, if applicable.
    """
    instream = stream.copy()
    outstream = instream.copy()

    removeDTs = pd.DatetimeIndex([], tz='UTC')  # Empty index to start

    # Use pandas to simplify rolling/moving std
    for tr in instream.split():
        dtList = [t.datetime.replace(tzinfo=zoneinfo.ZoneInfo('UTC'))
                  for t in tr.times(type="utcdatetime")]

        # Create pandas series out of trace data
        traceData = pd.Series(data=tr.data, index=dtList)

        # Get StDev values
        totalSTD = traceData.std()
        movingSTD = traceData.rolling(datetime.timedelta(seconds=std_window_s), center=True).std()

        # Calculate whether ratio is larger than threshold value
        boolseries = np.abs(movingSTD/totalSTD) > std_ratio_thresh

        # Update single list with indices of just removed windows, from all traces
        removeDTs = removeDTs.join(boolseries.iloc[np.nonzero(boolseries)[0]].index, how='outer')

    # NOTE(review): removeDTs (flagged times from ALL traces) is not used below;
    # the windows are built from `boolseries`, i.e. from the last trace of the
    # loop only. Confirm whether all traces were meant to contribute.
    removeDTs = removeDTs.unique()
    removeDTs = removeDTs.sort_values()
    removeDTs = removeDTs.to_pydatetime()

    # Convert instances of mstd/totstd > thresh to windows (keep if longer than min_win_size)
    windows = [[None, None]]
    windex = 0
    windStarted = False
    for i, pt4removal in enumerate(boolseries):
        currPtPyDT = boolseries.index[i].to_pydatetime()

        if windStarted:
            if not pt4removal:
                # First unflagged point after a run of flagged points
                if (currPtPyDT - windows[windex][0]).total_seconds() < min_win_size:
                    # Run too short: erase the started window and continue
                    windStarted = False
                    windows[windex] = [None, None]
                    continue
                else:
                    # Close the window at the last flagged point
                    windows[windex][1] = prevPt2RemovePD.to_pydatetime()
                    windows.append([None, None])
                    windex += 1
                    windStarted = False
            else:
                prevPt2RemovePD = boolseries.index[i]
        else:
            if pt4removal:
                # Open a new window at this flagged point
                windows[windex][0] = currPtPyDT
                windStarted = True
                prevPt2RemovePD = boolseries.index[i]

    # Drop the trailing placeholder entry.
    # NOTE(review): a window still open at the end of the series is dropped
    # here as well - confirm that this is intended.
    windows = windows[:-1]

    if len(windows) == 0:
        if verbose:
            print('\t\t No windows removed with moving std method.')
        return outstream

    # Need to convert these to UTC windows now!
    removeUTC = []
    for swin, ewin in windows:
        removeUTC.append([obspy.UTCDateTime(swin), obspy.UTCDateTime(ewin)])

    # Bracket with zero-length bounds at the stream start and end
    stime = outstream.split()[0].stats.starttime
    etime = outstream.split()[-1].stats.endtime
    removeUTC.insert(0, [stime, stime])
    removeUTC.append([etime, etime])

    outstream = __remove_gaps(outstream, removeUTC)

    return outstream
# Remove noise saturation
def __remove_noise_saturate(stream, sat_percent, min_win_size, verbose=False):
    """Function to remove "saturated" data points that exceed a certain percent (sat_percent) of the maximum data value in the stream.

    Parameters
    ----------
    stream : obspy.Stream
        Obspy Stream of interest
    sat_percent : float
        Percentage of the maximum amplitude, which will be used as the saturation threshold above which data points will be excluded.
        A value greater than 1 is divided by 100.
    min_win_size : float
        The minimum size a window must be (in seconds) for it to be removed
    verbose : bool
        If True, print the parameters in use, by default False

    Returns
    -------
    obspy.Stream
        Stream returned by __remove_gaps() with the "saturated" windows removed
    """
    if verbose:
        print(f'\tRemoving noise using noise saturation method: sat_percent={sat_percent}, min_win_size={min_win_size}')
    if sat_percent > 1:
        sat_percent = sat_percent / 100

    # Sample indices above the per-trace threshold, gathered over all traces
    removeInd = np.array([], dtype=int)
    for trace in stream:
        absData = np.absolute(trace.data.copy())
        sample_rate = trace.stats.delta  # sample spacing in seconds

        # The threshold is a fraction of the trace's maximum amplitude
        thresholdAmp = np.max(absData) * sat_percent
        cond = np.nonzero(absData > thresholdAmp)[0]
        removeInd = np.hstack([removeInd, cond])

    # Combine indices from all traces
    removeInd = np.unique(removeInd)

    # Group consecutive indices into [start, end] runs of sufficient length
    removeList = []
    min_win_samples = int(min_win_size / sample_rate)

    if len(removeInd) > 0:
        startInd = endInd = removeInd[0]
        for ind in removeInd[1:]:
            if ind - endInd > 1:
                # A jump in the indices closes the current run
                if endInd - startInd >= min_win_samples:
                    removeList.append([int(startInd), int(endInd)])
                startInd = ind
            endInd = ind
        # Close the final run too; previously it was never recorded
        if endInd - startInd >= min_win_samples:
            removeList.append([int(startInd), int(endInd)])

    # Convert runs from samples to seconds after start to UTCDateTime,
    # bracketed by zero-length bounds at the stream start and end
    sampleRate = stream[0].stats.delta
    startT = stream[0].stats.starttime
    endT = stream[0].stats.endtime
    removeUTC = [[startT, startT]]
    for win in removeList:
        winSec = np.round(sampleRate * np.array(win), 6)
        removeUTC.append([startT + float(sec) for sec in winSec])
    removeUTC.append([endT, endT])

    outstream = __remove_gaps(stream, removeUTC)
    return outstream
# Helper function for removing data using the noise threshold input from remove_noise()
def __remove_noise_thresh(stream, noise_percent=0.8, lta=30, min_win_size=1, verbose=False):
    """Helper function for removing data using the noise threshold input from remove_noise()

    The purpose of the noise threshold method is to remove noisy windows (e.g., lots of traffic all at once).
    For every trace, a moving average of length lta (the "lta value") is computed. Samples where the
    absolute lta value exceeds noise_percent times that trace's maximum lta value are flagged, the flagged
    samples of all traces are combined, and every contiguous run of flagged samples lasting at least
    min_win_size seconds is handed to __remove_gaps() for removal.

    Parameters
    ----------
    stream : obspy.core.stream.Stream object
        Input stream from which to remove windows. Passed from remove_noise().
    noise_percent : float, default=0.8
        Percentage (between 0 and 1), to use as the threshold at which to remove data. This is used in the noise threshold method.
        If a value is passed that is greater than 1, it will be divided by 100 to obtain the percentage. Passed from remove_noise().
    lta : int, default = 30
        Length of lta to use (in seconds)
    min_win_size : int, default = 1
        Minimum amount of time (in seconds) at which noise is above noise_percent level.
    verbose : bool, default = False
        Whether to print information about the noise removal to the terminal

    Returns
    -------
    outStream : obspy.core.stream.Stream object
        Stream with a masked array for the data where 'noise' has been removed. Passed to remove_noise().
    """
    if verbose:
        print(f'\tRemoving noise using continuous noise threshold method: noise_percent={noise_percent}, lta={lta}')

    if noise_percent > 1:
        noise_percent = noise_percent / 100

    removeInd = np.array([], dtype=int)
    for trace in stream:
        dataArr = trace.data.copy()
        # stats.delta is the sample spacing in seconds
        sample_interval = trace.stats.delta

        # Length of the averaging window in samples (at least one sample)
        window_size = int(lta / sample_interval)
        if window_size == 0:
            window_size = 1

        # Moving average across the trace data
        kernel = np.ones(window_size) / window_size
        maskedArr = np.ma.array(dataArr, dtype=float, fill_value=None)
        ltaArr = np.convolve(maskedArr, kernel, mode='same')

        # Flag samples whose absolute lta value exceeds the threshold
        maxLTA = np.max(ltaArr)
        cond = np.nonzero(np.absolute(ltaArr) > (noise_percent * maxLTA))[0]
        removeInd = np.hstack([removeInd, cond])

    # Combine indices from all traces
    removeInd = np.unique(removeInd)

    # Group the flagged indices into contiguous runs, keeping only runs
    # that are at least min_win_size long
    removeList = []
    min_win_samples = int(min_win_size / sample_interval)

    if len(removeInd) > 0:
        startInd = removeInd[0]
        endInd = removeInd[0]
        for prevInd, currInd in zip(removeInd[:-1], removeInd[1:]):
            # A jump of more than one sample closes the current run
            if currInd - prevInd > 1:
                if endInd - startInd >= min_win_samples:
                    removeList.append([int(startInd), int(endInd)])
                startInd = currInd
            endInd = currInd

        # The final run is not followed by a jump, so it must be closed here;
        # otherwise a single (or trailing) noisy stretch is never removed
        if endInd - startInd >= min_win_samples:
            removeList.append([int(startInd), int(endInd)])

    # Convert the runs from samples to UTCDateTime, padded with a zero-length
    # window at the start and at the end of the record for __remove_gaps()
    sampleRate = stream[0].stats.delta
    startT = stream[0].stats.starttime
    endT = stream[0].stats.endtime

    removeUTC = [[startT, startT]]
    for win in removeList:
        winSec = np.round(sampleRate * np.array(win), 6)
        removeUTC.append([startT + float(sec) for sec in winSec])
    removeUTC.append([endT, endT])

    outstream = __remove_gaps(stream, removeUTC)
    return outstream
# Helper function for removing data during warmup (when seismometers are still initializing) and "cooldown" (when there may be noise from deactivating seismometer) time, if desired
def __remove_warmup_cooldown(stream, warmup_time = 0, cooldown_time = 0, verbose=False):
    """Private helper function to remove data from the start and/or end of each site

    Parameters
    ----------
    stream : obspy.Stream()
        Input stream to use for analysis for noise removal
    warmup_time : int, optional
        Time in seconds at the start of the record to remove from analysis, by default 0
    cooldown_time : int, optional
        Time in seconds at the end of the record to remove from analysis, by default 0
    verbose : bool, optional
        Whether to print information about the process to the terminal, by default False

    Returns
    -------
    obspy.Stream()
        A copy of the input stream if neither buffer is requested; otherwise the
        result of __remove_gaps() for the warmup and/or cooldown windows.
    """
    if verbose:
        print(f"\tRemoving noise using warmup/cooldown buffers: warmup_time={warmup_time} s, cooldown_time={cooldown_time} s ")

    sampleRate = float(stream[0].stats.delta)

    # Buffer boundaries in samples; the cooldown is measured back from the
    # last sample index of the last trace in the stream
    warmup_samples = int(warmup_time / sampleRate)
    totalSamples = len(stream[-1].data) - 1
    cooldown_samples = int(totalSamples - (cooldown_time / sampleRate))

    # Only keep the buffers that were actually requested (0 means "none")
    windows_samples = []
    if warmup_time != 0:
        windows_samples.append([0, warmup_samples])
    if cooldown_time != 0:
        windows_samples.append([cooldown_samples, totalSamples])

    if not windows_samples:
        # If no warmup or cooldown indicated, don't do anything
        return stream.copy()

    startT = stream[0].stats.starttime
    endT = stream[-1].stats.endtime
    if verbose:
        print("warmup starttime", startT)

    # Convert each buffer from samples to UTCDateTime, padded with a
    # zero-length window at the start and at the end of the record
    window_UTC = [[startT, startT]]
    for win in windows_samples:
        window_UTC.append([startT + (tm * sampleRate) for tm in win])
    window_UTC.append([endT, endT])

    outStream = __remove_gaps(stream, window_UTC)
    return outStream
# Helper function for selecting windows
def _keep_processing_windows(stream, processing_window=[":"], verbose=False):
    """Keep only the requested processing window(s) and remove everything else

    The requested windows are converted to obspy.UTCDateTime pairs, the
    stretches between them (and before the first / after the last) are
    assembled into a list of windows to remove, and that list is passed to
    __remove_gaps().

    Parameters
    ----------
    stream : obspy.Stream()
        Stream
    processing_window : list, optional
        Processing window list, by default [":"].
        Either a two-item list/tuple with a start and end time, or a list/tuple
        of such two-item lists/tuples. If any item is ':', 'all' or 'everything'
        (case-insensitive), the input stream is returned unchanged.
    verbose : bool, optional
        Whether to print information about the removal to the terminal

    Returns
    -------
    obspy.Stream()
        Obspy stream object with selected windows retained and all else removed
    """
    if verbose:
        print(f"\tRemoving noise outside the indicated processing window(s): processing_window={processing_window}")
    instream = stream
    allList = [':', 'all', 'everything']

    # Calendar date of the first trace's start time; used to re-anchor parsed times below
    year = stream[0].stats.starttime.year
    month = stream[0].stats.starttime.month
    day = stream[0].stats.starttime.day

    # A single non-list value is wrapped so the loop below can treat all input alike
    if not isinstance(processing_window, (tuple, list)):
        processing_window = [processing_window]

    windows_to_get = []
    for p in processing_window:
        # "Keep everything" keywords short-circuit the whole function
        if str(p).lower() in allList:
            return instream

        if isinstance(p, (tuple, list)):
            # p is itself a (start, end) pair
            windows_to_get.append([])
            if isinstance(p[0], (obspy.UTCDateTime, datetime.datetime)) and isinstance(p[1], (obspy.UTCDateTime, datetime.datetime)):
                windows_to_get[-1].append(obspy.UTCDateTime(p[0]))
                windows_to_get[-1].append(obspy.UTCDateTime(p[1]))
            else:
                windows_to_get[-1].append(obspy.UTCDateTime(sprit_utils._format_time(p[0], tzone='UTC')))
                windows_to_get[-1].append(obspy.UTCDateTime(sprit_utils._format_time(p[1], tzone='UTC')))
                # Make sure time are on the right day
                # (only hour, minute and second of the parsed time are kept)
                windows_to_get[-1][0] = obspy.UTCDateTime(year, month, day, windows_to_get[-1][0].hour, windows_to_get[-1][0].minute, windows_to_get[-1][0].second)
                windows_to_get[-1][1] = obspy.UTCDateTime(year, month, day, windows_to_get[-1][1].hour, windows_to_get[-1][1].minute, windows_to_get[-1][1].second)
        else:
            # p is a bare value: the whole processing_window is read as one (start, end) pair
            # NOTE(review): this branch does not re-anchor the times to the stream's date - confirm intent
            if len(processing_window) == 2:
                windows_to_get = [[obspy.UTCDateTime(sprit_utils._format_time(processing_window[0], tzone='UTC')),
                                   obspy.UTCDateTime(sprit_utils._format_time(processing_window[1], tzone='UTC'))]]
            else:
                print(f'The processing_window parameter of remove_noise was set as {processing_window}')
                print("The processing_window parameter must be a list or tuple with a start and end time or with lists/tuples of start/end times.")
                print('processing_window noise removal method not applied')
                return instream

    # windows_to_get should be a list of two-item lists with UTCDateTime objects no matter how it came in
    stime = instream[0].stats.starttime
    etime = instream[-1].stats.endtime

    # Pad with zero-length windows at the start and end of the record
    windows_to_get.insert(0, [stime, stime])
    windows_to_get.append([etime, etime])

    # Need the list formatted slightly different, use window_UTC
    window_UTC = []

    # Rearrange: each entry runs from the end of one kept window to the start of the next
    for i, win in enumerate(windows_to_get):
        if i == 0:
            window_UTC.append([stime, windows_to_get[i+1][0]])
        elif i < len(windows_to_get) - 1:
            window_UTC.append([win[1], windows_to_get[i+1][0]])

    # Zero-length padding windows at both ends of the list handed to __remove_gaps()
    window_UTC.insert(0, windows_to_get[0])
    window_UTC.append(windows_to_get[-1])

    outStream = __remove_gaps(stream, window_UTC)
    return outStream
# Plot noise windows
def _plot_noise_windows(hvsr_data, fig=None, ax=None, clear_fig=False, fill_gaps=None,
                        do_stalta=False, sta=5, lta=30, stalta_thresh=[0.5,5],
                        do_pctThresh=False, sat_percent=0.8, min_win_size=1,
                        do_noiseWin=False, noise_percent=0.995,
                        do_warmup=False, warmup_time=0, cooldown_time=0,
                        return_dict=False, use_tkinter=False):
    """Plot the input stream, apply the selected noise-removal methods, and draw the removed windows

    A new figure is created with plt.subplot_mosaic(), the input stream is
    plotted on it, remove_noise() is called once for each enabled method (in the
    order stalta, saturation, noise, warmup; each call receives the stream
    produced by the previous one), and _get_removed_windows() is used to draw
    the resulting windows. The figure is shown with plt.show().

    Parameters
    ----------
    hvsr_data : HVSRData or dict
        Object holding the data; its 'stream' item is read and overwritten
    fig, ax : optional
        Existing figure and dictionary of axes. Only used when clear_fig is True
        (to clear them); they are replaced by a newly created figure and axes.
    clear_fig : bool, optional
        Whether to clear the figure and axes passed in, by default False
    fill_gaps : optional
        Passed to sprit_plot._plot_input_stream_mpl(), by default None
    do_stalta, do_pctThresh, do_noiseWin, do_warmup : bool, optional
        Whether to apply the 'stalta', 'saturation', 'noise' and 'warmup'
        methods of remove_noise(), respectively. All False by default.
    sta, lta, stalta_thresh, sat_percent, min_win_size, noise_percent, warmup_time, cooldown_time
        Parameters forwarded to remove_noise() for the corresponding method
    return_dict : bool, optional
        Whether to return hvsr_data (with a 'Windows_Plot' item added), by default False
    use_tkinter : bool, optional
        Currently has no effect, by default False

    Returns
    -------
    HVSRData or dict, or None
        hvsr_data with 'Windows_Plot' set to (fig, ax) if return_dict is True, otherwise None
    """
    if clear_fig: #Intended use for tkinter
        #Clear everything
        for key in ax:
            ax[key].clear()
        fig.clear()

        #Really make sure it's out of memory
        fig = []
        ax = []
        # fig and ax are lists at this point, so these calls always raise and are swallowed
        try:
            fig.get_children()
        except:
            pass
        try:
            ax.get_children()
        except:
            pass

    if use_tkinter:
        try:
            pass #Don't think this is being used anymore, defined in sprit_gui separately
            #ax=ax_noise #self.ax_noise #?
            #fig=fig_noise
        except:
            pass

    #Reset axes, figure, and canvas widget
    # Six rows for 'spec', two for 'signalz', one each for 'signaln' and 'signale'
    noise_mosaic = [['spec'],['spec'],['spec'],
                    ['spec'],['spec'],['spec'],
                    ['signalz'],['signalz'], ['signaln'], ['signale']]
    fig, ax = plt.subplot_mosaic(noise_mosaic, sharex=True)
    #self.noise_canvas = FigureCanvasTkAgg(fig, master=canvasFrame_noise)
    #self.noise_canvasWidget.destroy()
    #self.noise_canvasWidget = self.noise_canvas.get_tk_widget()#.pack(side=tk.TOP, fill=tk.BOTH, expand=1)
    #self.noise_canvasWidget.pack(fill='both')#.grid(row=0, column=0, sticky='nsew')
    fig.canvas.draw()

    fig, ax = sprit_plot._plot_input_stream_mpl(stream=hvsr_data['stream'], hv_data=hvsr_data, fig=fig, ax=ax, component='Z', stack_type='linear', detrend='mean', fill_gaps=fill_gaps, dbscale=True, return_fig=True, cmap_per=[0.1, 0.9])
    fig.canvas.draw()

    #Set initial input
    # NOTE(review): `input` shadows the builtin of the same name inside this function
    input = hvsr_data['stream']

    # Each enabled method is applied to the output of the previous one
    if do_stalta:
        hvsr_data['stream'] = remove_noise(hvsr_data=input, remove_method='stalta', sta=sta, lta=lta, stalta_thresh=stalta_thresh)
        input = hvsr_data['stream']

    if do_pctThresh:
        hvsr_data['stream'] = remove_noise(hvsr_data=input, remove_method='saturation', sat_percent=sat_percent, min_win_size=min_win_size)
        input = hvsr_data['stream']

    if do_noiseWin:
        hvsr_data['stream'] = remove_noise(hvsr_data=input, remove_method='noise', noise_percent=noise_percent, lta=lta, min_win_size=min_win_size)
        input = hvsr_data['stream']

    if do_warmup:
        hvsr_data['stream'] = remove_noise(hvsr_data=input, remove_method='warmup', warmup_time=warmup_time, cooldown_time=cooldown_time)

    # Draw the removed windows on the plot (the returned artist lists are not used further here)
    fig, ax, noise_windows_line_artists, noise_windows_window_artists = _get_removed_windows(input=hvsr_data, fig=fig, ax=ax, time_type='matplotlib')

    fig.canvas.draw()
    plt.show()
    if return_dict:
        hvsr_data['Windows_Plot'] = (fig, ax)
        return hvsr_data
    return
# Helper function for manual window selection
def __draw_boxes(event, clickNo, xWindows, pathList, windowDrawn, winArtist, lineArtist, x0, fig, ax):
    """Helper function for manual window selection to draw boxes to show where windows have been selected for removal

    Selection takes two left clicks. The first click (clickNo == 0) stores the
    x position and draws a vertical line on every axes. The second click draws
    a box between the two positions on every axes (via __draw_windows()), adds
    the closing vertical line, and appends the sorted [start, end] pair to
    xWindows.

    Parameters
    ----------
    event : matplotlib mouse event
        The click event; only its xdata attribute is read here.
    clickNo : int
        0 if this is the first click of a window, anything else for the second click
    xWindows, pathList, windowDrawn, winArtist, lineArtist : list
        Parallel lists (one entry per window) that are updated in place
    x0 : float
        x position of the first click (used on the second click)
    fig : matplotlib Figure
        Figure whose canvas is redrawn
    ax : matplotlib Axes, or dict / numpy array / list / tuple of Axes
        Axes on which to draw

    Returns
    -------
    tuple
        (clickNo, x0) updated for the next click
    """
    # A click outside of any axes has no x position; nothing can be drawn
    if event.xdata is None:
        return clickNo, x0

    # Create an axis dictionary if it does not already exist so all functions are the same
    if isinstance(ax, dict):
        pass
    elif isinstance(ax, np.ndarray):
        ax = {str(i): a for i, a in enumerate(ax.ravel())}
    elif isinstance(ax, (list, tuple)):
        ax = {str(i): a for i, a in enumerate(ax)}
    else:
        ax = {'a': ax}

    # Current y-limits of every axes (the boxes span the full height)
    kList = list(ax.keys())
    y0 = [ax[k].get_ylim()[0] for k in kList]
    y1 = [ax[k].get_ylim()[1] for k in kList]

    if clickNo == 0:
        # First click: remember the position and mark it on every axes
        x0 = event.xdata
        clickNo = 1
        lineArtist.append([])
        winNums = len(xWindows)
        for k in kList:
            linArt = ax[k].axvline(x0, 0, 1, color='k', linewidth=1, zorder=100)
            # The second slot is a placeholder until the closing line is drawn
            lineArtist[winNums].append([linArt, linArt])
    else:
        # Second click: draw the box and the closing line on every axes
        x1 = event.xdata
        clickNo = 0

        windowDrawn.append([])
        winArtist.append([])
        pathList.append([])
        winNums = len(xWindows)
        for i, key in enumerate(kList):
            path_data = [
                (matplotlib.path.Path.MOVETO, (x0, y0[i])),
                (matplotlib.path.Path.LINETO, (x1, y0[i])),
                (matplotlib.path.Path.LINETO, (x1, y1[i])),
                (matplotlib.path.Path.LINETO, (x0, y1[i])),
                (matplotlib.path.Path.LINETO, (x0, y0[i])),
                (matplotlib.path.Path.CLOSEPOLY, (x0, y0[i])),
            ]
            codes, verts = zip(*path_data)
            path = matplotlib.path.Path(verts, codes)

            windowDrawn[winNums].append(False)
            winArtist[winNums].append(None)
            pathList[winNums].append(path)
            __draw_windows(event=event, pathlist=pathList, ax_key=key, windowDrawn=windowDrawn, winArtist=winArtist, xWindows=xWindows, fig=fig, ax=ax)

            # Draw the closing line on this axes (not on pyplot's current axes)
            linArt = ax[key].axvline(x1, 0, 1, color='k', linewidth=0.5, zorder=100)
            # Replace the placeholder with the closing line
            lineArtist[winNums][i].pop(-1)
            lineArtist[winNums][i].append(linArt)

        x_win = [x0, x1]
        x_win.sort() #Make sure they are in the right order
        xWindows.append(x_win)

    fig.canvas.draw()
    return clickNo, x0
# Helper function for manual window selection to remove the boxes of windows deselected by a right click
def __remove_on_right(event, xWindows, pathList, windowDrawn, winArtist, lineArtist, fig, ax):
    """Helper function for manual window selection to deselect windows for removal

    Every window in xWindows whose (start, end) range strictly contains the
    x position of the click is deleted: its artists are removed from the plot
    and its entries are dropped from all of the parallel lists. The canvas is
    redrawn afterwards.

    Parameters
    ----------
    event : matplotlib mouse event
        The click event; only its xdata attribute is read here.
    xWindows : list or None
        List of [start, end] pairs of the selected windows
    pathList, windowDrawn, winArtist, lineArtist : list
        Lists parallel to xWindows (one entry per window), updated in place
    fig : matplotlib Figure
        Figure whose canvas is redrawn
    ax : any
        Not used by this function
    """
    clickX = event.xdata

    # event.xdata is None for clicks outside of the axes; nothing to deselect then
    if xWindows is not None and clickX is not None:
        # Walk backwards so that popping entry i does not shift the entries
        # that still have to be checked
        for i in range(len(xWindows) - 1, -1, -1):
            if xWindows[i][0] < clickX < xWindows[i][1]:
                # One set of artists per axes
                for j in range(len(lineArtist[i])):
                    winArtist[i][j].remove()
                    lineArtist[i][j][0].remove()
                    lineArtist[i][j][1].remove()

                pathList.pop(i)
                windowDrawn.pop(i)
                lineArtist.pop(i)
                winArtist.pop(i)
                xWindows.pop(i)

    fig.canvas.draw()
# Helper function for updating the canvas and drawing/deleting the boxes
def __draw_windows(event, pathlist, ax_key, windowDrawn, winArtist, xWindows, fig, ax):
    """Helper function that draws every not-yet-drawn window box and updates the canvas

    Each path in pathlist whose flag in windowDrawn is not set is added as a
    semi-transparent black patch to ax[ax_key]; the flag is then set and the
    patch artist is stored at the same position in winArtist. The canvas is
    only redrawn here for right-button events.
    """
    for winIdx, axisPaths in enumerate(pathlist):
        for pathIdx, boxPath in enumerate(axisPaths):
            if windowDrawn[winIdx][pathIdx]:
                # Already on the plot
                continue
            boxPatch = matplotlib.patches.PathPatch(boxPath, facecolor='k', alpha=0.75)
            boxArtist = ax[ax_key].add_patch(boxPatch)
            windowDrawn[winIdx][pathIdx] = True
            winArtist[winIdx][pathIdx] = boxArtist

    if event.button is MouseButton.RIGHT:
        fig.canvas.draw()
# Helper function for getting click event information
def __on_click(event):
    """Mouse-click callback for manual window selection

    A right click deselects the window under the cursor; a left click starts
    or finishes a window. All selection state is kept in module-level globals.
    """
    global clickNo, x0

    pressedButton = event.button

    if pressedButton is MouseButton.RIGHT:
        __remove_on_right(event, xWindows, pathList, windowDrawn, winArtist, lineArtist, fig, ax)

    if pressedButton is MouseButton.LEFT:
        nextClick, firstX = __draw_boxes(event, clickNo, xWindows, pathList, windowDrawn, winArtist, lineArtist, x0, fig, ax)
        clickNo, x0 = nextClick, firstX
# Function to select windows using original stream specgram/plots
def _select_windows(input):
    """Function to manually select windows for exclusion from data.

    A plot of the input stream is opened and click handlers are attached to it
    (left clicks draw windows, right clicks remove them; see __on_click()).
    The function blocks until the figure is closed. Selection state is shared
    with the click handlers through module-level globals.

    Parameters
    ----------
    input : dict
        Dictionary containing all the hvsr information.

    Returns
    -------
    hvsr_data : dict
        The input object with three items added: 'x_windows_out' (list of
        two-item lists containing start and end times of windows to be
        removed), 'fig_noise' (the figure) and 'ax_noise' (its axes).
    """
    from matplotlib.backend_bases import MouseButton
    import matplotlib.pyplot as plt
    import matplotlib
    import time
    # fig and ax are globals because __on_click() reads them
    global fig
    global ax

    # NOTE(review): `input` shadows the builtin; hvsr_data and input_stream are
    # only defined when input is an HVSRData or dict - other input raises NameError below
    if isinstance(input, (HVSRData, dict)):
        if 'hvsr_curve' in input.keys():
            fig = plot_hvsr(hvsr_data=input, plot_type='spec', return_fig=True, cmap='turbo')
        hvsr_data = input
        input_stream = hvsr_data['stream']
    #else:
    #    hvsr_data = input#.copy()
    #    input_stream = hvsr_data['stream']

    if isinstance(input_stream, obspy.core.stream.Stream):
        fig = sprit_plot._plot_input_stream_mpl(input_stream, component=['Z'], return_fig=True)
        ax = fig.get_axes()
        # A single axes is unwrapped from the list
        if len(ax)==1:
            ax = ax[0]
    elif isinstance(input_stream, obspy.core.trace.Trace):
        fig = sprit_plot._plot_input_stream_mpl(input_stream, return_fig=True)

    # Selection state shared with __on_click() and its helpers
    global lineArtist
    global winArtist
    global windowDrawn
    global pathList
    global xWindows
    global clickNo
    global x0
    x0=0
    clickNo = 0
    xWindows = []
    pathList = []
    windowDrawn = []
    winArtist = []
    lineArtist = []

    # Set to True by _on_fig_close() when the figure is closed
    global fig_closed
    fig_closed = False
    while fig_closed is False:
        # NOTE(review): the handlers are (re)connected on every pass of the loop - confirm this is intended
        fig.canvas.mpl_connect('button_press_event', __on_click)#(clickNo, xWindows, pathList, windowDrawn, winArtist, lineArtist, x0, fig, ax))
        fig.canvas.mpl_connect('close_event', _on_fig_close)#(clickNo, xWindows, pathList, windowDrawn, winArtist, lineArtist, x0, fig, ax))
        plt.pause(1)

    hvsr_data['x_windows_out'] = xWindows
    hvsr_data['fig_noise'] = fig
    hvsr_data['ax_noise'] = ax
    return hvsr_data
# Support function to help select_windows run properly
def _on_fig_close(event):
    """Close-event callback that flags (via the global fig_closed) that the figure was closed"""
    global fig_closed
    fig_closed = True
    return None
# Shows windows with None on input plot
def _get_removed_windows(input, fig=None, ax=None, lineArtist =[], winArtist = [], existing_lineArtists=[], existing_xWindows=[], exist_win_format='matplotlib', keep_line_artists=True, time_type='matplotlib',show_plot=False):
    """This function is for getting Nones from masked arrays and plotting them as windows

    The stream is merged and the masked samples of its first trace are grouped
    into contiguous windows. These windows (and, optionally, windows passed in
    through existing_xWindows) are drawn on every axes as a shaded box with a
    vertical line at each edge.

    Parameters
    ----------
    input : HVSRData, dict, obspy Stream or obspy Trace
        Data to read; for HVSRData/dict the 'stream' item is used. A copy is made.
    fig, ax : optional
        Figure and axes (single Axes, numpy array of Axes, or dict of Axes) to
        draw on. If both are None, a new figure and axes are created.
    lineArtist, winArtist : list, optional
        Overwritten inside the function before use.
    existing_lineArtists : list, optional
        Line artists to extend instead of starting a new list (see keep_line_artists)
    existing_xWindows : list, optional
        Additional windows to draw.
    exist_win_format : str, optional
        Format of existing_xWindows; only matplotlib formats ('matplotlib', 'mpl', 'm') are converted
    keep_line_artists : bool, optional
        Whether to keep appending to existing_lineArtists, by default True
    time_type : str, optional
        x-axis units to draw in: samples, UTC, or matplotlib dates (default)
    show_plot : bool, optional
        Whether to call plt.show(), by default False

    Returns
    -------
    tuple
        (fig, ax, lineArtist, winArtist)
    """
    if fig is None and ax is None:
        fig, ax = plt.subplots()

    # NOTE(review): `input` shadows the builtin; for unsupported types `stream` stays undefined
    if isinstance(input, (dict, HVSRData)):
        stream = input['stream'].copy()
    elif isinstance(input, (obspy.core.trace.Trace, obspy.core.stream.Stream)):
        stream = input.copy()
    else:
        pass #Warning?

    # Accepted spellings of the supported time formats
    samplesList = ['sample', 'samples', 's']
    utcList = ['utc', 'utcdatetime', 'obspy', 'u', 'o']
    matplotlibList = ['matplotlib', 'mpl', 'm']

    #Get masked indices of trace(s)
    trace = stream.merge()[0]
    # stats.delta is the sample spacing in seconds
    sample_rate = trace.stats.delta
    windows = []
    #windows.append([0,np.nan])
    #mask = np.isnan(trace.data)  # Create a mask for None values
    #masked_array = np.ma.array(trace.data, mask=mask).copy()
    masked_array = trace.data.copy()
    if isinstance(masked_array, np.ma.MaskedArray):
        # From here on masked_array holds the indices of the masked samples
        masked_array = masked_array.mask.nonzero()[0]
        # NOTE(review): indexing [0] assumes at least one masked sample - confirm
        lastMaskInd = masked_array[0]-1
        wInd = 0
        # Group consecutive masked indices into [start, end] windows (in samples)
        for i in range(0, len(masked_array)-1):
            maskInd = masked_array[i]
            if maskInd-lastMaskInd > 1 or i==0:
                # A jump in the indices (or the very first index) opens a new window
                windows.append([np.nan, np.nan])
                if i==0:
                    windows[wInd][0] = masked_array[i]
                else:
                    # Close the previous window at the last index before the jump
                    windows[wInd-1][1] = masked_array[i - 1]
                    windows[wInd][0] = masked_array[i]
                wInd += 1
            lastMaskInd = maskInd
        windows[wInd-1][1] = masked_array[-1] #Fill in last masked value (wInd-1 b/c wInd+=1 earlier)
    # Windows found from the mask are labelled 'gaps'
    winTypeList = ['gaps'] * len(windows)

    #Check if the windows are just gaps
    if len(existing_xWindows) > 0:
        existWin = []
        #Check if windows are already being taken care of with the gaps
        startList = []
        endList = []
        for start, end in windows:
            startList.append((trace.stats.starttime + start*sample_rate).matplotlib_date)
            endList.append((trace.stats.starttime + end*sample_rate).matplotlib_date)
        # NOTE(review): existing_xWindows is modified while being iterated, and it is
        # the caller's list (or the shared default list) - confirm this is intended
        for w in existing_xWindows:
            removed=False
            if w[0] in startList and w[1] in endList:
                existing_xWindows.remove(w)
                removed=True
            if exist_win_format.lower() in matplotlibList and not removed:
                sTimeMPL = trace.stats.starttime.matplotlib_date #Convert time to samples from starttime
                # NOTE(review): assumes w supports array arithmetic (w - float) - confirm with caller
                existWin.append(list(np.round((w - sTimeMPL)*3600*24/sample_rate)))
        windows = windows + existWin
        # Windows that came from existing_xWindows are labelled 'removed'
        existWinTypeList = ['removed'] * len(existWin)
        winTypeList = winTypeList + existWinTypeList

    #Reformat ax as needed
    if isinstance(ax, np.ndarray):
        origAxes = ax.copy()
        newAx = {}
        for i, a in enumerate(ax):
            newAx[i] = a
        axes = newAx
    elif isinstance(ax, dict):
        origAxes = ax
        axes = ax
    else:
        origAxes = ax
        axes = {'ax':ax}

    for i, a in enumerate(axes.keys()):
        ax = axes[a]
        pathList = []
        windowDrawn = []
        winArtist = []
        if existing_lineArtists == []:
            lineArtist = []
        elif len(existing_lineArtists)>=1 and keep_line_artists:
            lineArtist = existing_lineArtists
        else:
            lineArtist = []

        for winNums, win in enumerate(windows):
            # Convert the window edges from samples to the requested x-axis units
            if time_type.lower() in samplesList:
                x0 = win[0]
                x1 = win[1]
            elif time_type.lower() in utcList or time_type.lower() in matplotlibList:
                #sample_rate = trace.stats.delta
                x0 = trace.stats.starttime + (win[0] * sample_rate)
                x1 = trace.stats.starttime + (win[1] * sample_rate)
                if time_type.lower() in matplotlibList:
                    x0 = x0.matplotlib_date
                    x1 = x1.matplotlib_date
            else:
                warnings.warn(f'time_type={time_type} not recognized. Defaulting to matplotlib time formatting')
                x0 = trace.stats.starttime + (win[0] * sample_rate)
                x1 = trace.stats.starttime + (win[1] * sample_rate)
                x0 = x0.matplotlib_date
                x1 = x1.matplotlib_date

            # Box spanning the full current height of the axes
            y0, y1 = ax.get_ylim()
            path_data = [
                (matplotlib.path.Path.MOVETO, (x0, y0)),
                (matplotlib.path.Path.LINETO, (x1, y0)),
                (matplotlib.path.Path.LINETO, (x1, y1)),
                (matplotlib.path.Path.LINETO, (x0, y1)),
                (matplotlib.path.Path.LINETO, (x0, y0)),
                (matplotlib.path.Path.CLOSEPOLY, (x0, y0)),
            ]

            codes, verts = zip(*path_data)
            path = matplotlib.path.Path(verts, codes)

            #
            windowDrawn.append(False)
            winArtist.append(None)
            lineArtist.append([])

            # Colour by window type
            if winTypeList[winNums] == 'gaps':
                clr = '#b13d41'
            elif winTypeList[winNums] == 'removed':
                clr = 'k'
            else:
                clr = 'yellow'

            # NOTE(review): matplotlib's axvline takes ymin/ymax as axes fractions (0-1),
            # but the data limits are passed here - confirm intent
            linArt0 = ax.axvline(x0, y0, y1, color=clr, linewidth=0.5, zorder=100)
            # NOTE(review): plt.axvline draws on pyplot's current axes, which is not
            # necessarily `ax` - confirm whether ax.axvline was intended
            linArt1 = plt.axvline(x1, y0, y1, color=clr, linewidth=0.5, zorder=100)
            # NOTE(review): indexes by winNums, which assumes lineArtist was empty when this loop started
            lineArtist[winNums].append([linArt0, linArt1])
            #

            pathList.append(path)

        # Draw every box that has not been drawn yet
        # NOTE(review): this loop reuses `i` (also the index of the outer axes loop) and
        # uses `clr` as left by the last window above - confirm both are intended
        for i, pa in enumerate(pathList):
            if windowDrawn[i]:
                pass
            else:
                patch = matplotlib.patches.PathPatch(pa, facecolor=clr, alpha=0.75)
                winArt = ax.add_patch(patch)
                windowDrawn[i] = True
                winArtist[i] = winArt

        #Reformat ax as needed
        if isinstance(origAxes, np.ndarray):
            origAxes[i] = ax
        elif isinstance(origAxes, dict):
            origAxes[a] = ax
        else:
            origAxes = ax

    # Hand the axes back in the same container type they came in
    ax = origAxes

    fig.canvas.draw()

    if show_plot:
        plt.show()
    return fig, ax, lineArtist, winArtist
# Helper function for removing windows from data, leaving gaps
def __remove_windows(stream, window_list, warmup_time):
    """Helper function that actually does the work in obspy to remove the windows calculated in the remove_noise function

    The windows are sorted by start time and rejected if they overlap. The
    stream is then cut into the pieces that lie between the windows, and the
    pieces are added back together with padding so that each removed window
    becomes a gap in the merged output.

    Parameters
    ----------
    stream : obspy.core.stream.Stream object
        Input stream from which to remove windows
    window_list : list
        A list of windows with start and end times for the windows to be removed.
        The times are converted with matplotlib.dates.num2date(), i.e. they are
        read as matplotlib date numbers.
    warmup_time : int, default = 0
        Passed from remove_noise, the amount of time in seconds to allow for warmup. Anything before this is removed as 'noise'.

    Returns
    -------
    outStream : obspy.core.stream.Stream object
        Stream with a masked array for the data where 'noise' has been removed.
        None is returned (after a warning) if any windows overlap.
    """
    # NOTE(review): og_stream is not used again in this function
    og_stream = stream.copy()

    #Find the latest start time and earliest endtime of all traces (in case they aren't consistent)
    maxStartTime = obspy.UTCDateTime(-1e10) #Go back pretty far (almost 400 years) to start with
    minEndTime = obspy.UTCDateTime(1e10)
    for comp in ['E', 'N', 'Z']:
        tr = stream.select(component=comp).copy()
        if tr[0].stats.starttime > maxStartTime:
            maxStartTime = tr[0].stats.starttime
        if tr[0].stats.endtime < minEndTime:
            minEndTime = tr[0].stats.endtime

    #Trim all traces to the same start/end time
    # NOTE(review): the return value is discarded, so this presumably trims `stream` in place - confirm
    stream.trim(starttime=maxStartTime, endtime=minEndTime)

    #Sort windows by the start of the window
    sorted_window_list = []
    windowStart = []
    for i, window in enumerate(window_list):
        windowStart.append(window[0])
    windowStart_og = windowStart.copy()
    windowStart.sort()
    sorted_start_list = windowStart
    # Position in the original list of each start time, in sorted order
    # NOTE(review): .index() returns the first match, so windows sharing a start time map to the same entry
    ranks = [windowStart_og.index(item) for item in sorted_start_list]
    for r in ranks:
        sorted_window_list.append(window_list[r])

    # Refuse to continue if any window ends after the next one starts
    for i, w in enumerate(sorted_window_list):
        if i < len(sorted_window_list) - 1:
            if w[1] > sorted_window_list[i+1][0]:
                warnings.warn(f"Warning: Overlapping windows. Please start over and reselect windows to be removed or use a different noise removal method: {w[1]} '>' {sorted_window_list[i+1][0]}")
                return

    window_gaps_obspy = []
    window_gaps = []

    # 1% of the first trace's duration (in seconds, rounded up), kept clear at the end of the record
    buffer_time = np.ceil((stream[0].stats.endtime-stream[0].stats.starttime)*0.01)

    #Get obspy.UTCDateTime objects for the gap times
    # First entry: zero-length window at the end of the warmup time
    window_gaps_obspy.append([stream[0].stats.starttime + warmup_time, stream[0].stats.starttime + warmup_time])
    for i, window in enumerate(sorted_window_list):
        for j, item in enumerate(window):
            if j == 0:
                window_gaps_obspy.append([0,0])
            window_gaps_obspy[i+1][j] = obspy.UTCDateTime(matplotlib.dates.num2date(item))
        # Window length: difference of the window's two values times 86400 (seconds per day)
        window_gaps.append((window[1]-window[0])*86400)
    # Last entry: zero-length window buffer_time before the end of the record
    window_gaps_obspy.append([stream[0].stats.endtime-buffer_time, stream[0].stats.endtime-buffer_time])
    #Note, we added start and endtimes to obpsy list to help with later functionality

    #Clean up stream windows (especially, start and end)
    # NOTE(review): both lists are popped from while window_gaps is being iterated - verify indices stay aligned
    for i, window in enumerate(window_gaps):
        newSt = stream.copy()
        #Check if first window starts before end of warmup time
        #If the start of the first exclusion window is before the warmup_time is over
        if window_gaps_obspy[i+1][0] - newSt[0].stats.starttime < warmup_time:
            #If the end of first exclusion window is also before the warmup_time is over
            if window_gaps_obspy[i+1][1] - newSt[0].stats.starttime < warmup_time:
                #Remove that window completely, it is unnecessary
                window_gaps.pop(i)
                window_gaps_obspy.pop(i+1)
                #...and reset the entire window to start at the warmup_time end
                window_gaps_obspy[0][0] = window_gaps_obspy[0][1] = newSt[0].stats.starttime + warmup_time
                continue
            else: #if window overlaps the start of the stream after warmup_time
                #Remove that window
                window_gaps.pop(i)
                #...and reset the start of the window to be the end of warm up time
                #...and remove that first window from the obspy list
                window_gaps_obspy[0][0] = window_gaps_obspy[0][1] = window_gaps_obspy[i+1][1]#newSt[0].stats.starttime + warmup_time
                window_gaps_obspy.pop(i+1)

        # NOTE(review): the left-hand sides are durations in seconds while the right-hand
        # sides are times (endtime minus buffer) - confirm this comparison is as intended
        if stream[0].stats.endtime - window_gaps_obspy[i+1][1] > stream[0].stats.endtime - buffer_time:
            if stream[0].stats.endtime - window_gaps_obspy[i+1][0] > stream[0].stats.endtime - buffer_time:
                window_gaps.pop(i)
                window_gaps_obspy.pop(i+1)
            else: #if end of window overlaps the buffer time, just end it at the start of the window (always end with stream, not gap)
                window_gaps.pop(i)
                window_gaps_obspy[-1][0] = window_gaps_obspy[-1][1] = newSt[0].stats.endtime - buffer_time

    #Add streams
    # One piece of stream between each pair of neighbouring entries in window_gaps_obspy
    stream_windows = []
    j = 0
    for i, window in enumerate(window_gaps):
        j=i
        newSt = stream.copy()
        stream_windows.append(newSt.trim(starttime=window_gaps_obspy[i][1], endtime=window_gaps_obspy[i+1][0]))
    # One more piece than there are windows
    i = j + 1
    newSt = stream.copy()
    stream_windows.append(newSt.trim(starttime=window_gaps_obspy[i][1], endtime=window_gaps_obspy[i+1][0]))

    # Join the pieces; each later piece is padded backwards by the length of the window before it
    for i, st in enumerate(stream_windows):
        if i == 0:
            outStream = st.copy()
        else:
            newSt = st.copy()
            gap = window_gaps[i-1]
            outStream = outStream + newSt.trim(starttime=st[0].stats.starttime - gap, pad=True, fill_value=None)
    outStream.merge()
    return outStream
# Helper functions for remove_outlier_curves()
# Use DBSCAN algorithm for outlier detection
def __dbscan_outlier_detect(hvsr_data, use_hv_curves=True, use_percentile=True,
                            dist_metric='euclidean',
                            neighborhood_size=50, min_neighborhood_pts=5,
                            col_names=['HV_Curves'], comp_names=['Z', 'E', 'N'],
                            col_prefix = 'HV_Curves',
                            verbose=False):
    """
    This is a helper function for remove_outlier_curves() to use a DBSCAN-style
    neighborhood count to identify and discard outlier curves.

    For each column in col_names, the pairwise distances between all curves are
    calculated. A curve that has fewer than min_neighborhood_pts other curves
    within the neighborhood distance has its "Use" value set to False.

    Parameters
    ----------
    hvsr_data : HVSRData
        HVSRData instance on which to perform DBSCAN analysis
    use_hv_curves : bool, optional
        Whether to use HV_Curves as the curve set of interest, by default True.
        If False, column names that appear in comp_names are prefixed with col_prefix.
    use_percentile : bool, optional
        Whether neighborhood_size is a percentile of the distance matrix (True, default)
        or a distance to be used directly (False)
    dist_metric : str, optional
        Distance metric to use (see scipy.spatial.distance.pdist), by default 'euclidean'
    neighborhood_size : int, optional
        Percentile value to use in selecting neighborhood cutoff size, by default 50.
        100 would use the largest distance in the distance matrix. 0 would use the smallest (0).
        Values between 0 and 1 are multiplied by 100; values outside of 0-100 are reset to 95.
        If use_percentile is False, this is used directly as the cutoff distance.
    min_neighborhood_pts : int, optional
        Minimum number of points in a curve's neighborhood for that point to be considered a core point, by default 5
    col_names : list, optional
        Columns of hvsr_windows_df containing the curves to analyze, by default ['HV_Curves']
    comp_names : list, optional
        Component names that receive col_prefix when use_hv_curves is False, by default ['Z', 'E', 'N']
    col_prefix : str, optional
        Prefix added to component names, by default 'HV_Curves'
    verbose : bool, optional
        Not used by this function, by default False

    Returns
    -------
    HVSRData
        HVSRData instance with the hvsr_windows_df DataFrame "Use" column updated
    """
    # Clean up percentile value
    if use_percentile:
        if neighborhood_size < 0 or neighborhood_size > 100:
            print("\tNeighborhood_percentile must be between 0-100, not ", neighborhood_size)
            print('\t Resetting neighborhood_size to 95')
            neighborhood_size = 95
        elif neighborhood_size > 0 and neighborhood_size < 1:
            neighborhood_size = neighborhood_size * 100

    # Define local function to use general dbscan algorithm for identifying outliers
    def _dbscan_outliers(distance_matrix, n_size, min_pts, _use_percentile=True):
        # Get epsilon based on whether it is a percentile
        if _use_percentile:
            eps = np.percentile(distance_matrix, n_size)
        else:
            eps = n_size

        # Count the curves within eps of each curve; one is subtracted because
        # the original count included the curve itself
        neighbor_counts = (distance_matrix <= eps).sum(axis=1) - 1
        return neighbor_counts >= min_pts

    # Iterate through curves of interest
    for column in col_names:
        if column in comp_names:
            if use_hv_curves == False:
                column = col_prefix + column

        curves = np.stack(hvsr_data['hvsr_windows_df'][column])
        dist_matrix = squareform(pdist(curves, metric=dist_metric))
        has_neighbors = _dbscan_outliers(distance_matrix=dist_matrix,
                                         n_size=neighborhood_size,
                                         min_pts=min_neighborhood_pts,
                                         _use_percentile=use_percentile)

        # Remove curves from analysis
        hvsr_data.hvsr_windows_df.loc[~has_neighbors, 'Use'] = False

    return hvsr_data
# This is a remove_outlier_curve() helper function to use a "prototype" curve (median curve) to detect outliers
def __prototype_outlier_detect(hvsr_data, use_hv_curves=False,
                               use_percentile=True, outlier_threshold=98,
                               col_names=['HV_Curves'], comp_names=['Z', 'E', 'N'],
                               col_prefix = 'HV_Curves',
                               verbose=False):
    """Helper function for remove_outlier_curves() that uses a "prototype" curve (the median curve) to detect outliers

    For each column in col_names, the median curve over all windows is
    calculated, along with the RMSE of every window's curve from that median.
    The RMSE is saved to hvsr_windows_df in a column named 'RMSE_' + column.
    If any window's RMSE exceeds the threshold in any column, the "Use" value
    is kept only for windows whose RMSE is below the threshold in every column.

    Parameters
    ----------
    hvsr_data : HVSRData
        HVSRData instance on which to perform the outlier analysis
    use_hv_curves : bool, optional
        If False (default), column names that appear in comp_names are prefixed with col_prefix.
    use_percentile : bool, optional
        Whether outlier_threshold is a percentile of the (non-NaN) RMSE values (True, default)
        or an RMSE value to be used directly
    outlier_threshold : int or float, optional
        Percentile (or RMSE value, see use_percentile) above which a curve is an outlier, by default 98
    col_names : list, optional
        Columns of hvsr_windows_df containing the curves to analyze, by default ['HV_Curves']
    comp_names : list, optional
        Component names that receive col_prefix when use_hv_curves is False, by default ['Z', 'E', 'N']
    col_prefix : str, optional
        Prefix added to component names, by default 'HV_Curves'
    verbose : bool, optional
        Whether to print information about the outlier removal to the terminal, by default False

    Returns
    -------
    HVSRData
        HVSRData instance with the hvsr_windows_df DataFrame "Use" column updated
    """
    # Windows that pass the threshold test in every column analyzed
    keep_mask = np.ones(hvsr_data['hvsr_windows_df'].shape[0], dtype=bool)

    # Loop through each component, and determine which curves are outliers
    bad_rmse = []
    for column in col_names:
        if column in comp_names:
            if use_hv_curves == False:
                column = col_prefix + column

        # Retrieve data from dataframe (use all windows, just in case)
        curr_data = np.stack(hvsr_data['hvsr_windows_df'][column])

        # Calculate a median curve (broadcast against every window's curve below)
        medCurve = np.nanmedian(curr_data, axis=0)

        # Calculate RMSE
        rmse = np.sqrt(((curr_data - medCurve) ** 2).sum(axis=1) / curr_data.shape[1])
        hvsr_data['hvsr_windows_df']['RMSE_'+column] = rmse

        if use_percentile is True:
            rmse_threshold = np.percentile(rmse[~np.isnan(rmse)], outlier_threshold)
            if verbose:
                print(f'\tRMSE at {outlier_threshold}th percentile for {column} calculated at: {rmse_threshold:.2f}')
        else:
            rmse_threshold = outlier_threshold

        # Retrieve index of those RMSE values that lie outside the threshold
        bad_rmse.extend(np.flatnonzero(rmse > rmse_threshold).tolist())

        # Apply this column's threshold to the running mask, so that outliers
        # from every column (not only the last one) are removed below
        keep_mask &= np.asarray(rmse_threshold > rmse)

    # Get unique values of bad_rmse indices and update the "Use" column of the hvsr_windows_df
    bad_rmse = np.unique(np.asarray(bad_rmse, dtype=int))
    if len(bad_rmse) > 0:
        hvsr_data['hvsr_windows_df']['Use'] = hvsr_data['hvsr_windows_df']['Use'] * keep_mask

    if verbose:
        if len(bad_rmse) > 0:
            print(f"\n\t\tThe windows starting at the following times have been removed from further analysis ({len(bad_rmse)}/{hvsr_data['hvsr_windows_df'].shape[0]}):")
            for b in hvsr_data['hvsr_windows_df'].index[pd.Series(bad_rmse)]:
                print(f"\t\t {b}")
        else:
            print('\tNo outlier curves have been removed')

    return hvsr_data
# Helper functions for generate_psds()
# Generate psds from raw data (no response removed)
def __single_psd_from_raw_data(hvsr_data, window_length=30.0, window_length_method='length', window_type='hann',
                               overlap=0.5, num_freq_bins=512,
                               show_psd_plot=False, remove_response=False, do_azimuths=False, verbose=False):
    """Helper function to get PSDs from raw trace streams (no response information is needed in this case)

    Parameters
    ----------
    hvsr_data : HVSRData object
        HVSRData object containing data to be processed
    window_length : float, optional
        Length of FFT processing window in seconds, by default 30.0
    window_length_method : str, optional
        Windowing method, passed unchanged to _create_windows(), by default 'length'
    window_type : str, optional
        Window function passed to scipy.signal.welch(), by default 'hann'
    overlap : float, optional
        Percent overlap between windows (0-1), by default 0.5.
        A percentage value between 1-100 will be accepted, but will be divided by 100 to convert to 0-1.
        If the value is over 100, the modulus of 100 will be calculated, then divided by 100; i.e., (overlap%100)/100.
        Negative values, and values that resolve to a full (100%) overlap, are replaced by 0.5.
    num_freq_bins : int, optional
        Number of log-spaced frequencies to which every PSD is interpolated, by default 512.
        The frequency limits are hvsr_data.hvsr_band if that attribute exists, otherwise DEFAULT_BAND.
    show_psd_plot : bool, optional
        Whether to show a plot of the psds, by default False
    remove_response : bool, optional
        Whether to call remove_response(hvsr_data['inv']) on each trace before processing, by default False
    do_azimuths : bool, optional
        Whether to also process the traces selected with component='R', by default False
    verbose : bool, optional
        Whether to print information about the PSD processing to terminal, by default False

    Returns
    -------
    Tuple (dict, np.array)
        Index 0 is a dictionary keyed by component ("Z", "E", "N", plus any azimuth components).
        Each value is a dictionary keyed by str(window start time) holding the PSD (in dB) of that
        window interpolated to the common frequency vector (all NaN for windows without usable data).
        Index 1 is a numpy array of (window start time, used flag) pairs for the last component processed.
    """
    zdata = hvsr_data.stream.select(component='Z').merge()
    edata = hvsr_data.stream.select(component='E').merge()
    ndata = hvsr_data.stream.select(component='N').merge()

    dataDict = {'Z': zdata,
                'E': edata,
                'N': ndata}

    if do_azimuths:
        azimuthStream = hvsr_data.stream.select(component='R').merge()
        for azimuthTrace in azimuthStream:
            dataDict[azimuthTrace.stats.component.upper()] = azimuthTrace

    if remove_response:
        for compStream in dataDict.values():
            compStream = compStream.split()
            for trace in compStream:
                trace.remove_response(hvsr_data['inv'])
            compStream.merge()
        if verbose:
            print("\n\tInstrument Response Removed from Traces\n")

    sample_rate = zdata[0].stats.sampling_rate

    # Transform overlap to proper formatting (% b/w 0-1)
    if overlap > 100:
        if verbose:
            print(f"\tThe parameter overlap={overlap} should be a float between 0-1")
            print(f"\t Since it is over 100, the modulus of 100 (overlap%100)/100=({overlap%100}) will be used")
        overlap = (overlap % 100)/100
    elif overlap > 1:
        overlap = overlap / 100
    elif overlap < 0:
        if verbose:
            print(f"\tThe parameter overlap={overlap} should be a float between 0-1")
            print(f"\t This has been updated to the default value of overlap=0.5")
        overlap = 0.5  # just set it default otherwise

    # A full overlap would hand welch noverlap == nperseg (which it rejects), so fall back to the default
    if overlap >= 1:
        if verbose:
            print(f"\tThe parameter overlap={overlap} would result in fully overlapping windows")
            print(f"\t This has been updated to the default value of overlap=0.5")
        overlap = 0.5

    # Get number of samples instead of seconds/percentage
    psd_window_samples = int(window_length * sample_rate)
    overlap_samples = overlap * psd_window_samples

    # Generated x values to which data will be interpolated later
    # This maintains consistency in array size across all FFT windows
    if hasattr(hvsr_data, 'hvsr_band'):
        low_freq = hvsr_data.hvsr_band[0]
        hi_freq = hvsr_data.hvsr_band[1]
    else:
        low_freq = DEFAULT_BAND[0]
        hi_freq = DEFAULT_BAND[1]
    x_freqs = np.logspace(np.log10(low_freq), np.log10(hi_freq), num_freq_bins)

    # Get all possible windows; these depend only on hvsr_data, so they are the same for every component
    windows = _create_windows(hvsr_data=hvsr_data, window=window_length,
                              overlap=overlap, window_length_method=window_length_method, verbose=False)

    # For each component, step through the time windows and do FFT analysis
    psdDict = {}
    final_psds = []
    windows_out = []
    for key, curr_component in dataDict.items():
        psdDict[key] = {}

        # Get all data in same format (obspy.Stream, traces will be extracted later)
        if isinstance(curr_component, obspy.Trace):
            st = obspy.Stream([curr_component]).merge()
        else:
            st = curr_component.merge()
        tr = st[0]

        # Outputs are re-initialized for every component (the last component's are returned/plotted)
        final_psds = []
        windows_out = []

        # Iterate through each window to trim data trace and perform fft analysis
        for stime, etime in windows:
            # Trim trace to just window time (copy so doesn't overwrite main trace)
            window_trace = tr.copy()
            window_trace.trim(starttime=stime, endtime=etime)

            # Handle gaps in data
            # Only process longest continuous data section in each window, if gaps exist
            window_st = window_trace.split()  # Split into continuous data sections

            # Handle window where there is no data
            if len(window_st) == 0:
                windows_out.append((stime, False))
                psdDict[key][str(stime)] = np.full(x_freqs.shape, np.nan)
                if verbose:
                    print(f"\tWindow starting at {stime} not used (does not exist in data))")
                continue

            longest_trace = window_st[0]  # Initialize longest as first trace
            for shorttr in window_st:
                if len(shorttr) > len(longest_trace):
                    longest_trace = shorttr
            window_trace = longest_trace

            # If the data being processed ends up being shorter than window time,
            # reset inputs to scipy.signal.welch to match new "window" length.
            # These are per-window values so a short window cannot alter the overlap of later windows.
            nsamplesperwin = psd_window_samples
            noverlap = overlap_samples
            if len(window_trace) < nsamplesperwin:
                nsamplesperwin = len(window_trace.data)
                noverlap = nsamplesperwin - 1

            # PERFORM FFT analysis using Welch method if length of window is > 1 sample
            # If time window used, the start time will be recorded in window_out list
            # and PSD will be stored in psdDict[key][str(starttime)] as numpy array.
            has_nan = np.any(np.isnan(window_trace.data))
            if nsamplesperwin > 1 and not has_nan:
                with warnings.catch_warnings():
                    warnings.simplefilter('ignore')  # Sometimes unnecessary warnings arise
                    f, pxx = scipy.signal.welch(window_trace.data, fs=window_trace.stats.sampling_rate,
                                                window=window_type, nperseg=nsamplesperwin,
                                                noverlap=noverlap, nfft=None, detrend='linear', return_onesided=True,
                                                scaling='density', axis=-1, average='mean')

                # Only add successful psds to psdDict (and the window starttime to window_out)
                if pxx.size > 0 and f.size > 0:
                    interpPSD = np.interp(x_freqs, f, pxx)
                    interpPSD_dB = 10*np.log10(interpPSD)  # Convert to decibels
                    psdDict[key][str(stime)] = interpPSD_dB
                    final_psds.append(interpPSD_dB)
                    windows_out.append((stime, True))
                else:
                    windows_out.append((stime, False))
                    psdDict[key][str(stime)] = np.full(x_freqs.shape, np.nan)
                    if verbose:
                        print(f"\tWindow starting at {stime} not used ({len(window_trace)} samples long)")
            else:
                # NOTE(review): unlike the other rejection paths, this window is neither added to
                # windows_out nor given a NaN entry in psdDict; kept as-is pending confirmation of intent.
                if verbose:
                    print(f"\tWindow starting at {stime} not used ({len(window_trace)} samples long)")

    if show_psd_plot and final_psds:
        # Plot the interpolated PSDs (same length as x_freqs) of the last component processed.
        # Values are already in dB, so only the frequency axis is logarithmic.
        plt.plot(x_freqs, np.array(final_psds).T, linewidth=0.5, c='k')
        plt.semilogx()

    return psdDict, np.array(windows_out)
# Generate windows "manually"
def _create_windows(hvsr_data, window=30, overlap=0.5, window_length_method='length', verbose=False):
    """Function to create time windows based on input stream.

    Parameters
    ----------
    hvsr_data : HVSRData object, Obspy.Stream, or Obspy.Trace
        Input object with stream data
    window : float or int, optional
        Windowing parameter. If window_length_method='length', this is the length of each window in seconds.
        If window_length_method='number', this must be int or be able to be converted to int, and is the number of windows, by default 30
    overlap : float, optional
        Window overlap as a fraction (0-1). If >1, it will be interpreted as a percentage out of 100, by default 0.5.
        Negative values, and values that resolve to a full (100%) overlap, are replaced by 0.5.
    window_length_method : str, optional
        Which windowing method to use, "length", which creates windows of a specified length, or
        "number", which creates a specified number of windows, by default 'length'
    verbose : bool, optional
        Whether to print information about the process to terminal, by default False

    Returns
    -------
    np.array
        2D Numpy array; the size of the first dimension is the number of windows, size of second dimension is 2 (start and end)

    Raises
    ------
    RuntimeError
        If hvsr_data is not a supported type or contains no traces.
    ValueError
        If the resulting time between window starts is zero.
    """
    length_list = ['window_length', 'window length',
                   'length', 'len', 'l', 'size', 's']
    winNum_list = ['number of windows', 'window_number', 'window number',
                   'number', 'num', 'winnum', 'window_num', 'amount']

    # Get input data as obspy.Stream
    if isinstance(hvsr_data, HVSRData):
        st = hvsr_data.stream.merge()
    elif isinstance(hvsr_data, obspy.Stream):
        st = hvsr_data.merge()
    elif isinstance(hvsr_data, obspy.Trace):
        st = obspy.Stream([hvsr_data]).merge()
    else:
        raise RuntimeError("hvsr_data parameter of _create_windows() must be sprit.HVSRData, obspy.Stream, or obspy.Trace")

    if len(st) == 0:
        raise RuntimeError("hvsr_data parameter of _create_windows() does not contain any traces")

    # Get largest starttime and smallest endtime (so windows only span times present in every trace)
    maxStart = max(tr.stats.starttime for tr in st)
    minEnd = min(tr.stats.endtime for tr in st)

    # Calculate time between end and start
    timeRange = minEnd - maxStart

    # Transform overlap to proper formatting (% b/w 0-1)
    if overlap > 100:
        if verbose:
            print(f"\tThe parameter overlap={overlap} should be a float between 0-1")
            print(f"\t Since it is over 100, the modulus of 100 (overlap%100)/100=({overlap%100}) will be used")
        overlap = (overlap % 100)/100
    elif overlap > 1:
        overlap = overlap / 100
    elif overlap < 0:
        if verbose:
            print(f"\tThe parameter overlap={overlap} should be a float between 0-1")
            print(f"\t This has been updated to the default value of overlap=0.5")
        overlap = 0.5  # just set it default otherwise

    # A full overlap means the windows never advance (zero stride), so fall back to the default
    if overlap >= 1:
        if verbose:
            print(f"\tThe parameter overlap={overlap} would result in fully overlapping windows")
            print(f"\t This has been updated to the default value of overlap=0.5")
        overlap = 0.5

    # Calculate "stride" (time between start of each window) and window length
    method = window_length_method.lower()
    if method in length_list:
        stride = window * (1-overlap)
        winLength = window
    elif method in winNum_list:
        stride = timeRange // window
        # Inverse of stride = winLength * (1 - overlap), so both methods treat overlap the same way
        # (and overlap=0 gives back-to-back windows instead of a division by zero)
        winLength = stride / (1 - overlap)
    else:
        if verbose:
            print(f"\twindow_method={window_length_method} is not a valid entry.")
            print(f"\t Use any of the following to create windows using a specific size: {length_list}")
            print(f"\t Use any of the following to create a specific number of windows : {winNum_list}")
            print(f"\t By default, using a window length of 30 seconds and overlap of 0.5")
        # Default of overlap=0.5, window_length=30
        stride = 15
        winLength = 30
        overlap = 0.5

    if stride == 0:
        raise ValueError(f"window={window} with window_length_method={window_length_method} results in a "
                         "time of 0 seconds between window starts; no windows can be created")

    # Get start and end of each window, and format appropriately (2d Numpy array)
    windowStarts = np.arange(maxStart, minEnd, stride)
    windowEnds = windowStarts + winLength
    windows = np.array(list(zip(windowStarts, windowEnds)))

    # print information if verbose specified
    if verbose:
        print("\tUsing the following windowing parameters")
        print(f"\t Number of windows: {windows.shape[0]}")
        print(f"\t Window Size: {winLength}")
        print(f"\t Window Overlap: {overlap}")

    return windows
# Remove noisy windows from df
def __remove_windows_from_df(hvsr_data, verbose=False):
    """Set 'Use' to False in hvsr_windows_df for windows that coincide with gaps in the edited stream.

    Parameters
    ----------
    hvsr_data : HVSRData object (or dict-like)
        Must contain 'stream_edited'. If 'hvsr_windows_df' is present, its 'Use' column is updated.
    verbose : bool, optional
        Whether to print the start times of the windows that were newly set to unused, by default False

    Returns
    -------
    HVSRData object
        The same object, with 'stream_edited' replaced by the split-then-merged stream and
        'x_gaps_obspyDT' set to the list of (gap start, gap end) time pairs.
    """
    # Get gaps from masked regions of traces
    gaps0 = []  # gap start times (end time of the trace before each gap)
    gaps1 = []  # gap end times (start time of the trace after each gap)
    outStream = hvsr_data['stream_edited'].split()
    for i, trace in enumerate(outStream):
        if i == 0:
            trEndTime = trace.stats.endtime
            comp_end = trace.stats.component
            continue # Wait until the second trace
        trStartTime = trace.stats.starttime
        comp_start = trace.stats.component
        firstDiff = True
        secondDiff = True
        # Check if both are different from any existing gap times
        if trEndTime in gaps0:
            firstDiff = False
        if trStartTime in gaps1:
            secondDiff = False
        # If the first element and second element are both new, add to gap list
        # NOTE(review): comp_end/comp_start are never compared, so the pair formed where one
        # component's traces end and the next component's begin is also recorded -- confirm intent.
        if firstDiff and secondDiff:
            gaps0.append(trEndTime)
            gaps1.append(trStartTime)
        # Remember this trace's end so it can be paired with the next trace's start
        trEndTime = trace.stats.endtime
    gaps = list(zip(gaps0, gaps1))
    # NOTE(review): three possible locations are checked here, but only hvsr_data['hvsr_windows_df'] is read below
    hvsr_windows_df_exists = ('hvsr_windows_df' in hvsr_data.keys()) or ('params' in hvsr_data.keys() and 'hvsr_windows_df' in hvsr_data['params'].keys()) or ('input_params' in hvsr_data.keys() and 'hvsr_windows_df' in hvsr_data['input_params'].keys())
    if hvsr_windows_df_exists:
        hvsrDF = hvsr_data['hvsr_windows_df']
        # Snapshot of the flags before editing, used to report what changed
        use_before = hvsrDF["Use"].copy().astype(bool)
        outStream = hvsr_data['stream_edited'].split()
        #for i, trace in enumerate(outStream):
            #if i == 0:
            #    trEndTime = trace.stats.endtime
            #    comp_end = trace.stats.component
            #    continue
            #trStartTime = trace.stats.starttime
            #comp_start = trace.stats.component
            #if trEndTime < trStartTime and comp_end == comp_start:
        hvsrDF['Use'] = hvsrDF['Use'].astype(bool)
        for gap in gaps:
            # All windows whose starts occur within the gap are set to False
            # (between_time compares time-of-day only; presumably the index is a DatetimeIndex -- TODO confirm)
            gappedIndices = hvsrDF.between_time(gap[0].datetime.time(), gap[1].datetime.time()).index#.loc[:, 'Use']
            hvsrDF.loc[gappedIndices,'Use'] = False
            # The previous window is also set to false, since the start of the gap lies within that window
            prevInd = hvsrDF.index.get_indexer([gap[0]], method='ffill')
            prevDTInd = hvsrDF.index[prevInd]
            hvsrDF.loc[prevDTInd, 'Use'] = False
        hvsrDF['Use'] = hvsrDF['Use'].astype(bool)
        hvsr_data['hvsr_windows_df'] = hvsrDF # May not be needed, just in case, though
        use_after = hvsrDF["Use"].astype(bool)
        # True where the flag differs from its value on entry
        removed = ~use_before.eq(use_after)
        if verbose:
            if removed[removed].shape[0]>0:
                print(f"\n\t\tThe windows starting at the following times have been removed from further analysis ({removed[removed].shape[0]}/{hvsrDF.shape[0]})")
                for t in removed[removed].index.to_pydatetime():
                    print(f'\t\t  {t} ')
            else:
                print(f"\t\tNo windows removed using remove_noise()")
    # Re-merge the split traces and store the result and the gap list on the data object
    outStream.merge()
    hvsr_data['stream_edited'] = outStream
    hvsr_data['x_gaps_obspyDT'] = gaps
    return hvsr_data
# Helper functions for process_hvsr()
# Get diffuse field assumption data
def _dfa(x, hvsr_data=None, verbose=False):#, equal_interval_energy, median_daily_psd, verbose=False):
    """Helper function for performing Diffuse Field Assumption (DFA) analysis

    Parameters
    ----------
    x : numpy.array
        Numpy array or list containing all x values (frequency or period) for each psd
    hvsr_data : HVSRData object
        HVSRData object containing all the data and information about the HVSR point being processed.
        Its 'dfa' entry is (re)created with the keys 'time_int_psd', 'time_values', and 'equal_interval_energy'.
    verbose : bool, optional
        Whether to print information about the DFA processing to terminal, default = False.

    Returns
    -------
    list
        One list per time window, each holding len(x)-1 H/V values computed as
        sqrt((E + N) / Z) from the normalized power of each component.
    """
    # Use equal energy for daily PSDs to give small 'events' a chance to contribute
    # the same as large ones, so that pH1List+pH2List+P3=1
    components = ('Z', 'E', 'N')

    if verbose:
        print('\tUsing Diffuse Field Assumption (DFA)', flush=True)
        warnings.warn('WARNING: DFA method is currently experimental and has not been extensively tested.')

    hvsr_data['dfa'] = {}
    hvsr_data['dfa']['time_int_psd'] = {'Z':{}, 'E':{}, 'N':{}}
    hvsr_data['dfa']['time_values'] = list()
    hvsr_data['dfa']['equal_interval_energy'] = {'Z':{}, 'E':{}, 'N':{}}

    time_int_psd = hvsr_data['dfa']['time_int_psd']
    time_values = hvsr_data['dfa']['time_values']
    eie = hvsr_data['dfa']['equal_interval_energy']
    windows_df = hvsr_data['hvsr_windows_df']
    n_steps = len(x) - 1

    hvsr_tSteps = []
    for i, t_int in enumerate(hvsr_data['psds']['Z']['current_times_used']):
        # Add the time interval to the time_values list
        time_int = str(t_int)
        if time_int not in time_values:
            time_values.append(time_int)

        # Get the psd data for each component at this time (there is only one PSD per time in SpRIT)
        tiIndDF = windows_df.index[i]
        for comp in components:
            time_int_psd[comp][time_int] = windows_df.loc[tiIndDF, f'psd_values_{comp}']

        # Convert each frequency step of each component's PSD to power
        power = {}
        for comp in components:
            comp_psd = time_int_psd[comp][time_int]
            power[comp] = [__get_power([comp_psd[j][()], comp_psd[j + 1][()]], [x[j], x[j + 1]])
                           for j in range(n_steps)]
        sum_power = sum(power['Z']) + sum(power['E']) + sum(power['N'])  # total power

        # Normalize by total power, then divide by the number of PSD samples for the time interval
        for comp in components:
            n_psd_samples = len(time_int_psd[comp][time_int])
            eie[comp][time_int] = [(p / sum_power) / n_psd_samples for p in power[comp]]

        # Perform h/v calculation at each frequency step.
        # All three components were stored for this time interval just above, so none can be missing here.
        hvsr_tSteps.append([math.sqrt((e_pow + n_pow) / z_pow)
                            for z_pow, e_pow, n_pow in zip(eie['Z'][time_int],
                                                           eie['E'][time_int],
                                                           eie['N'][time_int])])

    return hvsr_tSteps
# Helper function for smoothing across frequencies
def __freq_smooth_window(hvsr_out, f_smooth_width, kind_freq_smooth):
    """Helper function to smooth frequency if 'constant' or 'proportional' is passed to freq_smooth parameter of process_hvsr() function

    Parameters
    ----------
    hvsr_out : HVSRData object (or dict-like)
        Must contain 'psd_raw' (dict of 2D arrays, one row per time window) and
        'hvsr_windows_df' (with a 'psd_values_<key>' column for each key of 'psd_raw').
    f_smooth_width : int or float
        For 'constant', the full smoothing width in samples (the half width is f_smooth_width//2).
        For 'proportional', the half width as a fraction of the number of frequencies
        (values over 1 are treated as a percentage).
    kind_freq_smooth : str
        Either 'constant' or 'proportional'.

    Returns
    -------
    HVSRData object
        hvsr_out with 'psd_raw' and the 'psd_values_<key>' columns replaced by their smoothed versions.
        If kind_freq_smooth is not recognized (a warning is issued) or the half width is less than
        one sample, hvsr_out is returned without any smoothing applied.
    """
    if kind_freq_smooth == 'constant':
        fwidthHalf = int(f_smooth_width // 2)
    elif kind_freq_smooth == 'proportional':
        anyKey = list(hvsr_out['psd_raw'].keys())[0]
        freqLength = hvsr_out['psd_raw'][anyKey].shape[1]
        if f_smooth_width > 1:
            fwidthHalf = int(f_smooth_width/100 * freqLength)
        else:
            fwidthHalf = int(f_smooth_width * freqLength)
    else:
        # No half width can be derived, so skip smoothing rather than fail on an undefined variable
        warnings.warn(f"kind_freq_smooth={kind_freq_smooth!r} is not 'constant' or 'proportional'; "
                      "no frequency smoothing was applied")
        return hvsr_out

    # A half width under one sample leaves nothing to average (and would divide by zero below)
    if fwidthHalf < 1:
        return hvsr_out

    # Weights on either side of the sample being smoothed; these do not depend on the sample, so build them once
    rampUp = np.linspace(1/fwidthHalf, 1-1/fwidthHalf, fwidthHalf)
    rampDown = np.linspace(1-1/fwidthHalf, 0, fwidthHalf)

    for k in hvsr_out['psd_raw']:
        colName = f'psd_values_{k}'
        newTPSD = list(np.stack(hvsr_out['hvsr_windows_df'][colName]))

        for t, tPSD in enumerate(hvsr_out['psd_raw'][k]):
            nFreqs = len(tPSD)
            for i in range(nFreqs):
                # Lower side of the window (truncated near the start of the array)
                if i < fwidthHalf:
                    downWin = i
                    windMultiplier_down = rampUp[:-1*(fwidthHalf-downWin)]
                else:
                    downWin = fwidthHalf
                    windMultiplier_down = rampUp

                # Upper side of the window (truncated near the end of the array)
                if i + fwidthHalf >= nFreqs:
                    upWin = (nFreqs - i)
                    windMultiplier_up = rampDown[:-1 * (fwidthHalf-upWin+1)]
                else:
                    upWin = fwidthHalf+1
                    windMultiplier_up = rampDown

                # Insert the weight of 1 for the sample itself
                windMultiplier = list(np.hstack([windMultiplier_down, windMultiplier_up]))
                midInd = np.argmax(windMultiplier)
                if i > 0:
                    midInd += 1
                windMultiplier.insert(midInd, 1)

                # Weighted mean of the raw values inside the window
                smoothVal = np.divide(np.sum(np.multiply(tPSD[i-downWin:i+upWin], windMultiplier)), np.sum(windMultiplier))
                newTPSD[t][i] = smoothVal

        hvsr_out['psd_raw'][k] = newTPSD
        hvsr_out['hvsr_windows_df'][colName] = pd.Series(list(newTPSD), index=hvsr_out['hvsr_windows_df'].index)

    return hvsr_out
# Get an HVSR curve, given an array of x values (freqs), and a dict with psds for three components
def __get_hvsr_curve(x, psd, horizontal_method, hvsr_data, azimuth=None, verbose=False):
    """Get an HVSR curve from three components over the same time period/frequency intervals

    Parameters
    ----------
    x : list or array_like
        x value (frequency or period)
    psd : dict
        Dictionary with psd values for three components ('Z', 'E', 'N'). Any other keys are
        treated as azimuth components. Usually read in as part of hvsr_data from process_hvsr
    horizontal_method : int or str
        Integer or string, read in from process_hvsr method parameter
    hvsr_data : HVSRData object
        Only used when the diffuse field assumption method is selected
    azimuth : optional
        Passed through to the H/V calculation of each frequency step, by default None
    verbose : bool, optional
        Passed through to the diffuse field assumption calculation, by default False

    Returns
    -------
    tuple
        (hvsr_curve, hvsr_azimuth, hvsr_tSteps). hvsr_curve is a numpy array containing H/V ratios
        for each frequency/period step in x. hvsr_azimuth is a dictionary with a list of H/V ratios
        for each azimuth component in psd. hvsr_tSteps is only used with the diffuse field
        assumption method (None otherwise).
    """
    azimuth_curves = {}

    if horizontal_method in (1, 'dfa', 'Diffuse Field Assumption'):
        time_steps = _dfa(x, hvsr_data, verbose)
        return np.array(np.mean(time_steps, axis=0)), azimuth_curves, time_steps

    main_curve = []
    for j in range(len(x)-1):
        freq_step = [x[j], x[j + 1]]
        z_step = [psd['Z'][j], psd['Z'][j + 1]]
        e_step = [psd['E'][j], psd['E'][j + 1]]
        n_step = [psd['N'][j], psd['N'][j + 1]]

        main_curve.append(__get_hvsr(z_step, e_step, n_step, freq_step,
                                     azimuth=azimuth, use_method=horizontal_method))

        # Do azimuth HVSR Calculations, if applicable
        for k in psd.keys():
            if k.lower() in ['z', 'e', 'n']:
                continue
            az_step = [psd[k][j], psd[k][j + 1]]
            az_ratio = __get_hvsr(z_step, az_step, None, freq_step, azimuth=azimuth, use_method='az')
            azimuth_curves.setdefault(k, []).append(az_ratio)

    # Time steps are only produced by the DFA method
    return np.array(main_curve), azimuth_curves, None
# Get HVSR
def __get_hvsr(_dbz, _db1, _db2, _x, azimuth=None, use_method=3):
    """Helper function to calculate H/V ratio for a single frequency step

    Parameters
    ----------
    _dbz : list
        Two item list with deciBel value of z component at either end of particular frequency step
    _db1 : list
        Two item list with deciBel value of either e or n component (does not matter which) at either end of particular frequency step
    _db2 : list or None
        Two item list with deciBel value of the other horizontal component at either end of particular frequency step.
        If None, a value of 1 is used for the power and amplitude of this component.
    _x : list
        Two item list containing frequency values at either end of frequency step of interest
    azimuth : number, 'HV', or None
        Only used by use_method 8. None is treated as 90 (degrees); 'HV' returns the geometric mean.
    use_method : int or str, default = 3
        H is computed based on the selected use_method see: https://academic.oup.com/gji/article/194/2/936/597415
        (the integer or its string form are both accepted):
            (2) arithmetic mean, that is, H ≡ (HN + HE)/2
            (3) geometric mean, that is, H ≡ √HN · HE, recommended by the SESAME project (2004)
            (4) vector summation, that is, H ≡ √H2 N + H2 E
            (5) quadratic mean, that is, H ≡ √(H2 N + H2 E )/2
            (6) maximum horizontal value, that is, H ≡ max {HN, HE}
            (7) minimum horizontal value, that is, H ≡ min {HN, HE}
            (8) horizontals combined at the specified azimuth
        Any other value (including 'az', where the horizontals are already combined) uses
        the amplitude of _db1 on its own.

    Returns
    -------
    float
        Ratio of the combined horizontal amplitude to the vertical amplitude
    """
    _pz = __get_power(_dbz, _x)
    _p1 = __get_power(_db1, _x)
    _hz = math.sqrt(_pz)
    _h1 = math.sqrt(_p1)

    if _db2 is not None:
        _p2 = __get_power(_db2, _x)
        _h2 = math.sqrt(_p2)
    else:
        _p2 = 1
        _h2 = 1

    def _combine_at_azimuth():
        # Project the two horizontal amplitudes onto the requested azimuth
        if azimuth == 'HV':
            return math.sqrt(_h1 * _h2)
        az_rad = np.deg2rad(90 if azimuth is None else azimuth)
        return np.add(_h2 * np.cos(az_rad), _h1 * np.sin(az_rad))

    # Each method is only evaluated if it is the one selected
    _combiners = (
        (2, lambda: (_h1 + _h2) / 2.0),              # Arithmetic mean
        (3, lambda: math.sqrt(_h1 * _h2)),           # Geometric mean
        (4, lambda: math.sqrt(_p1 + _p2)),           # Vector summation
        (5, lambda: math.sqrt((_p1 + _p2) / 2.0)),   # Quadratic mean
        (6, lambda: max(_h1, _h2)),                  # Max horizontal value
        (7, lambda: min(_h1, _h2)),                  # Minimum horizontal value
        (8, _combine_at_azimuth),                    # Azimuth calculation
    )

    # If azimuth ('az') or unrecognized method, horizontals are already combined: no _h2
    _hCombined = _h1
    for _code, _combine in _combiners:
        if use_method == _code or str(use_method) == str(_code):
            _hCombined = _combine()
            break

    return _hCombined / _hz
# For converting dB scaled data to power units
def __get_power(_db, _x):
    """Calculate power for HVSR over a single frequency (or period) step

    The input values are in decibels, i.e., the last step (6) of the original processing outlined below:
        1. Dividing the window into 13 segments having 75% overlap
        2. For each segment:
            2.1 Removing the trend and mean
            2.2 Apply a 10% sine taper
            2.3 FFT
        3. Calculate the normalized PSD
        4. Average the 13 PSDs & scale to compensate for tapering
        5. Frequency-smooth the averaged PSD over 1-octave intervals at 1/8-octave increments
        6. Convert power to decibels

    Parameters
    ----------
    _db : list
        Two-item list with individual power values in decibels for specified freq step.
    _x : list
        Two-item list with individual x value (either frequency or period)

    Returns
    -------
    _p : float
        Individual power value, converted from decibels

    NOTE
    ----
    PSD is equal to the power divided by the width of the bin
        PSD = P / W
        log(PSD) = Log(P) - log(W)
        log(P) = log(PSD) + log(W)  here W is width in frequency
        log(P) = log(PSD) - log(Wt) here Wt is width in period

    For each bin, rectangular integration is performed to compute power;
    power is assigned to the point at the beginning of the interval
         _   _
        | |_| |
        |_|_|_|

    Here power is computed for individual points, so no integration is necessary:
    the area is the mean of the two (linear) PSD values times the width of the step.
    """
    _bin_width = abs(np.diff(_x)[0])
    _mean_psd = np.mean(__remove_db(_db))
    return np.multiply(_mean_psd, _bin_width)
# Remove decibel scaling
def __remove_db(_db_value):
    """Convert dB power to (linear) power

    Parameters
    ----------
    _db_value : list or array_like
        Power values in decibels (any length, including empty)

    Returns
    -------
    list
        Linear power values, 10**(dB/10). Any value that comes out as exactly 0
        (e.g., from -inf dB) is replaced by 10e-300 so that later square roots
        and divisions using these values do not end up dividing by zero.
    """
    _min_power = 10e-300
    _values = [10 ** (float(_d) / 10.0) for _d in _db_value]
    return [_min_power if _v == 0 else _v for _v in _values]
# Find peaks in the hvsr curve
def __find_peaks(_y):
    """Finds all possible peaks on hvsr curves

    Parameters
    ----------
    _y : list or array
        Curve to search; in this case, either the main hvsr curve or an individual time step curve

    Returns
    -------
    numpy.ndarray
        Indices (along the first axis) of the values that are strictly greater than both of their neighbors
    """
    _curve = np.array(_y)
    _peak_indices = scipy.signal.argrelextrema(_curve, comparator=np.greater)
    return _peak_indices[0]
# Get additional HVSR params for later calculations
def __gethvsrparams(hvsr_out):
    """Private function to get HVSR parameters for later calculations (things like standard deviation, etc)

    Parameters
    ----------
    hvsr_out : HVSRData object (or dict-like)
        Read here: 'hvsr_curve', 'hvsr_az', 'hvsr_windows_df', 'ind_hvsr_curves', and 'ind_hvsr_stdDev'.

    Returns
    -------
    HVSRData object
        The same object. For every 'HV_Curves*' column of hvsr_windows_df, a 'Log10_HV_Curves*' column is added
        and the 'hvsr_log_std', 'hvsrp', 'hvsrm', 'hvsrp2', and 'hvsrm2' dictionaries gain an entry for that column's ID.
        Nothing is changed if 'ind_hvsr_curves' is empty.
    """
    # NOTE(review): these initial values are all reassigned below before being read
    hvsrp2 = {}
    hvsrm2 = {}
    hvsrp2=[]
    hvsrm=[]
    hvsr_log_std = {}
    hvsr = hvsr_out['hvsr_curve']
    hvsr_az = hvsr_out['hvsr_az']
    hvsrDF = hvsr_out['hvsr_windows_df']
    if len(hvsr_out['ind_hvsr_curves'].keys()) > 0:
        # With arrays, original way of doing it
        # (this result is discarded when hvsr_log_std is reset a few lines below)
        hvsr_log_std = {}
        for k in hvsr_out['ind_hvsr_curves'].keys():
            hvsr_log_std[k] = np.nanstd(np.log10(hvsr_out['ind_hvsr_curves'][k]), axis=0)
        #With dataframe, updated way to use DF for all time-step tasks, still testing
        logStackedata = {}
        hvsrp = {}
        hvsrm = {}
        hvsrp2 = {}
        hvsrm2 = {}
        hvsr_log_std = {}
        for col_name in hvsr_out['hvsr_windows_df'].columns:
            if col_name.startswith("HV_Curves"):
                # 'HV_Curves' itself gets the ID 'HV'; for other columns the ID is the
                # third underscore-separated part of the column name
                if col_name == 'HV_Curves':
                    colSuffix = '_HV'
                    colID = 'HV'
                else:
                    colSuffix = '_'+'_'.join(col_name.split('_')[2:])
                    colID = colSuffix.split('_')[1]
                # Store log10 of every window's curve as a new column (one array per row)
                stackedData = np.stack(hvsr_out['hvsr_windows_df'][col_name])
                logStackedata = np.log10(stackedData).tolist()
                for i, r in enumerate(logStackedata):
                    logStackedata[i] = np.array(r)
                hvsr_out['hvsr_windows_df']['Log10_HV_Curves'+colSuffix] = logStackedata
                # Standard deviation of the log10 curves, using only the windows flagged in 'Use'
                hvsr_log_std[colID] = np.nanstd(np.stack(hvsr_out['hvsr_windows_df']['Log10_HV_Curves'+colSuffix][hvsrDF['Use']]), axis=0)
                #The components are already calculated, don't need to recalculate aren't calculated at the time-step level
                hvsrp[colID] = np.add(hvsr_out['hvsr_curve'], hvsr_out['ind_hvsr_stdDev'][colID])
                hvsrm[colID] = np.subtract(hvsr_out['hvsr_curve'], hvsr_out['ind_hvsr_stdDev'][colID])
                # NOTE(review): every pass of this loop writes to the same colID entry, so when 'hvsr_az'
                # is not empty only the values from its last key remain -- confirm intent
                for k in hvsr_out['hvsr_az'].keys():
                    hvsrp[colID] = np.add(hvsr_out['hvsr_az'][k], hvsr_out['ind_hvsr_stdDev'][colID])
                    hvsrm[colID] = np.subtract(hvsr_out['hvsr_az'][k], hvsr_out['ind_hvsr_stdDev'][colID])
                # NOTE(review): the standard deviation is of log10 values but is applied here with np.exp -- confirm intent
                hvsrp2[colID] = np.multiply(hvsr, np.exp(hvsr_log_std[colID]))
                hvsrm2[colID] = np.divide(hvsr, np.exp(hvsr_log_std[colID]))
                # Copy this column's results into hvsr_out, creating each dictionary if it does not exist yet
                newKeys = ['hvsr_log_std', 'hvsrp','hvsrm', 'hvsrp2','hvsrm2']
                newVals = [hvsr_log_std,    hvsrp,  hvsrm,   hvsrp2,  hvsrm2]
                for i, nk in enumerate(newKeys):
                    if nk not in hvsr_out.keys():
                        hvsr_out[nk] = {}
                    hvsr_out[nk][colID] = np.array(newVals[i][colID])
    return hvsr_out
# HELPER FUNCTIONS FOR GET REPORT
# Private function to generate print report
def _generate_print_report(hvsr_results, azimuth="HV", show_print_report=True, verbose=False):
"""Helper function to perform create a printed (monospace) report with summary data for HVSR Site
Parameters
----------
hvsr_results : HVSRData object
HVSRData object with data to be reported on
show_print_report : bool, optional
Whether output will be produced or not (if show_print_report=True, no ouptut will be produced (report will not be printed)), by default False
Returns
-------
HVSRData object
HVSRData object with the ["Print_Report"] attribute created or updated.
The .Print_Report attribute is a formatted string that can be
displayed using print(hvsr_results['Print_Report'] with a summary of the HVSR results)
"""
#Print results
#Make separators for nicely formatted print output
sepLen = 99
siteSepSymbol = '='
intSepSymbol = u"\u2012"
extSepSymbol = u"\u2014"
if sepLen % 2 == 0:
remainVal = 1
else:
remainVal = 0
siteWhitespace = 2
#Format the separator lines internal to each site
internalSeparator = intSepSymbol.center(sepLen-4, intSepSymbol).center(sepLen, ' ')
extSiteSeparator = "".center(sepLen, extSepSymbol)
siteSeparator = f"{hvsr_results['input_params']['site']}".center(sepLen - siteWhitespace, ' ').center(sepLen, siteSepSymbol)
endSiteSeparator = "".center(sepLen, siteSepSymbol)
#Start building list to print
report_string_list = []
report_string_list.append("") #Blank line to start
report_string_list.append(extSiteSeparator)
report_string_list.append(siteSeparator)
report_string_list.append(extSiteSeparator)
#report_string_list.append(internalSeparator)
report_string_list.append('')
report_string_list.append(f"\tSite Name: {hvsr_results['input_params']['site']}")
report_string_list.append(f"\tAcq. Date: {hvsr_results['input_params']['acq_date']}")
report_string_list.append(f"\tLocation : {hvsr_results['input_params']['longitude']}°, {hvsr_results['input_params']['latitude']}°")
report_string_list.append(f"\tElevation: {hvsr_results['input_params']['elevation']} meters")
report_string_list.append('')
report_string_list.append(internalSeparator)
report_string_list.append('')
if 'BestPeak' not in hvsr_results.keys():
report_string_list.append('\tNo identifiable BestPeak was present between {} for {}'.format(hvsr_results['input_params']['hvsr_band'], hvsr_results['input_params']['site']))
else:
curvTestsPassed = (hvsr_results['BestPeak'][azimuth]['PassList']['WinLen'] +
hvsr_results['BestPeak'][azimuth]['PassList']['SigCycles']+
hvsr_results['BestPeak'][azimuth]['PassList']['LowCurveStD'])
curvePass = curvTestsPassed > 2
#Peak Pass?
peakTestsPassed = ( hvsr_results['BestPeak'][azimuth]['PassList']['ProminenceLow'] +
hvsr_results['BestPeak'][azimuth]['PassList']['ProminenceHi']+
hvsr_results['BestPeak'][azimuth]['PassList']['AmpClarity']+
hvsr_results['BestPeak'][azimuth]['PassList']['FreqStability']+
hvsr_results['BestPeak'][azimuth]['PassList']['LowStDev_Freq']+
hvsr_results['BestPeak'][azimuth]['PassList']['LowStDev_Amp'])
peakPass = peakTestsPassed >= 5
report_string_list.append('\t{0:.3f} Hz Peak Frequency ± {1:.4f} Hz'.format(hvsr_results['BestPeak'][azimuth]['f0'], float(hvsr_results["BestPeak"][azimuth]['Sf'])))
if curvePass and peakPass:
report_string_list.append('\t {} Peak at {} Hz passed quality checks! :D'.format(sprit_utils._check_mark(), round(hvsr_results['BestPeak'][azimuth]['f0'],3)))
else:
report_string_list.append('\t {} Peak at {} Hz did NOT pass quality checks :('.format(sprit_utils._x_mark(), round(hvsr_results['BestPeak'][azimuth]['f0'],3)))
report_string_list.append('')
report_string_list.append(internalSeparator)
report_string_list.append('')
justSize=34
#Print individual results
report_string_list.append('\tCurve Tests: {}/3 passed (3/3 needed)'.format(curvTestsPassed))
report_string_list.append(f"\t\t {hvsr_results['BestPeak'][azimuth]['Report']['Lw'][-1]}"+" Length of processing windows".ljust(justSize)+f"{hvsr_results['BestPeak'][azimuth]['Report']['Lw']}")
report_string_list.append(f"\t\t {hvsr_results['BestPeak'][azimuth]['Report']['Nc'][-1]}"+" Number of significant cycles".ljust(justSize)+f"{hvsr_results['BestPeak'][azimuth]['Report']['Nc']}")
report_string_list.append(f"\t\t {hvsr_results['BestPeak'][azimuth]['Report']['σ_A(f)'][-1]}"+" Small H/V StDev over time".ljust(justSize)+f"{hvsr_results['BestPeak'][azimuth]['Report']['σ_A(f)']}")
report_string_list.append('')
report_string_list.append("\tPeak Tests: {}/6 passed (5/6 needed)".format(peakTestsPassed))
report_string_list.append(f"\t\t {hvsr_results['BestPeak'][azimuth]['Report']['A(f-)'][-1]}"+" Peak is prominent below".ljust(justSize)+f"{hvsr_results['BestPeak'][azimuth]['Report']['A(f-)']}")
report_string_list.append(f"\t\t {hvsr_results['BestPeak'][azimuth]['Report']['A(f+)'][-1]}"+" Peak is prominent above".ljust(justSize)+f"{hvsr_results['BestPeak'][azimuth]['Report']['A(f+)']}")
report_string_list.append(f"\t\t {hvsr_results['BestPeak'][azimuth]['Report']['A0'][-1]}"+" Peak is large".ljust(justSize)+f"{hvsr_results['BestPeak'][azimuth]['Report']['A0']}")
if hvsr_results['BestPeak'][azimuth]['PassList']['FreqStability']:
res = sprit_utils._check_mark()
else:
res = sprit_utils._x_mark()
report_string_list.append(f"\t\t {res}"+ " Peak freq. is stable over time".ljust(justSize)+ f"{hvsr_results['BestPeak'][azimuth]['Report']['P-'][:5]} and {hvsr_results['BestPeak'][azimuth]['Report']['P+'][:-1]} {res}")
report_string_list.append(f"\t\t {hvsr_results['BestPeak'][azimuth]['Report']['Sf'][-1]}"+" Stability of peak (Freq. StDev)".ljust(justSize)+f"{hvsr_results['BestPeak'][azimuth]['Report']['Sf']}")
report_string_list.append(f"\t\t {hvsr_results['BestPeak'][azimuth]['Report']['Sa'][-1]}"+" Stability of peak (Amp. StDev)".ljust(justSize)+f"{hvsr_results['BestPeak'][azimuth]['Report']['Sa']}")
report_string_list.append('')
report_string_list.append(f"Calculated using {hvsr_results['hvsr_windows_df']['Use'].astype(bool).sum()}/{hvsr_results['hvsr_windows_df']['Use'].count()} time windows".rjust(sepLen-1))
report_string_list.append(extSiteSeparator)
#report_string_list.append(endSiteSeparator)
#report_string_list.append(extSiteSeparator)
report_string_list.append('')
reportStr=''
#Now print it
for line in report_string_list:
reportStr = reportStr+'\n'+line
if show_print_report or verbose:
print(reportStr)
hvsr_results['BestPeak'][azimuth]['Report']['Print_Report'] = reportStr
if azimuth=='HV' or azimuth=='R':
hvsr_results['Print_Report'] = reportStr
return hvsr_results
# Private function to generate table report
def _generate_table_report(hvsr_results, azimuth='HV', show_table_report=True, verbose=False):
    """Helper function for get_report() to generate a site report formatted into a pandas dataframe

    Parameters
    ----------
    hvsr_results : HVSRData
        HVSRData object containing information about which the report will be generated.
    azimuth : str, optional
        The azimuth for which this report will be generated. If none specified/calculated, by default 'HV'
    show_table_report : bool, optional
        Whether to print the table report (as text) to the terminal
    verbose : bool, optional
        Whether or not to print information about the table report generation (including the pandas dataframe upon creation) to the terminal, by default False

    Returns
    -------
    HVSRData
        An HVSRData object with the ["Table_Report"] attribute created/updated.
        This is a pandas.DataFrame instance, but can be exported to csv.
    """
    inputParams = hvsr_results['input_params']
    peakInfo = hvsr_results['BestPeak'][azimuth]

    # Figure out which coordinate axis is which (some CRS do Y, X)
    axisInfo = inputParams['output_crs'].axis_info
    if axisInfo[0].direction.lower() in ['north', 'south']:
        xaxisinfo, yaxisinfo = axisInfo[1], axisInfo[0]
    else:
        xaxisinfo, yaxisinfo = axisInfo[0], axisInfo[1]

    # Get the axis names, simplified where possible
    xaxis_name = xaxisinfo.name
    yaxis_name = yaxisinfo.name
    if 'longitude' in xaxis_name.lower():
        xaxis_name = 'Longitude'
    if 'latitude' in yaxis_name.lower():
        yaxis_name = 'Latitude'

    # Criteria columns double as the PassList keys, so each value is looked up by name
    # rather than relying on the iteration order of the PassList dictionary
    criteriaCols = ['WinLen', 'SigCycles', 'LowCurveStD',
                    'ProminenceLow', 'ProminenceHi', 'AmpClarity', 'FreqStability',
                    'LowStDev_Freq', 'LowStDev_Amp']
    pdCols = ['Site Name', 'Acq_Date', xaxis_name, yaxis_name, 'Elevation',
              'Peak', 'Peak_StDev', 'PeakPasses'] + criteriaCols

    passList = peakInfo['PassList']
    tableRow = [inputParams['site'], inputParams['acq_date'],
                inputParams['xcoord'], inputParams['ycoord'], inputParams['elevation'],
                round(peakInfo['f0'], 3), round(peakInfo['Sf'], 4),
                peakInfo['PeakPasses']]
    tableRow.extend(passList[criterion] for criterion in criteriaCols)

    outDF = pd.DataFrame([tableRow], columns=pdCols)
    outDF.index.name = 'ID'

    if show_table_report or verbose:
        maxColWidth = 13
        colGap = ' '
        # Header, separator and data rows all use this width so that they stay aligned
        cellWidth = maxColWidth + len(colGap)

        def _fit_cell(value):
            # Truncate long values (with an ellipsis) and pad short ones to a fixed width
            text = str(value)
            if len(text) > maxColWidth:
                text = text[:maxColWidth-3]+'...'
            return text.ljust(maxColWidth) + colGap

        print('\nTable Report:\n')
        print(' ' + ''.join(_fit_cell(col) for col in outDF.columns))
        print(('|' + '-' * (cellWidth - 1)) * len(outDF.columns) + '|')
        for _, rowValues in outDF.iterrows():
            # Small indent at start of every row
            print(' ' + ''.join(_fit_cell(val) for val in rowValues))

    hvsr_results['BestPeak'][azimuth]['Report']['Table_Report'] = outDF
    if azimuth == 'HV' or azimuth == 'R':
        hvsr_results['Table_Report'] = outDF
    return hvsr_results
# Display html report in the web browser by way of a short-lived temporary file
def _display_html_report(html_report):
    """Open an HTML report in the web browser by way of a temporary .html file

    Nothing is done if the registered web browser is the VS Code one.

    Parameters
    ----------
    html_report : str
        Full HTML text of the report to display
    """
    import platform
    import tempfile
    import time
    import webbrowser

    if 'vscode' in webbrowser.get().name:
        return

    # On Windows the file is closed first and removed manually afterwards
    autodelete = platform.system() != "Windows"

    # ASCII + xmlcharrefreplace writes any non-ASCII character (check marks, dashes, ...)
    # as a numeric character reference, so the page renders the same whatever
    # encoding the browser assumes and writing cannot fail on the locale encoding
    with tempfile.NamedTemporaryFile(mode="w", delete=autodelete, suffix=".html",
                                     encoding="ascii", errors="xmlcharrefreplace") as tmp_file:
        tmp_file.write(html_report)
        # Make sure the whole report is on disk before a browser reads it
        tmp_file.flush()

        rawfpath = tmp_file.name.replace('\\', '/')
        print(rawfpath)
        file_url = pathlib.Path(tmp_file.name).as_uri()

        if autodelete:
            webbrowser.open_new(file_url)
            # Adding a short sleep so that the file does not get cleaned
            # up immediately in case the browser takes a while to boot.
            time.sleep(5)

    if not autodelete:
        try:
            webbrowser.open_new(file_url)
            time.sleep(3)
        finally:
            os.unlink(rawfpath)  # Cleaning up the file in case of Windows
# Private function for html report generation
def _generate_html_report(hvsr_results, azimuth='HV', show_html_report=False, verbose=False):
    """Private function that generates html report, intended to be used by get_report() public function

    Parameters
    ----------
    hvsr_results : HVSRData or HVSRBatch
        Input data from which to generate report
    azimuth : str, optional
        Key of hvsr_results['BestPeak'] whose peak frequency and standard deviation are reported, by default 'HV'
    show_html_report : bool, optional
        Whether to show the report or simply generate and save it in the "HTML_Report" attribute of hvsr_results, by default False
    verbose : bool, optional
        Whether to print information about the HTML report generation to terminal (not currently used in this function)

    Returns
    -------
    HVSRData or HVSRBatch
        Returns the input dataset, with the HTML_Report attribute updated with the html text of the report
    """
    htmlTemplatePath = RESOURCE_DIR.joinpath('html_report_template.html')

    with open(htmlTemplatePath, 'r') as htmlF:
        html = htmlF.read()

    bestPeak = hvsr_results['BestPeak'][azimuth]

    # Update report title (site name); str() so non-string names/ids do not break str.replace
    html = html.replace("HVSR_REPORT_TITLE", str(hvsr_results['site']))
    html = html.replace("HVSR_ID", str(hvsr_results['hvsr_id']))

    # Update peak freq info
    html = html.replace("PEAKFREQ", str(round(bestPeak['f0'], 3)))
    html = html.replace("PEAKSTDEV", str(round(bestPeak['Sf'], 3)))

    if hvsr_results.Table_Report['PeakPasses'].iloc[0]:
        html = html.replace("SESAME_TESTS_RESULTS", 'Peak has passed the SESAME validation tests.')
    else:
        html = html.replace("SESAME_TESTS_RESULTS", 'Peak did not pass the SESAME validation tests.')

    # Update image source
    # Default to matplotlib object
    plotEngine = 'matplotlib'
    if 'get_report' in hvsr_results.processing_parameters:
        plotEngine = hvsr_results.processing_parameters['get_report']['plot_engine'].lower()

    if str(plotEngine).lower() not in ['plotly', 'plty', 'p']:
        fig = plt.figure(hvsr_results['Plot_Report'])
        fig.set_size_inches(8.5, 6)

        # Save the plot to a BytesIO object (a byte stream of the image)
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
        buf.seek(0)

        # Encode the image to base64
        hvplot_base64 = base64.b64encode(buf.read()).decode('utf-8')
    else:
        img = hvsr_results.Plot_Report.to_image(format='png', engine='kaleido')
        hvplot_base64 = base64.b64encode(img).decode('utf8')

    # Embed the image in the html document
    html = html.replace("./output.png", f'data:image/png;base64,{hvplot_base64}')

    # Update formatting for print report for html
    html_print_report = hvsr_results.Print_Report.replace('\n', '<br>').replace('\t', " ")
    html_print_report = html_print_report.replace('<br>', '', 2)  # Remove the first two breaks
    # Pass/fail marks are written as numeric character references so that they
    # render the same however the html text is later encoded
    html_print_report = html_print_report.replace('✔', '&#10004;')
    html_print_report = html_print_report.replace('✘', '&#10007;')

    majorSepLine = u"\u2014"*99
    minorSepLine = u"\u2012"*95
    majorSepLineHTML = '—'*40
    minorSepLineHTML = '— '*25

    # Keep only the part of the print report from the site name up to the closing major separator
    # (if either marker is missing, that trimming step is skipped rather than raising)
    startInd = html_print_report.find(' Site Name:')
    if startInd >= 0:
        html_print_report = "<br><br>" + html_print_report[startInd:]
    lastInd = html_print_report.find(majorSepLine)
    if lastInd >= 0:
        html_print_report = html_print_report[:lastInd]

    # Replace any remaining separator lines with their (shorter) html versions
    html_print_report = html_print_report.replace(majorSepLine, majorSepLineHTML)
    html_print_report = html_print_report.replace(minorSepLine, minorSepLineHTML)
    html_print_report = html_print_report.replace("=", '')  # Get rid of =

    html = html.replace('HVSR_PRINT_REPORT', html_print_report)

    # Update table (site name and acquisition date columns are not part of the html table)
    htmlTable = hvsr_results.Table_Report.iloc[:, 2:]
    for i in range(len(htmlTable.columns)):
        tableValue = htmlTable.iloc[0, i]
        html = html.replace(f"TableData_{str(i).zfill(2)}", str(tableValue))

    # Figure out which coordinate axis is which (some CRS do Y, X)
    axisInfo = hvsr_results['input_params']['output_crs'].axis_info
    if axisInfo[0].direction.lower() in ['north', 'south']:
        xaxisinfo, yaxisinfo = axisInfo[1], axisInfo[0]
    else:
        xaxisinfo, yaxisinfo = axisInfo[0], axisInfo[1]

    # Get the axis names, simplified where possible
    xaxis_name = xaxisinfo.name
    yaxis_name = yaxisinfo.name
    if 'longitude' in xaxis_name.lower():
        xaxis_name = 'Longitude'
    if 'latitude' in yaxis_name.lower():
        yaxis_name = 'Latitude'

    html = html.replace("X_Coordinate", xaxis_name)
    html = html.replace("Y_Coordinate", yaxis_name)
    html = html.replace("Deg_E", xaxisinfo.unit_name)
    html = html.replace("Deg_N", yaxisinfo.unit_name)

    hvsr_results['HTML_Report'] = html

    # View in browser, if indicated to
    if show_html_report:
        try:
            _display_html_report(html)
        except Exception as e:
            print('\tHTML Report could not be displayed, but has been saved to the .HTML_Report attribute')
            print(e)

    return hvsr_results
# Private/Helper function to generate pdf report
def _generate_pdf_report(hvsr_results, pdf_report_filepath=None, show_pdf_report=False, show_html_report=False, return_pdf_path=False, verbose=False):
    """Private/helper function to generate pdf report from HTML report, intended to be used by get_report() function

    Parameters
    ----------
    hvsr_results : HVSRData or HVSRBatch
        Input dataset with all processing already carried out
    pdf_report_filepath : str or pathlib.Path, optional
        File (or existing directory) to save the pdf to. If a directory, a filename is generated from
        the site name, hvsr_id, and today's date. If None (or if saving there fails), a temporary file is used.
    show_pdf_report : bool, optional
        EXPERIMENTAL: Whether to open the report after generating it, by default False
    show_html_report : bool, optional
        Whether to open the html report that the pdf report is based on, by default False
    return_pdf_path : bool, optional
        Whether to return the location of the pdf instead of the dataset, by default False
    verbose : bool, optional
        Whether to print verbose description of what the function is doing

    Returns
    -------
    HVSRData or HVSRBatch, or path
        The input dataset, or (if return_pdf_path is True) the location the pdf was saved to.
        If the pdf could only be created in memory, it is stored in hvsr_results["PDF_Report"]
        and the returned "path" is the text "PDF_Report attribute".
    """
    # Generate HTML Report if not already (this will be converted to pdf using xhtml2pdf)
    if not hasattr(hvsr_results, "HTML_Report"):
        if verbose:
            print('\tNo HTML Report previously generated, attempting now.')
        # The html report is displayed (once) further below, so it is not displayed here as well
        hvsr_results = _generate_html_report(hvsr_results, show_html_report=False)

    htmlReport = hvsr_results['HTML_Report']
    htmlReport = htmlReport.replace('width=99%', '')

    # Whether pdf_export_path is a real file (False if the pdf only exists in memory)
    pdfOnDisk = True

    if pdf_report_filepath is None:
        if verbose:
            print('\t pdf_report_filepath not specified, attempting to save to temporary file.')

        try:
            pdf_export_path, pisa_status = _write_pdf_to_temp_file(htmlReport)
        except Exception:
            print("\t Attempting BytesIO")
            output = io.BytesIO()
            pisa_status = pisa.CreatePDF(htmlReport,
                                         dest=output,  # destination "file"
                                         )
            pdf_export_path = "PDF_Report attribute"
            pdfOnDisk = False

            # You can get the PDF file bytes with `.getbuffer()`
            print("\tPDF File created as bytes buffer of size", len(output.getbuffer()))
            hvsr_results["PDF_Report"] = output.getbuffer()
    else:
        if pathlib.Path(pdf_report_filepath).is_dir():
            fname = f"{hvsr_results['site']}_REPORT_{hvsr_results['hvsr_id']}_{datetime.date.today()}.pdf"
            pdf_report_filepath = pathlib.Path(pdf_report_filepath).joinpath(fname)

        try:
            with open(pdf_report_filepath, "w+b") as export_file:
                pisa_status = pisa.CreatePDF(htmlReport, dest=export_file)
            pdf_export_path = pdf_report_filepath
            if verbose:
                print(f'PDF report saved to {pdf_export_path}')
        except Exception as e:
            print(f'PDF could not be saved to {pdf_report_filepath}')
            if verbose:
                print(f'\t{e}')
            pdf_export_path, pisa_status = _write_pdf_to_temp_file(htmlReport, announce=True)

    if verbose:
        if hasattr(pisa_status, 'err') and not str(pisa_status.err) == '0':
            print('\t', pisa_status.err)

    if show_html_report:
        try:
            _display_html_report(hvsr_results['HTML_Report'])
        except Exception as e:
            print('\tHTML Report could not be displayed, but has been saved to the .HTML_Report attribute')
            print(e)

    if show_pdf_report:
        if verbose:
            print(f'\tAttempting to open pdf at {pdf_export_path}')

        pdf_report_shown = False

        # os.startfile only exists on Windows
        if pdfOnDisk and hasattr(os, 'startfile'):
            try:
                print("\t Attempting os.startfile")
                os.startfile(pdf_export_path)
                pdf_report_shown = True
            except Exception as e:
                print(f"\t\tError opening pdf report: {e}")

        if pdfOnDisk and not pdf_report_shown:
            try:  # This freezes or throws errors in cloud environments sometimes, so try/except
                codespaces_check = 'vscode' in webbrowser.get().name
                print('\t Attempting to open PDF Report via web browser')
                if not codespaces_check:
                    # webbrowser expects a URL; it returns whether a browser could be launched
                    pdf_url = pathlib.Path(pdf_export_path).resolve().as_uri()
                    pdf_report_shown = bool(webbrowser.open(pdf_url))
                else:
                    print('\t\tWeb browser opening not supported')
            except Exception as e:
                print(f"\tOpening pdf via webbrowser did not work. Error opening pdf report: {e}")

        if not pdf_report_shown:
            print(f"\tSpRIT cannot open your pdf report in this environment, but it has been saved to {pdf_export_path}")
            print('\tAttempting to open HTML version of report')
            try:
                _display_html_report(hvsr_results['HTML_Report'])
            except Exception:
                print('\tHTML Report could not be displayed, but has been saved to the .HTML_Report attribute')

    if return_pdf_path:
        return pdf_export_path

    return hvsr_results


def _write_pdf_to_temp_file(html_report, announce=False):
    """Convert html text to a pdf saved in a new (persistent) temporary file; return (path, pisa status)"""
    # Only used to reserve a file name; the file is reopened for writing below
    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
        temp_path = temp_file.name

    if announce:
        print(f'Saving pdf to temporary file instead: {temp_path}')

    with open(temp_path, 'wb') as pdf_file:
        pisa_status = pisa.CreatePDF(html_report, dest=pdf_file)

    return temp_path, pisa_status
# Plot hvsr curve, private supporting function for plot_hvsr
def _plot_hvsr(hvsr_data, plot_type, xtype='frequency', fig=None, ax=None, azimuth='HV', save_dir=None, save_suffix='', show_plot=True, **kwargs):
    """Private function for plotting hvsr curve (or curves with components)

    Parameters
    ----------
    hvsr_data : HVSRData
        Processed dataset to plot
    plot_type : sequence of str
        Tokens specifying what to draw. Tokens read by this function: 'hvsr' (H/V curve),
        '-s' (no standard deviation band), 'p' (peak; all peaks if 'all' is also present),
        'pa' (peak amplitude), 'ann' (annotate), 't'/'tp' (curves/peaks of individual time windows),
        tokens containing 'test' (SESAME tests), 'fr' (peak search range), and other tokens
        containing 'c' (components).
    xtype : str, optional
        Key of hvsr_data holding the x values; also selects the x-axis label, by default 'frequency'
    fig, ax : matplotlib figure and axes, optional
        Where to plot. A new figure and axes are created only if both are None.
    azimuth : str, optional
        Key of the azimuth to plot, by default 'HV'
    save_dir, save_suffix, show_plot
        Passed on to __plot_current_fig()
    **kwargs
        Keys read here: 'kwargs' (dictionary used in place of kwargs), 'xlim', 'ylim', 'grid',
        'subplot' ('hvsr' or 'comp'; needed for 't' and 'test' tokens), 'axes' (needed for a
        separate component subplot), and 'show_legend' (required).

    Returns
    -------
    tuple
        (fig, ax)
    """
    # Get kwargs all straightened out
    if 'kwargs' in kwargs.keys():
        kwargs = kwargs['kwargs']

    if fig is None and ax is None:
        fig, ax = plt.subplots()

    if 'xlim' not in kwargs.keys():
        xlim = hvsr_data['hvsr_band']
    else:
        xlim = kwargs['xlim']

    if 'ylim' not in kwargs.keys():
        # NOTE(review): the 'HV' key is used here regardless of the azimuth parameter - confirm intended
        plotymax = max(hvsr_data.hvsrp2['HV']) + (max(hvsr_data.hvsrp2['HV']) - max(hvsr_data.hvsr_curve))
        # Cap at 1.5x the peak amplitude (only possible if a peak was identified)
        if "BestPeak" in hvsr_data.keys():
            if plotymax > hvsr_data.BestPeak['HV']['A0'] * 1.5:
                plotymax = hvsr_data.BestPeak['HV']['A0'] * 1.5
        ylim = [0, plotymax]
    else:
        ylim = kwargs['ylim']

    if 'grid' in kwargs.keys():
        # Grid goes on the axes being plotted (not on whichever axes happen to be current)
        ax.grid(which=kwargs['grid'], alpha=0.25)

    # Get x y data (for main hvsr plot esp.)
    hvsrDF = hvsr_data.hvsr_windows_df

    freqList = ['x_freqs', 'freqs', 'freq', 'hz', 'f', 'frequency']
    if xtype.lower() in freqList:
        xlabel = 'Frequency [Hz]'
    else:
        xlabel = 'Period [s]'

    if save_dir is not None:
        filename = hvsr_data['input_params']['site']
    else:
        filename = ""

    anyKey = list(hvsr_data[xtype].keys())[0]
    x = hvsr_data[xtype][anyKey][:-1]
    hvCurve = hvsr_data['hvsr_curve']

    # Set up plot viz and export
    plotSuff = ''
    legendLoc = 'upper left'

    # Plot HVSR curve first
    plotHVSR = False
    for item in plot_type:
        if item.lower() == 'hvsr':
            plotHVSR = True
            ax.plot(x, hvCurve, color='k', label='H/V Ratio', zorder=1000)
            plotSuff = 'HVSRCurve_'
            if '-s' not in plot_type:
                ax.fill_between(x, hvsr_data['hvsrm2'][azimuth], hvsr_data['hvsrp2'][azimuth], color='k', alpha=0.2, label='StDev', zorder=997)
                ax.plot(x, hvsr_data['hvsrm2'][azimuth], color='k', alpha=0.25, linewidth=0.5, zorder=998)
                ax.plot(x, hvsr_data['hvsrp2'][azimuth], color='k', alpha=0.25, linewidth=0.5, zorder=999)
            else:
                plotSuff = plotSuff+'noStdDev_'
            break

    # Plot parameters
    ax.semilogx()
    ax.set_ylim(ylim)
    ax.set_xlim(xlim)
    ax.set_ylabel('H/V Ratio'+'\n['+hvsr_data['horizontal_method']+']', fontsize='small',)
    ax.tick_params(axis='x', labelsize=8)
    ax.tick_params(axis='y', labelsize=5)
    plt.suptitle(hvsr_data['input_params']['site'])

    # Label for the component axis (always defined, so component plots do not fail
    # when the PSD processing parameters are not available)
    compLabel = 'COMPONENTS\n PSDs'
    if 'processing_parameters' in hvsr_data.keys() and 'generate_psds' in hvsr_data['processing_parameters'].keys():
        if hvsr_data['processing_parameters']['generate_psds']['obspy_ppsds']:
            compLabel = 'COMPONENTS\nAmplitude\n[m2/s4/Hz] [dB]'

    # Get peak parameters (if exist, otherwise, get dummy ones)
    if "BestPeak" in hvsr_data.keys():
        f0 = hvsr_data['BestPeak'][azimuth]['f0']
        a0 = hvsr_data['BestPeak'][azimuth]['A0']
    else:
        f0 = hvsr_data['hvsr_band'][0]
        a0 = 0
    f0_div4 = f0/4
    f0_mult4 = f0*4
    a0_div2 = a0/2

    # Predefine so only need to set True if True
    peakAmpAnn = False
    peakPoint = False
    peakLine = False

    used = hvsrDF['Use'].astype(bool)
    notused = ~hvsrDF['Use'].astype(bool)

    # Whether components get their own axes (first token is the component token)
    compsOnOwnAxis = 'c' in plot_type[0].lower()

    # Go through each "token" in plot_type str and plot as specified
    for k in plot_type:
        # Show peak(s)
        if k == 'p':
            if 'all' not in plot_type:
                # Show f0 peak (and annotate if indicated)
                plotSuff = plotSuff+'BestPeak_'

                bestPeak = None
                bestPeakScore = 0
                for p in hvsr_data['PeakReport'][azimuth]:
                    if p['Score'] > bestPeakScore:
                        bestPeakScore = p['Score']
                        bestPeak = p

                # Nothing to show if no peak has a score above 0
                if bestPeak is not None:
                    ax.axvline(bestPeak['f0'], color='k', linestyle='dotted', label='Peak')

                    # Annotate primary peak
                    if 'ann' in plot_type:
                        xLoc = bestPeak['f0']
                        yLoc = ylim[0] + (ylim[1] - ylim[0]) * 0.008
                        ax.text(x=xLoc, y=yLoc, s="Peak at "+str(round(bestPeak['f0'], 2))+'Hz',
                                fontsize='xx-small', horizontalalignment='center', verticalalignment='bottom',
                                bbox=dict(facecolor='w', edgecolor='none', alpha=0.8, pad=0.1))
                        plotSuff = plotSuff+'ann_'
            else:
                # Show all peaks in h/v curve
                plotSuff = plotSuff+'allPeaks_'

                ax.vlines(hvsr_data['hvsr_peak_freqs'][azimuth], ax.get_ylim()[0], ax.get_ylim()[1], colors='k', linestyles='dotted', label='Peak')

                # Annotate all peaks
                if 'ann' in plot_type:
                    for p in hvsr_data['hvsr_peak_freqs'][azimuth]:
                        ax.annotate('Peak at '+str(round(p, 2))+'Hz', (p, 0.1), xycoords='data',
                                    horizontalalignment='center', verticalalignment='bottom',
                                    bbox=dict(facecolor='w', edgecolor='none', alpha=0.8, pad=0.1))
                    plotSuff = plotSuff+'ann_'

        # Show primary peak amplitude (and annotate if indicated)
        if k == 'pa':
            ax.hlines([a0], ax.get_xlim()[0], f0, linestyles='dashed')
            ax.scatter([f0], [a0], marker="o", facecolor='none', edgecolor='k')
            peakPoint = True
            peakLine = True

            # Annotate primary peak amplitude
            if 'ann' in plot_type:
                ax.annotate(f"Peak Amp.: {a0:.2f}", [f0+0.1*f0, a0])
                peakAmpAnn = True

        # Show the curves and/or peaks at each time window
        if 't' in k and 'test' not in k:
            plotSuff = plotSuff+'allTWinCurves_'

            # If this is a component subplot
            if kwargs['subplot'] == 'comp':
                # ('tp' is not calculated for individual components)
                if k == 't':
                    azKeys = ['Z', 'E', 'N']
                    azKeys.extend(list(hvsr_data.hvsr_az.keys()))
                    azColors = {'Z': 'k', 'E': 'b', 'N': 'r'}
                    for az in azKeys:
                        # Azimuths (anything other than Z, E, N) are plotted in green
                        col = azColors.get(az.upper(), 'g')

                        for t in np.stack(hvsrDF[used]['psd_values_'+az]):
                            ax.plot(x, t[:-1], color=col, alpha=0.2, linewidth=0.8, linestyle=':', zorder=0)
            # For the main H/V plot
            else:
                # Show all peaks at all times (semitransparent red bars)
                if k == 'tp':
                    for j, t in enumerate(hvsrDF[used]['CurvesPeakIndices_'+azimuth]):
                        for i, v in enumerate(t):
                            v = x[v]
                            # NOTE(review): the bar width is taken from the x spacing at position i
                            # (the peak's order within its window), not at the peak's own index - confirm
                            if i == 0:
                                width = (x[i+1]-x[i])/16
                            else:
                                width = (x[i]-x[i-1])/16

                            if j == 0 and i == 0:
                                ax.fill_betweenx(ylim, v-width, v+width, color='r', alpha=0.05, label='Individual H/V Peaks')
                            else:
                                ax.fill_betweenx(ylim, v-width, v+width, color='r', alpha=0.05)

                # Show curves at all time windows
                if k == 't':
                    if used.sum() > 0:
                        for t in np.stack(hvsrDF[used]['HV_Curves']):
                            ax.plot(x, t, color='k', alpha=0.25, linewidth=0.8, linestyle=':')
                    if notused.sum() > 0:
                        for t in np.stack(hvsrDF[notused]['HV_Curves']):
                            ax.plot(x, t, color='orangered', alpha=0.666, linewidth=0.8, linestyle=':', zorder=0)

        # Plot SESAME test results and thresholds on HVSR plot
        if 'test' in k and kwargs['subplot'] == 'hvsr':
            if k == 'tests' or 'all' in k or ':' in k:
                # Change k to pass all test plot conditions
                k = 'test123456c'

            if '1' in k:
                # Peak is higher than 2x lowest point in f0/4-f0
                _plot_prominence_test(ax, hvsr_data, azimuth, 'A(f-)', f0_div4, f0, a0_div2)

                # Plot the Peak Point if not already
                if not peakPoint:
                    ax.scatter([f0], [a0], marker="o", facecolor='none', edgecolor='k')
                    peakPoint = True

                # Annotate the Peak Amplitude if not already
                if not peakAmpAnn and 'ann' in plot_type:
                    ax.annotate(f"Peak Amp.: {a0:.2f}", [f0+0.1*f0, a0])
                    peakAmpAnn = True

                # Add peak line
                if 'pa' not in plot_type and not peakLine:
                    ax.hlines([a0], ax.get_xlim()[0], f0, linestyles='dashed')
                    peakLine = True

            if '2' in k:
                # Peak is higher than 2x lowest point in f0-f0*4
                _plot_prominence_test(ax, hvsr_data, azimuth, 'A(f+)', f0, f0_mult4, a0_div2)

                # Plot the Peak Point if not already
                if not peakPoint:
                    ax.scatter([f0], [a0], marker="o", facecolor='none', edgecolor='k')
                    peakPoint = True

                # Annotate the amplitude of peak point if not already
                if not peakAmpAnn and 'ann' in plot_type:
                    ax.annotate(f"Peak Amp.: {a0:.2f}", [f0+0.1*f0, a0])
                    peakAmpAnn = True

                # Add peak line
                if 'pa' not in plot_type and not peakLine:
                    ax.hlines([a0], ax.get_xlim()[0], f0, linestyles='dashed')
                    peakLine = True

            if '3' in k:
                # Peak amplitude is larger than 2 (the threshold line is drawn at amplitude 2,
                # so the marker colour depends on the amplitude a0, not on the frequency f0)
                lcolor = 'r'
                if a0 > 2:
                    lcolor = 'g'

                if 'c' not in k or all(num in k for num in ["1", "2", "3", "4", "5", "6"]):
                    ax.hlines([2], ax.get_xlim()[0], ax.get_xlim()[1], color='tab:blue', linestyles='dashed')
                    ax.plot([f0, f0], [2, a0], linestyle='dotted', color=lcolor)

                    if 'pa' not in plot_type:
                        ax.hlines([a0], ax.get_xlim()[0], f0, linestyles='dashed')
                        ax.scatter([f0], [a0], marker="o", facecolor='none', edgecolor='k')
                        peakPoint = True
                        peakLine = True

            if '4' in k:
                lowf4 = float(hvsr_data['BestPeak'][azimuth]['Report']['P-'].split(' ')[0])
                hif4 = float(hvsr_data['BestPeak'][azimuth]['Report']['P+'].split(' ')[0])

                # Band of +/-5% around f0, spanning the full height of the axes
                ax.axvspan(f0*0.95, f0*1.05, color='tab:blue', alpha=0.3)

                mcolor = 'r'
                pcolor = 'r'
                if hvsr_data['BestPeak'][azimuth]['Report']['P-'][-1] == sprit_utils._check_mark():
                    mcolor = 'g'
                if hvsr_data['BestPeak'][azimuth]['Report']['P+'][-1] == sprit_utils._check_mark():
                    pcolor = 'g'

                ax.scatter([lowf4, hif4], [np.max(hvsr_data.hvsrm2[azimuth]), np.max(hvsr_data.hvsrp2[azimuth])], c=[mcolor, pcolor], marker='x')

                if not peakPoint:
                    ax.scatter([f0], [a0], marker="o", facecolor='none', edgecolor='k')
                    peakPoint = True

            if '5' in k:
                sf = float(hvsr_data['BestPeak'][azimuth]['Report']['Sf'].split(' ')[4].strip('()'))
                sfp = f0+sf
                sfm = f0-sf

                sfLim = float(hvsr_data['BestPeak'][azimuth]['Report']['Sf'].split(' ')[-2])
                sfLimp = f0+sfLim
                sfLimm = f0-sfLim

                if hvsr_data['BestPeak'][azimuth]['Report']['Sf'][-1] == sprit_utils._check_mark():
                    xColor = 'g'
                else:
                    xColor = 'r'

                ax.scatter([sfLimm, sfLimp], [a0, a0], marker='|', c='tab:blue')
                ax.scatter([sfm, sfp], [a0, a0], marker='x', c=xColor)
                ax.plot([sfLimm, sfLimp], [a0, a0], color='tab:blue')

                if not peakPoint:
                    ax.scatter([f0], [a0], marker="o", facecolor='none', edgecolor='k')
                    peakPoint = True

            if '6' in k:
                sa = float(hvsr_data['BestPeak'][azimuth]['Report']['Sa'].split(' ')[4].strip('()'))
                sap = a0+sa
                sam = a0-sa

                saLim = float(hvsr_data['BestPeak'][azimuth]['Report']['Sa'].split(' ')[-2])
                saLimp = a0+saLim
                saLimm = a0-saLim

                if hvsr_data['BestPeak'][azimuth]['Report']['Sa'][-1] == sprit_utils._check_mark():
                    xColor = 'g'
                else:
                    xColor = 'r'

                ax.scatter([f0, f0], [saLimm, saLimp], marker='_', c='tab:blue')
                ax.scatter([f0, f0], [sam, sap], marker='x', c=xColor)
                ax.plot([f0, f0], [saLimm, saLimp], color='tab:blue')

                if not peakPoint:
                    ax.scatter([f0], [a0], marker="o", facecolor='none', edgecolor='k')
                    peakPoint = True

        # Plot frequency search range bars
        if 'fr' in k:
            lowPeakSearchThresh = hvsr_data.peak_freq_range[0]
            hiPeakSearchThresh = hvsr_data.peak_freq_range[1]

            frStyleDict = {'linestyle': 'dashed', 'facecolors': '#1B060544', 'edgecolors': '#000000'}

            ax.fill_betweenx(ylim, [xlim[0], xlim[0]], [lowPeakSearchThresh, lowPeakSearchThresh], **frStyleDict)
            ax.fill_betweenx(ylim, [hiPeakSearchThresh, hiPeakSearchThresh], [xlim[1], xlim[1]], **frStyleDict)

        # Plot individual components
        if 'c' in k and 'test' not in k:  # Spectrogram uses a different function, so c is unique to the component plot flag
            plotSuff = plotSuff+'IndComponents_'

            if not compsOnOwnAxis:
                # This section is if comps plotted in hvsr axis
                compAxis = ax.twinx()
                plt.sca(compAxis)
                compAxis.set_ylabel(compLabel, rotation=270, labelpad=20)
                compAxis.set_facecolor([0, 0, 0, 0])
                legendLoc2 = 'upper right'
            else:
                # This section is for if they are plotted on different plots
                ax.set_title('')  # Remove title
                ax.sharex(kwargs['axes']['hvsr'])
                compAxis = ax
                legendLoc2 = 'upper right'
                compAxis.set_ylabel(compLabel)

            # Y limits: range of the standard deviation bands of all components, with padding
            minY = []
            maxY = []
            keyList = ['Z', 'E', 'N']
            for az in hvsr_data.hvsr_az.keys():
                keyList.append(az)
            keyList.sort()
            for key in keyList:
                minY.append(hvsr_data['ppsd_std_vals_m'][key].min())
                maxY.append(hvsr_data['ppsd_std_vals_p'][key].max())
            minY = min(minY)
            maxY = max(maxY)
            rng = maxY-minY
            pad = abs(rng * 0.15)
            ylim = [float(minY-pad), float(maxY+pad+pad)]
            compAxis.set_ylim(ylim)

            yLoc = min(ylim) - abs(ylim[1]-ylim[0]) * 0.05
            xlab = ax.text(x=xlim[0], y=yLoc, s=xlabel,
                           fontsize='x-small', horizontalalignment='right', verticalalignment='top',
                           bbox=dict(facecolor='w', edgecolor='none', alpha=0.8, pad=0.1))
            xlab.set_in_layout(False)

            # Modify based on whether there are multiple charts
            if plotHVSR:
                linalpha = 0.2
                stdalpha = 0.05
            else:
                linalpha = 1
                stdalpha = 0.2

            # Plot individual components
            azsLabeled = False
            compCurves = {}
            psdKeys = list(hvsr_data['psd_values_tavg'])
            psdKeys.sort()  # Put Z last so it plots on top
            for key in psdKeys:
                if key.upper() == 'Z':
                    pltColor = 'k'
                elif key.upper() == 'E':
                    pltColor = 'b'
                elif key.upper() == 'N':
                    pltColor = 'r'
                else:
                    pltColor = 'g'

                if key in keyList or key == azimuth:
                    if hvsr_data.horizontal_method == 'Single Azimuth' and key in ['E', 'N']:
                        pass
                    else:
                        compCurves[key] = hvsr_data['psd_values_tavg'][key][:-1]

                        # Make sure azimuth only shows up in legend once
                        if pltColor == 'g':
                            if azsLabeled:
                                leglabel = None
                            else:
                                leglabel = 'Azimuths'
                            azsLabeled = True
                        else:
                            leglabel = key

                        compAxis.plot(x, compCurves[key], c=pltColor, label=leglabel, alpha=linalpha)
                        if '-s' not in plot_type:
                            compAxis.fill_between(x, hvsr_data['ppsd_std_vals_m'][key][:-1], hvsr_data['ppsd_std_vals_p'][key][:-1], color=pltColor, alpha=stdalpha)

            if not compsOnOwnAxis:
                if not kwargs['show_legend'] == False:
                    compAxis.legend(loc=legendLoc2)
            else:
                ax.legend(loc=legendLoc, ncols=len(psdKeys),
                          borderaxespad=0.1, columnspacing=1, markerfirst=False, reverse=True, borderpad=0.2)
        else:
            yLoc = min(ylim) - abs(ylim[1]-ylim[0]) * 0.05
            ax.text(x=xlim[0], y=yLoc, s=xlabel,
                    fontsize='x-small', horizontalalignment='right', verticalalignment='top',
                    bbox=dict(facecolor='w', edgecolor='none', alpha=0.8, pad=0.1))

    if kwargs['show_legend']:
        ax.legend(loc=legendLoc, bbox_to_anchor=(1.05, 1))

    __plot_current_fig(save_dir=save_dir,
                       filename=filename,
                       fig=fig, ax=ax,
                       plot_suffix=plotSuff,
                       user_suffix=save_suffix,
                       show_plot=show_plot)

    return fig, ax


def _plot_prominence_test(ax, hvsr_data, azimuth, report_key, f_low, f_high, a0_div2):
    """Plot the threshold and result of a SESAME prominence test ('A(f-)' or 'A(f+)') between f_low and f_high"""
    # Plot the line threshold that the curve needs to cross
    ax.plot([f_low, f_high], [a0_div2, a0_div2], color='tab:blue', marker='|', linestyle='dashed')

    # Get the part of the curve that is in the desired frequency range
    bandFreqs = []
    bandCurve = []
    for ind, f in enumerate(hvsr_data.x_freqs['Z']):
        if f_low <= f <= f_high and ind < len(hvsr_data.hvsr_curve):
            bandFreqs.append(f)
            bandCurve.append(hvsr_data.hvsr_curve[ind])
    thresholdList = list(np.ones_like(bandFreqs) * a0_div2)

    # Plot area showing where test succeeds or not
    reportStr = hvsr_data['BestPeak'][azimuth]['Report'][report_key]
    if reportStr[-1] == sprit_utils._x_mark():
        # Test failed: values are read back from the text of the report entry
        reportVals = reportStr.replace('Hz', '').replace('-', '').split()
        lowf2 = float(reportVals[-3])
        hif2 = float(reportVals[-2])
        ym = float(reportVals[3])
        yp = min(bandCurve)
        ax.fill_betweenx(y=[ym, yp], x1=lowf2, x2=hif2, alpha=0.1, color='r')
    else:
        # Test passed: shade where the curve is below the threshold and mark its minimum
        ax.fill_between(bandFreqs, y1=bandCurve, y2=thresholdList, where=np.array(bandCurve) <= a0_div2, color='g', alpha=0.2)
        minF = bandFreqs[np.argmin(bandCurve)]
        minA = min(bandCurve)
        ax.plot([minF, minF, minF], [0, minA, a0_div2], marker='.', color='g', linestyle='dotted')
# Private function to help for when to show and format and save plots
def __plot_current_fig(save_dir, filename, fig, ax, plot_suffix, user_suffix, show_plot):
    """Private function to support plot_hvsr, for saving and (re)drawing plots

    Parameters
    ----------
    save_dir : str, os.PathLike, or None
        Directory in which the PNG is written. If None, nothing is saved.
    filename : str
        Base name of the output file (without extension).
    fig : figure object
        Object providing ``savefig`` (used when saving) and ``canvas.draw`` (used when showing).
    ax : axes object
        Not used by this function; kept so the call signature stays unchanged.
    plot_suffix : str
        Text inserted between the base name and the date stamp.
    user_suffix : str
        Text appended after the date stamp.
    show_plot : bool
        If truthy, the figure canvas is redrawn.

    Returns
    -------
    None
    """
    if save_dir is not None:
        # os.fspath lets pathlib.Path directories work too; for plain strings the
        # resulting path is exactly what string concatenation produced before.
        dateStamp = str(datetime.datetime.today().date())
        outFile = os.fspath(save_dir) + '/' + f"{filename}_{plot_suffix}{dateStamp}_{user_suffix}.png"
        fig.savefig(outFile, bbox_inches='tight', pad_inches=0.2)

    if show_plot:
        fig.canvas.draw()
    return
# Plot spectrogram, private supporting function for plot_hvsr
def _plot_specgram_hvsr(hvsr_data, fig=None, ax=None, azimuth='HV', save_dir=None, save_suffix='',**kwargs):
    """Private function for plotting the per-window H/V curves as a time-frequency image

    Each row of ``hvsr_data['hvsr_windows_df']`` contributes one column of the image
    (time on the x axis, frequency on a log-scaled y axis). Windows whose ``Use``
    value is falsy are drawn with a reduced alpha of 0.15.

    Parameters
    ----------
    hvsr_data : HVSRData or dict-like
        Must provide 'hvsr_windows_df', 'psds', 'x_freqs' and 'input_params'
        (with 'hvsr_band'); 'BestPeak' and 'hvsr_peak_freqs' are read only when
        the corresponding flags below are given.
    fig, ax : matplotlib figure and axes, optional
        If both are None a new figure is created. If only one is given,
        the other is derived from it.
    azimuth : str, default='HV'
        Selects the curve column ('HV_Curves' or 'HV_Curves_<azimuth>') and the
        key used for 'BestPeak', 'hvsr_peak_freqs' and 'CurvesPeakFreqs_<azimuth>'.
    save_dir, save_suffix
        Not used inside this function.
    **kwargs
        May be passed as a single nested dict under the key 'kwargs'; that dict
        is then modified in place as recognized keys are consumed.
        Flag keys (only their presence matters): 'spec' (ignored), 'p' (line at
        best peak), 'ann' (annotate best peak), 'all' (lines at all peaks),
        'tp' (markers for each window's peaks).
        Value keys: 'grid', 'ytype' ('freq' or anything else for period label),
        'detrend' (accepted but unused), 'colorbar' (default True),
        'cmap' (default 'turbo'), 'subplot' (discarded), 'vmin', 'vmax'.
        Everything left over is forwarded to ``ax.imshow``.

    Returns
    -------
    fig, ax
        The figure and axes that were drawn on.
    """
    # Resolve drawing targets; derive whichever of fig/ax was not supplied
    if fig is None and ax is None:
        fig, ax = plt.subplots()
    elif ax is None:
        ax = fig.gca()
    elif fig is None:
        fig = ax.get_figure()

    if 'kwargs' in kwargs:
        kwargs = kwargs['kwargs']

    def _pop_flag(name):
        """Report whether `name` was supplied and remove it so it is not forwarded to imshow"""
        present = name in kwargs
        kwargs.pop(name, None)
        return present

    _pop_flag('spec')
    peak_plot = _pop_flag('p')
    annotate = _pop_flag('ann')
    show_all_peaks = _pop_flag('all')
    show_all_time_peaks = _pop_flag('tp')

    if 'grid' in kwargs:
        ax.grid(which=kwargs.pop('grid'), alpha=0.25)

    ylabel = 'Frequency [Hz]'
    if 'ytype' in kwargs and kwargs.pop('ytype') != 'freq':
        ylabel = 'Period [s]'

    kwargs.pop('detrend', None)  # Accepted for compatibility; not used below
    colorbar = kwargs.pop('colorbar', True)
    kwargs.setdefault('cmap', 'turbo')

    hvsrDF = hvsr_data['hvsr_windows_df']
    used = hvsrDF['Use'].astype(bool)
    usedDF = hvsrDF[used]

    # Setup
    anyKey = list(hvsr_data['psds'].keys())[0]

    # Get data
    hvCurveColumn = 'HV_Curves'
    if azimuth != 'HV':
        hvCurveColumn += '_'+azimuth
    psdArr = np.stack(hvsrDF[hvCurveColumn].apply(np.flip))

    # Per-pixel alpha: 1 for used windows, 0.15 for excluded ones
    useArr = np.array(hvsrDF['Use'])
    useArr = np.tile(useArr, (psdArr.shape[1], 1)).astype(int)
    useArr = np.clip(useArr, a_min=0.15, a_max=1)

    # Get times
    xmin = hvsrDF['TimesProcessed_MPL'].min()
    xmax = hvsrDF['TimesProcessed_MPL'].max()

    # Format times
    ax.xaxis.set_major_locator(mdates.MinuteLocator(byminute=range(0,60,5)))
    ax.xaxis.set_minor_locator(mdates.SecondLocator(bysecond=[0]))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    ax.tick_params(axis='both', labelsize='x-small')

    # Get day label for bottom of chart
    firstDay = hvsrDF.index[0].date()
    lastDay = hvsrDF.index[-1].date()
    if firstDay != lastDay:
        day = str(firstDay)+' - '+str(lastDay)
    else:
        day = str(firstDay)

    # Get extents
    ymin = hvsr_data['input_params']['hvsr_band'][0]
    ymax = hvsr_data['input_params']['hvsr_band'][1]

    freqticks = np.flip(hvsr_data['x_freqs'][anyKey])
    yminind = np.argmin(np.abs(ymin-freqticks))
    ymaxind = np.argmin(np.abs(ymax-freqticks))
    freqticks = freqticks[yminind:ymaxind]
    freqticks = np.logspace(np.log10(freqticks[0]), np.log10(freqticks[-1]), num=psdArr.shape[1])

    extList = [xmin, xmax, ymin, ymax]

    # Set up axes
    ax.set_facecolor([0,0,0])  # Create black background for transparency to look darker

    # Interpolate each window's curve from the log-spaced grid onto a linear grid
    new_indices = np.linspace(freqticks[0], freqticks[-1], len(freqticks))
    linear_arr = np.stack([np.interp(new_indices, freqticks, row.astype(np.float16))
                           for row in psdArr])

    # Create chart
    kwargs.pop('subplot', None)

    # Get min and max of colormap normalization from array that is used
    # NOTE(review): limits always come from 'HV_Curves', even when an azimuth
    # column is plotted -- confirm whether that is intended.
    if 'vmin' not in kwargs:
        kwargs['vmin'] = np.min(np.stack(usedDF['HV_Curves']))
    if 'vmax' not in kwargs:
        kwargs['vmax'] = np.max(np.stack(usedDF['HV_Curves']))

    im = ax.imshow(linear_arr.T, origin='lower', extent=extList, aspect='auto', alpha=useArr, **kwargs)
    ax.tick_params(left=True, right=True, top=True)

    if peak_plot:
        ax.axhline(hvsr_data['BestPeak'][azimuth]['f0'], c='k', linestyle='dotted', zorder=1000)

    if annotate:
        # Put the label at the top for peaks below 1 Hz, otherwise at the bottom
        if float(hvsr_data['BestPeak'][azimuth]['f0']) < 1:
            boxYPerc = 0.998
            vertAlign = 'top'
        else:
            boxYPerc = 0.002
            vertAlign = 'bottom'
        xLocation = float(xmin) + (float(xmax)-float(xmin))*0.99
        yLocation = ymin + (ymax-ymin)*(boxYPerc)
        ax.text(x=xLocation, y=yLocation, fontsize='x-small', s=f"Peak at {hvsr_data['BestPeak'][azimuth]['f0']:0.2f} Hz", ha='right', va=vertAlign,
                bbox={'alpha':0.8, 'edgecolor':None, 'linewidth':0, 'fc':'w', 'pad':0.3})

    if show_all_time_peaks:
        timeVals = []
        peakFreqs = []
        # Take window times and peak lists from the same (used-only) rows so that
        # each marker lands on the window its peaks were picked from.
        windowInfo = zip(usedDF['TimesProcessed_MPL'],
                         usedDF['TimesProcessed_MPLEnd'],
                         usedDF['CurvesPeakFreqs_'+azimuth])
        for startWindow, endWindow, pFreqs in windowInfo:
            midTime = (endWindow + startWindow) / 2
            for f in pFreqs:
                timeVals.append(midTime)
                peakFreqs.append(f)
        ax.scatter(timeVals, peakFreqs, marker="^", facecolors='#00000000', edgecolors='#00000088',s=12)

    if show_all_peaks:
        ax.hlines(hvsr_data['hvsr_peak_freqs'][azimuth], ax.get_xlim()[0], ax.get_xlim()[1], colors='gray', alpha=0.666, linestyles='dotted', zorder=999)

    # Day label in the upper-left corner
    xLoc = xmin + (xmax - xmin) * 0.001
    yLoc = ymin + (ymax - ymin) * 0.97
    ax.text(x=xLoc, y=yLoc, s=day,
            fontsize='small', horizontalalignment='left', verticalalignment='top',
            bbox=dict(facecolor='w', edgecolor=None, linewidth=0, alpha=0.8, pad=0.2))

    if colorbar:
        cbar = plt.colorbar(mappable=im, orientation='horizontal')
        cbar.set_label('H/V Ratio')

    # Set x and y labels
    yLoc = ymin - (ymin * 2.5e-1)
    ax.text(x=xmin, y=yLoc,s="UTC Time",
            fontsize='x-small', horizontalalignment='right', verticalalignment='top',
            bbox=dict(facecolor='w', edgecolor='none', alpha=0.8, pad=0.1))

    ax.set_ylabel(ylabel, fontsize='x-small')
    ax.set_yscale('log')

    fig.canvas.draw()

    return fig, ax
# HELPER functions for checking peaks
# Initialize peaks
def __init_peaks(_x, _y, _index_list, _hvsr_band, peak_freq_range=DEFAULT_BAND, _min_peak_amp=0):
    """Build the starting record for every candidate peak worth testing.

    A candidate index is kept only if its frequency lies inside both `_hvsr_band`
    and `peak_freq_range` (bounds inclusive) and its amplitude is strictly greater
    than `_min_peak_amp`. Kept candidates get a fresh dictionary with empty test
    results.

    Parameters
    ----------
    _x : list-like obj
        x-values (frequency or period values) of the curve
    _y : list-like obj
        H/V curve values, indexed the same way as `_x`
    _index_list : list or array_like
        Indices (into `_x`/`_y`) of the candidate peaks
    _hvsr_band : list
        Two-item list with low and high limit of the analysis extent
    peak_freq_range : list
        Two-item list with low and high limit of the range searched for peaks
    _min_peak_amp : float
        Amplitude a candidate must exceed to be kept

    Returns
    -------
    _peak : list
        List of dictionaries, one for each candidate that was kept
    """
    def _blank_record(freq, amp):
        # Every call builds new nested dicts so no two peaks share state
        return {'f0': float(freq), 'A0': float(amp),
                'f-': None, 'f+': None, 'Sf': None, 'Sa': None,
                'Score': 0,
                'Report': {'Lw':'', 'Nc':'', 'σ_A(f)':'', 'A(f-)':'', 'A(f+)':'', 'A0': '', 'P+': '', 'P-': '', 'Sf': '', 'Sa': ''},
                'PassList':{},
                'PeakPasses':False}

    _peak = []
    for idx in _index_list:
        freq = _x[idx]
        if not (_hvsr_band[0] <= freq <= _hvsr_band[1]):
            continue
        if not (peak_freq_range[0] <= freq <= peak_freq_range[1]):
            continue
        amp = _y[idx]
        if not (amp > _min_peak_amp):
            continue
        _peak.append(_blank_record(freq, amp))
    return _peak
# Check reliability of HVSR of curve
def __check_curve_reliability(hvsr_data, _peak, col_id='HV'):
    """Tests to check for reliable H/V curve

    Tests include:
        1) Peak frequency is greater than 10 / window length (f0 > 10 / Lw)
            f0 = peak frequency [Hz]
            Lw = window length [seconds]
        2) Number of significant cycles (Nc) is greater than 200 (Nc(f0) > 200)
            Nc = Lw * Nw * f0
                Lw = window length [sec]
                Nw = Number of windows used in analysis
                f0 = peak frequency [Hz]
        3) StDev of amplitude of H/V curve is less than 2 at all frequencies between 0.5f0 and 2f0
            (less than 3 if f0 is less than 0.5 Hz)
            f0 = peak frequency [Hz]
            StDev is a measure of the variation of all the H/V curves generated for each time window
                Our main H/V curve is the median of these

    Parameters
    ----------
    hvsr_data : dict
        Dictionary containing all important information generated about HVSR curve.
        Keys read here: 'psds', 'psd_raw', 'x_freqs' and 'hvsr_log_std'.
    _peak : list
        A list of dictionaries, with each dictionary containing information about each peak
    col_id : str, default='HV'
        Key selecting which standard deviation curve in hvsr_data['hvsr_log_std'] is tested

    Returns
    -------
    _peak : list
        List of dictionaries, same as above, except with information about curve reliability tests added
        ('Lw', 'Nc' and 'σ_A(f)' in 'Report'; 'WinLen', 'SigCycles' and 'LowCurveStD' in 'PassList')
    """
    anyKey = list(hvsr_data['psds'].keys())[0]  # Doesn't matter which channel we use as key

    window_len = hvsr_data['psds'][anyKey]['ppsd_length']  # Window length in seconds
    window_num = np.array(hvsr_data['psd_raw'][anyKey]).shape[0]

    for pk in _peak:
        peakFreq = pk['f0']

        # Test 1: window long enough for this frequency
        test1 = peakFreq > 10/window_len

        # Test 2: enough significant cycles
        nc = window_len * window_num * peakFreq
        test2 = nc > 200

        # Test 3: standard deviation stays below the threshold between f0/2 and 2*f0.
        # The threshold is fixed before scanning so the report can always state it,
        # even when no frequency bin falls inside that interval.
        halfF0 = peakFreq/2
        doublef0 = peakFreq*2
        compVal = 2 if peakFreq >= 0.5 else 3

        test3 = True
        for i, freq in enumerate(hvsr_data['x_freqs'][anyKey][:-1]):
            if halfF0 <= freq < doublef0 and hvsr_data['hvsr_log_std'][col_id][i] >= compVal:
                test3 = False
                break

        lwMark = sprit_utils._check_mark() if test1 else sprit_utils._x_mark()
        pk['Report']['Lw'] = f'{round(peakFreq,3)} > {10/int(window_len):0.3} (10 / {int(window_len)})  {lwMark}'

        ncMark = sprit_utils._check_mark() if test2 else sprit_utils._x_mark()
        pk['Report']['Nc'] = f'{int(nc)} > 200  {ncMark}'

        stdMark = sprit_utils._check_mark() if test3 else sprit_utils._x_mark()
        pk['Report']['σ_A(f)'] = f'H/V Amp. St.Dev. for {peakFreq*0.5:0.3f}-{peakFreq*2:0.3f}Hz < {compVal}  {stdMark}'

        pk['PassList']['WinLen'] = test1
        pk['PassList']['SigCycles'] = test2
        pk['PassList']['LowCurveStD'] = test3
    return _peak
# Check clarity of peaks
def __check_clarity(_x, _y, _peak, do_rank=True):
    """Run the amplitude clarity tests on every peak.

    Conditions tested (SESAME 2004):
        - there exists a frequency f-, lying between f0/4 and f0, such that A0 / A(f-) > 2
        - there exists a frequency f+, lying between f0 and 4*f0, such that A0 / A(f+) > 2
        - A0 > 2

    Each passed condition adds 1 to the peak's 'Score'. Results are written to the
    peak's 'f-', 'f+', 'Report' ('A(f-)', 'A(f+)', 'A0') and 'PassList'
    ('ProminenceLow', 'ProminenceHi', 'AmpClarity') entries.

    Parameters
    ----------
    _x : list-like obj
        x-values (frequency or period values) of the curve
    _y : list-like obj
        H/V curve values
    _peak : list
        List with dictionaries for each peak, containing info about that peak
    do_rank : bool, default=True
        If True, the module-level `max_rank` counter is increased by one for
        each of the three conditions

    Returns
    -------
    _peak : list
        The same list of dictionaries, updated in place with the clarity test results
    """
    global max_rank

    # ---- Condition 1: a low-side frequency with less than half the peak amplitude ----
    if do_rank:
        max_rank += 1

    # Index at which the downward scan starts (one lower for 1000-point x arrays)
    scan_start = len(_y)-2 if np.array(_x).shape[0] == 1000 else len(_y)-1

    for pk in _peak:
        f0 = pk['f0']
        a0 = pk['A0']
        # Assume failure until a qualifying frequency is found
        pk['f-'] = sprit_utils._x_mark()
        pk['Report']['A(f-)'] = f"H/V curve > {a0/2:0.2f} for all {f0/4:0.2f} Hz-{f0:0.3f} Hz {sprit_utils._x_mark()}"
        pk['PassList']['ProminenceLow'] = False
        for j in reversed(range(scan_start + 1)):
            in_window = float(f0) / 4.0 <= _x[j] < float(f0)
            if in_window and float(a0) / _y[j] > 2.0:
                pk['Score'] += 1
                pk['f-'] = '%10.3f %1s' % (_x[j], sprit_utils._check_mark())
                pk['Report']['A(f-)'] = f"Amp. of H/V Curve @{_x[j]:0.3f}Hz ({_y[j]:0.3f}) < {a0/2:0.3f} {sprit_utils._check_mark()}"
                pk['PassList']['ProminenceLow'] = True
                break

    # ---- Condition 2: a high-side frequency with less than half the peak amplitude ----
    if do_rank:
        max_rank += 1

    for pk in _peak:
        f0 = pk['f0']
        a0 = pk['A0']
        pk['f+'] = sprit_utils._x_mark()
        pk['Report']['A(f+)'] = f"H/V curve > {a0/2:0.2f} for all {f0:0.2f} Hz-{f0*4:0.3f} Hz {sprit_utils._x_mark()}"
        pk['PassList']['ProminenceHi'] = False
        # Upward scan; the last x index is not visited
        for j in range(len(_x) - 1):
            in_window = float(f0) * 4.0 >= _x[j] > float(f0)
            if in_window and float(a0) / _y[j] > 2.0:
                pk['Score'] += 1
                pk['f+'] = f"{_x[j]:0.3f} {sprit_utils._check_mark()}"
                pk['Report']['A(f+)'] = f"H/V Curve at {_x[j]:0.2f} Hz: {_y[j]:0.2f} < {a0/2:0.2f} (f0/2) {sprit_utils._check_mark()}"
                pk['PassList']['ProminenceHi'] = True
                break

    # ---- Condition 3: the peak amplitude itself must exceed 2 ----
    if do_rank:
        max_rank += 1

    _a0 = 2.0
    for pk in _peak:
        passes = float(pk['A0']) > _a0
        if passes:
            pk['Report']['A0'] = f"Amplitude of peak ({pk['A0']:0.2f}) > {int(_a0)} {sprit_utils._check_mark()}"
            pk['Score'] += 1
        else:
            pk['Report']['A0'] = '%0.2f > %0.1f %1s' % (pk['A0'], _a0, sprit_utils._x_mark())
        pk['PassList']['AmpClarity'] = passes

    return _peak
# Check the stability of the frequency peak
def __check_freq_stability(_peak, _peakm, _peakp):
    """Test peaks for satisfying stability conditions

    Test as outlined by SESAME 2004:
        - the _peak should appear at the same frequency (within a percentage ± 5%) on the H/V
            curves corresponding to mean + and - one standard deviation.

    For every peak, 'Report' receives a 'P-' and a 'P+' entry and 'PassList'
    receives 'FreqStability'. The test passes (and 'Score' is increased by one)
    only when a matching peak is found in both `_peakm` and `_peakp`. When no
    match exists, the report cites the candidate closest in frequency; when a
    candidate list is empty, the report entry is just the fail mark.

    Parameters
    ----------
    _peak : list
        List of dictionaries containing input information about peak, without freq stability test
    _peakm : list
        List of dictionaries containing input information about peakm (peak minus one StDev in freq)
    _peakp : list
        List of dictionaries containing input information about peak (peak plus one StDev in freq)

    Returns
    -------
    _peak : list
        List of dictionaries containing output information about peak test
    """
    global max_rank

    # check σf and σA
    max_rank += 1

    def _first_within_5pct(candidates, f0):
        """Return the first candidate whose f0 lies within ±5% of `f0`, or None"""
        for cand in candidates:
            if f0 * 0.95 <= cand['f0'] <= f0 * 1.05:
                return cand
        return None

    def _closest(candidates, f0):
        """Return the candidate whose f0 is nearest to `f0` (first one on ties)"""
        return min(candidates, key=lambda cand: abs(cand['f0'] - f0))

    def _message(cand, f0, mark):
        return f"{cand['f0']:0.2f} Hz within ±5% of {f0:0.2f} Hz {mark}"

    # First check below (curve minus one standard deviation)
    _found_m = list()
    for pk in _peak:
        f0 = pk['f0']
        pk['Report']['P-'] = sprit_utils._x_mark()

        match = _first_within_5pct(_peakm, f0)
        if match is not None:
            pk['Report']['P-'] = _message(match, f0, sprit_utils._check_mark())
        elif len(_peakm) > 0:
            pk['Report']['P-'] = _message(_closest(_peakm, f0), f0, sprit_utils._x_mark())
        _found_m.append(match is not None)

    # Then check above (curve plus one standard deviation)
    for pk, found_below in zip(_peak, _found_m):
        f0 = pk['f0']
        pk['Report']['P+'] = sprit_utils._x_mark()
        # Fail by default so the key exists even when there are no candidates
        pk['PassList']['FreqStability'] = False

        match = _first_within_5pct(_peakp, f0)
        if match is not None:
            if found_below:
                pk['Report']['P+'] = _message(match, f0, sprit_utils._check_mark())
                pk['Score'] += 1
                pk['PassList']['FreqStability'] = True
            else:
                # Matched above but not below: the combined test still fails
                pk['Report']['P+'] = _message(match, f0, sprit_utils._x_mark())
        elif len(_peakp) > 0:
            pk['Report']['P+'] = _message(_closest(_peakp, f0), f0, sprit_utils._x_mark())

    return _peak
# Check stability
def __check_stability(_stdf, _peak, _hvsr_log_std, rank):
    """Test peaks for satisfying stability conditions as outlined by SESAME 2004

    This includes:
       - σf lower than a frequency dependent threshold ε(f)
       - σA (f0) lower than a frequency dependent threshold θ(f),

    Thresholds used, by peak frequency f0:

        ===============  ==========  =====
        f0 [Hz]          ε (× f0)    θ
        ===============  ==========  =====
        f0 < 0.2         0.25        0.48
        0.2 <= f0 < 0.5  0.20        0.40
        0.5 <= f0 < 1.0  0.15        0.30
        1.0 <= f0 <= 2.0 0.10        0.25
        otherwise        0.05        0.20
        ===============  ==========  =====

    Each passed condition adds 1 to the peak's 'Score'. Results are written to the
    peak's 'Sf', 'Sa', 'Report' ('Sf', 'Sa') and 'PassList'
    ('LowStDev_Freq', 'LowStDev_Amp') entries.

    Parameters
    ----------
    _stdf : list
        Frequency standard deviation for each peak (same order as `_peak`)
    _peak : list
        List of dictionaries containing input information about peak, without freq stability test
    _hvsr_log_std : list
        Standard deviation value tested against θ for each peak (same order as `_peak`)
    rank : int
        If truthy, the module-level `max_rank` counter is increased by two

    Returns
    -------
    _peak : list
        List of dictionaries containing output information about peak test
    """
    global max_rank

    # check σf and σA
    if rank:
        max_rank += 2

    for _i, _this_peak in enumerate(_peak):
        _this_peak['Sf'] = _stdf[_i]
        _this_peak['Sa'] = _hvsr_log_std[_i]

        # Frequency-dependent thresholds: _e scales f0 for the σf test, _t is the σA limit
        f0 = _this_peak['f0']
        if f0 < 0.2:
            _e, _t = 0.25, 0.48
        elif f0 < 0.5:
            _e, _t = 0.2, 0.40
        elif f0 < 1.0:
            _e, _t = 0.15, 0.3
        elif f0 <= 2.0:
            _e, _t = 0.1, 0.25
        else:
            _e, _t = 0.05, 0.2

        # σf test
        freqPass = bool(_stdf[_i] < _e * f0)
        freqMark = sprit_utils._check_mark() if freqPass else sprit_utils._x_mark()
        _this_peak['Report']['Sf'] = f"St.Dev. of Peak Freq. ({_stdf[_i]:0.2f}) < {(_e * f0):0.3f} {freqMark}"
        _this_peak['PassList']['LowStDev_Freq'] = freqPass
        if freqPass:
            _this_peak['Score'] += 1

        # σA test
        ampPass = bool(_hvsr_log_std[_i] < _t)
        ampMark = sprit_utils._check_mark() if ampPass else sprit_utils._x_mark()
        _this_peak['Report']['Sa'] = f"St.Dev. of Peak Amp. ({_hvsr_log_std[_i]:0.3f}) < {_t:0.2f} {ampMark}"
        _this_peak['PassList']['LowStDev_Amp'] = ampPass
        if ampPass:
            _this_peak['Score'] += 1

    return _peak
# Get frequency standard deviation
def __get_stdf(x_values, indexList, hvsrPeaks):
    """Private function to get the frequency standard deviation of each peak of interest.

    For every peak index, the closest peak index found in each time window is
    collected (ties go to the earlier entry in that window's list), the peak
    index itself is added, and the standard deviation of the corresponding
    `x_values` is computed.

    Parameters
    ----------
    x_values : list or np.array
        Array of x_values of dataset (frequency or period, most often frequency)
    indexList : list
        List of index/indices of peak(s) of interest, (index is within the x_values list)
    hvsrPeaks : pandas.Series
        One entry per time window (accessed positionally via `.iloc`); each entry
        is a sequence of peak indices found in that window's H/V curve

    Returns
    -------
    stdf : list
        One standard deviation per entry of `indexList`, in the same order
    """
    def _nearest_in_window(window_peaks, target):
        """Return the entry of `window_peaks` closest to `target`, or None if the window has no peaks"""
        candidates = [window_peaks[k] for k in range(len(window_peaks))]
        if not candidates:
            return None
        # min() keeps the first of equally close candidates
        return min(candidates, key=lambda cand: abs(target - cand))

    stdf = []
    for index in indexList:
        matched = []
        for row in range(len(hvsrPeaks)):
            nearest = _nearest_in_window(hvsrPeaks.iloc[row], index)
            if nearest is not None:
                matched.append(nearest)

        # The peak of interest itself is included in the sample
        matched.append(index)

        # Translate indices to x-values and take their spread
        stdf.append(np.std([x_values[pt] for pt in matched]))
    return stdf