In [3]:
# IMPORTS

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import math 
import pylab 

from matplotlib.dates import DateFormatter
from glob import glob
from os import path
from collections import defaultdict
from virocon import (
    GlobalHierarchicalModel,
    ExponentiatedWeibullDistribution,
    WeibullDistribution,
    DependenceFunction,
    WidthOfIntervalSlicer,
    plot_marginal_quantiles,
    plot_dependence_functions,
)

import sys
sys.path.insert(0, path.abspath(path.join(path.curdir, '../src')))

%matplotlib notebook

# WIND

In [8]:
# concatenate environmental data

path_wind = 'environmental_data_all/lidar'
wind_files = glob(path_wind + "/*.pickle")

wind_data = []

for filename in wind_files:
    wind_df = pd.read_pickle(filename)
    wind_data.append(wind_df)

wind_frame = pd.concat(wind_data)
wind_frame= wind_frame.drop_duplicates()
wind_frame = wind_frame.sort_index()
wind_frame1= wind_frame.resample('1s').ffill()
wind_frame2= wind_frame.resample('1min').mean()

wind_frame1.index= wind_frame1.index.tz_convert('UTC')
wind_frame2.index= wind_frame2.index.tz_convert('UTC')


wind_frame1.to_csv('wind_data_resampled1s_UTC.csv')
wind_frame2.to_csv('wind_data_resampled1min_UTC.csv')

In [9]:
wind_data1 = pd.read_csv('wind_data_resampled1s_UTC.csv')
wind_data2 = pd.read_csv('wind_data_resampled1min_UTC.csv')

In [8]:
wind_data1= wind_data1.set_index('datetime')
wind_data2= wind_data2.set_index('datetime')
print(wind_data2.index)

Index(['2019-09-01 00:00:00+00:00', '2019-09-01 00:01:00+00:00',
       '2019-09-01 00:02:00+00:00', '2019-09-01 00:03:00+00:00',
       '2019-09-01 00:04:00+00:00', '2019-09-01 00:05:00+00:00',
       '2019-09-01 00:06:00+00:00', '2019-09-01 00:07:00+00:00',
       '2019-09-01 00:08:00+00:00', '2019-09-01 00:09:00+00:00',
       ...
       '2019-12-25 23:50:00+00:00', '2019-12-25 23:51:00+00:00',
       '2019-12-25 23:52:00+00:00', '2019-12-25 23:53:00+00:00',
       '2019-12-25 23:54:00+00:00', '2019-12-25 23:55:00+00:00',
       '2019-12-25 23:56:00+00:00', '2019-12-25 23:57:00+00:00',
       '2019-12-25 23:58:00+00:00', '2019-12-25 23:59:00+00:00'],
      dtype='object', name='datetime', length=167040)


In [12]:
print(wind_data1.index[-1])
print(wind_data2.index[-1])
print(len(wind_data1))
print(len(wind_data2))

2019-12-25 23:59:59+00:00
2019-12-25 23:59:00+00:00
10022400
167040


# WAVES

In [10]:
# concatenate environmental data

path_waves = 'environmental_data_all/waves'
wave_files = glob(path_waves + "/*.pickle")

wave_data = []

for filename in wave_files:
    wave_df = pd.read_pickle(filename)
    wave_data.append(wave_df)

wave_frame = pd.concat(wave_data)

wave_frame.insert(column= 'time', value= pd.to_datetime(wave_frame['Zeitpunkt gerundet']), loc= 0)
wave_frame.set_index('time', inplace=True)
wave_frame.index= wave_frame.index.tz_localize('UTC')

wave_frame.fillna(method= 'ffill', inplace=True)

wave_frame= wave_frame[~wave_frame.index.duplicated(keep='first')]
print(wave_frame.index.is_unique)


True


In [18]:
wave_frame= wave_frame.resample('1min').ffill()
#print(wave_frame)

                           Zeitpunkt gerundet                      Date/Time  \
time                                                                           
2019-08-03 00:00:00+00:00 2019-08-03 00:00:00  2019-08-03T00:05:00.000+02:00   
2019-08-03 00:01:00+00:00 2019-08-03 00:00:00  2019-08-03T00:05:00.000+02:00   
2019-08-03 00:02:00+00:00 2019-08-03 00:00:00  2019-08-03T00:05:00.000+02:00   
2019-08-03 00:03:00+00:00 2019-08-03 00:00:00  2019-08-03T00:05:00.000+02:00   
2019-08-03 00:04:00+00:00 2019-08-03 00:00:00  2019-08-03T00:05:00.000+02:00   
...                                       ...                            ...   
2019-12-25 23:26:00+00:00 2019-12-25 23:00:00  2019-12-25T23:04:59,999+01:00   
2019-12-25 23:27:00+00:00 2019-12-25 23:00:00  2019-12-25T23:04:59,999+01:00   
2019-12-25 23:28:00+00:00 2019-12-25 23:00:00  2019-12-25T23:04:59,999+01:00   
2019-12-25 23:29:00+00:00 2019-12-25 23:00:00  2019-12-25T23:04:59,999+01:00   
2019-12-25 23:30:00+00:00 2019-12-25 23:

In [19]:
wave_frame.to_csv('wave_data_resampled1min_UTC.csv')