In [2]:
import pandas as p
import math
import numpy as np
from sklearn.model_selection import train_test_split 

file = p.read_csv('files/lorawan/lorawan_dataset_antwerp.csv')  # load the raw dataset

#### Remove entries with less than 3 gateways ####
# Thresholds used by the filter, named once instead of repeated as bare numbers.
N_GATEWAYS = 72        # the first 72 columns hold the per-gateway RSS readings
NO_SIGNAL_RSS = -200   # RSS value of a gateway that did not receive the message
MIN_GATEWAYS = 3       # messages received by fewer gateways than this are removed

columns = file.columns
x = file[columns[0:N_GATEWAYS]]  # Get basestations' RSS readings

# Receiving gateways per message, computed for all rows in one vectorized pass.
# The total is taken from the slice width so it cannot drift from the slice itself.
receiving = x.shape[1] - (x == NO_SIGNAL_RSS).sum(axis=1)
c = receiving.tolist()  # plain list, kept for any later cell that still reads `c`

# Row-aligned boolean mask of the messages to discard. Filtering by mask (rather
# than by loop counters passed to `drop` as labels) does not rely on the frame
# carrying the default 0..n-1 index.
too_few = (receiving < MIN_GATEWAYS).to_numpy()
indices = file.index[too_few].tolist()  # labels of the removed messages

print(file.shape)  # size before...
file = file.loc[~too_few]  # keep only messages with at least MIN_GATEWAYS receivers
print(file.shape)  # ... and size after the dropping


#### Dataset preparation for ML pipeline
columns = file.columns
x = file[columns[0:72]]  # features (RSS receptions)
y = file[columns[75:]]  # target (locations)
HDOP = np.expand_dims(file['HDOP'], axis=1)  # column vector: one HDOP value per row
random_state = 42

print(HDOP.shape)

# Train, validation, test set splitting, (70%/15%/15%)
# x, y and HDOP are passed to the SAME call: train_test_split applies one set of
# shuffled indices to every array it is given, so the three outputs are
# row-aligned by construction rather than by re-using the seed in separate calls.
(x_train, x_test_val,
 y_train, y_test_val,
 HDOP_train, HDOP_test_val) = train_test_split(
    x.values, y.values, HDOP, test_size=0.3, random_state=random_state)

# Second stage: halve the held-out 30% into validation and test parts.
(x_val, x_test,
 y_val, y_test,
 HDOP_val, HDOP_test) = train_test_split(
    x_test_val, y_test_val, HDOP_test_val, test_size=0.5, random_state=random_state)

def _dump_split(values, column_names, csv_path):
    """Wrap one split array in a DataFrame, write it to `csv_path`, return the frame.

    The CSV is written without the index column.
    """
    frame = p.DataFrame(values, columns=column_names)
    frame.to_csv(csv_path, index=False)
    return frame


# Feature splits keep the original RSS column names.
feature_names = x.columns.values.tolist()
x_train_df = _dump_split(x_train, feature_names, 'files/x_train.csv')
x_val_df = _dump_split(x_val, feature_names, 'files/x_val.csv')
x_test_df = _dump_split(x_test, feature_names, 'files/x_test.csv')

# Target splits keep the original target column names.
target_names = y.columns.values.tolist()
y_train_df = _dump_split(y_train, target_names, 'files/y_train.csv')
y_val_df = _dump_split(y_val, target_names, 'files/y_val.csv')
y_test_df = _dump_split(y_test, target_names, 'files/y_test.csv')

# HDOP splits are single-column frames.
HDOP_train_df = _dump_split(HDOP_train, ['HDOP'], 'files/HDOP_train.csv')
HDOP_val_df = _dump_split(HDOP_val, ['HDOP'], 'files/HDOP_val.csv')
HDOP_test_df = _dump_split(HDOP_test, ['HDOP'], 'files/HDOP_test.csv')




(130429, 77)
(55375, 77)
(55375, 1)


In [3]:
import json

# Parse the gateway description file into a dict.
with open('files/lorawan/lorawan_antwerp_gateway_locations.json') as json_file:
    gateway_locations = json.load(json_file)

# One (latitude, longitude) tuple per entry, in the dict's iteration order.
gateway_locations_list = [
    (entry['latitude'], entry['longitude'])
    for entry in gateway_locations.values()
]
print(gateway_locations_list)



[(50.898949, 4.437971), (51.219257, 4.413227), (51.031765, 3.752673), (51.024288, 4.172601), (51.120323, 5.266541), (50.991409, 5.41839), (51.247723, 4.432333), (50.811005, 4.552141), (51.217793, 4.426748), (51.382084, 4.756686), (51.13298, 4.339783), (50.948193, 5.390654), (51.417225, 4.764807), (50.986633, 4.822298), (51.058033, 4.836339), (51.062271, 3.739041), (51.199253, 4.498773), (51.017212, 3.772769), (51.206081, 4.77691), (50.972889, 5.463193), (50.888924, 4.446842), (51.053875, 3.732365), (50.822422, 5.215035), (51.241821, 4.449746), (51.014141, 3.727952), (51.309242, 4.866507), (50.887886, 3.884478), (51.313484, 3.118006), (50.767448, 3.939036), (51.223225, 4.407838), (51.017372, 4.479738), (51.144844, 4.16816), (50.854164, 5.468602), (51.146603, 4.058345), (51.148735, 2.724441), (50.878448, 4.700553), (51.223267, 4.478827), (51.127098, 4.827172), (51.195278, 4.426771), (50.941864, 5.466932), (50.935932, 4.473584), (51.194767, 4.380632), (51.235054, 4.830704), (51.073723, 5.