# Import packages
import numpy as np
np.random.seed(123)
np.set_printoptions(suppress=True)
import pandas as pd
import matplotlib.pyplot as plt
import pygmo as pg
import random
random.seed(123)
import operator
import copy
from collections import defaultdict
import math
# Read "treatment_centers.csv". This file has the following information:
# 1. Name of the treatment center;
# 2. Latitude and Longitude (as 2 different columns) of the above mentioned treatment centers;
# 3. Demand allocated to each treatment center
hospitals = pd.read_csv('INPUT/treatment_centers.csv') # turn into array
hospitals_array = np.array(hospitals[['Longitude','Latitude','Demand']]) # Hospital names are irrelevant
hospitals.head()  # NOTE: notebook leftover - has no effect when run as a script
# Read "1000_largest_us_cities_raw_cost.csv". This file has the following information:
# 1. 1000 candidate locations for the manufacturing and cryopreservation facilities;
# - these were chosen based on the 1000 biggest cities in the USA;
# 2. Rank (i.e. this is the rank by size) and population size for each of the candidate locations;
# 3. State within each city is located;
# 4. Latitude and Longitude (as 2 separate columns) for each of the candidate locations;
# 5. The cost/order calculated for each of the candidate locations;
# - this is random at this stage and calculated at state level rather than candidate location;
locations = pd.read_csv('INPUT/1000_largest_us_cities_raw_cost.csv')
locations.head()  # NOTE: notebook leftover - has no effect when run as a script
# Create construction costs per State for MANU and CRYO
construction_df = pd.DataFrame(locations.State.unique(),columns=['State'])
# SIMULATE CONSTRUCTION COSTS
# Construction cost is drawn uniformly in [700k, 999k] for manufacturing
# (np.random.randint's upper bound is exclusive)
construction_df['Constr_manu'] = np.random.randint(700,1000, size = len(construction_df))*1000
# Construction cost is drawn uniformly in [100k, 249k] for cryopreservation
construction_df['Constr_cryo'] = np.random.randint(100,250, size = len(construction_df))*1000
# Merge random construction data with locations dataframe and sort by index; drop index
# (reset_index/sort_values restores the original row order after the merge)
locations = pd.merge(locations.reset_index(),construction_df,
                     on = 'State').sort_values(['index']).drop(columns = ['index']).reset_index(drop = True)
# turn into array; City, Rank, State, and Population are not relevant anymore
loc_array = np.array(locations[['Longitude','Latitude','Cost_order','Constr_manu','Constr_cryo']])
locations.head()  # NOTE: notebook leftover - has no effect when run as a script
# Haversine great-circle distance formula
def haversine(A_lon, A_lat, B_lon, B_lat, radius=6371.0):
    """Return the great-circle distance between points A and B.

    Coordinates are in decimal degrees; scalars or NumPy arrays are
    accepted (broadcasting applies).  `radius` is the sphere radius and
    defaults to the Earth's mean radius in km; pass 3958.756 to get the
    answer in miles (previously a hard-coded constant).
    """
    A_lon, A_lat, B_lon, B_lat = map(np.radians, [A_lon, A_lat, B_lon, B_lat])
    dlon = B_lon - A_lon
    dlat = B_lat - A_lat
    # haversine: a = sin^2(dlat/2) + cos(latA)*cos(latB)*sin^2(dlon/2)
    a = np.sin(dlat/2.0)**2 + np.cos(A_lat) * np.cos(B_lat) * np.sin(dlon/2.0)**2
    return 2.0 * radius * np.arcsin(np.sqrt(a))
### HYPERPARAMETERS
# Fixed hyperparameters
SPEED = 110 # Average driving speed in America (km/h); converts distance to hours
TOTAL_DEMAND = sum(hospitals_array[:,2]) # Total demand across all hospitals
NR_LOCATIONS = len(loc_array) # Nr of candidate locations
# INSERT SHELF LIFE HERE (in hours)
SHELF_LIFE = 12
# INSERT CAP FOR MAXIMUM OF UNCOVERED HOSPITALS OBJECTIVE
# (solutions must cover at least 1 - MAX_UNCOVERED of total demand)
MAX_UNCOVERED = 0.2
# MORS hyperparameters (N = Population x Generations)
EVALUATIONS = 20 # number of independent repetitions of the whole search
MANU_PROP = 0.01 # Probability of manufacturing locations
POPULATION = 500
PARENTS = math.ceil(POPULATION/4) # Parents are a quarter of the population size
GENERATIONS = 500
PF_SIZE_MAX = 200 # Maximum Pareto Front size (archive is trimmed beyond this)
### CALCULATE SHELF-LIFE RADIUSES OF LOCATIONS FOR CRYO AND MANUFACTURING
# Calculate radiuses for ALL candidate locations based on shelf-life
# for both Cryopreservation and Manufacturing facilities
def within_shelflife(shelf_life):
    """Map each candidate-location index to the set of hospital indices
    reachable within the shelf-life.

    Returns (hospitals_for_manu, hospitals_for_cryo).  For manufacturing
    the shelf-life must cover a RETURN route (hence half the budget); for
    cryopreservation one-way suffices, as shelf-life is deactivated once
    the product is frozen.

    Reads the global `hospitals` and `locations` DataFrames without
    mutating them (the previous version temporarily added/dropped a fake
    'del' merge column on both globals and recovered the pair indices via
    a fragile unravel_index/reshape sequence).
    """
    # Pairwise order times (hours): rows = hospitals, columns = locations,
    # computed by NumPy broadcasting instead of a cross-join merge.
    hosp_xy = hospitals[['Longitude', 'Latitude']].to_numpy()
    loc_xy = locations[['Longitude', 'Latitude']].to_numpy()
    order_time = haversine(hosp_xy[:, None, 0], hosp_xy[:, None, 1],
                           loc_xy[None, :, 0], loc_xy[None, :, 1]) / SPEED

    def _coverage(max_hours):
        # For each location, the set of hospitals whose one-way order time
        # fits within `max_hours`. Locations with no hospital in range are
        # simply absent (added later by full_locations).
        hosp_idx, loc_idx = np.nonzero(order_time <= max_hours)
        cover = defaultdict(set)
        for h, l in zip(hosp_idx, loc_idx):
            cover[l].add(h)
        return dict(cover)

    # MANUFACTURING: divided by 2 since the shelf life needs a return route
    hospitals_for_manu = _coverage(shelf_life / 2)
    # CRYOPRESERVATION: no return needed
    hospitals_for_cryo = _coverage(shelf_life)
    return hospitals_for_manu, hospitals_for_cryo

hosp_for_manu, hosp_for_cryo = within_shelflife(SHELF_LIFE)
# Add locations with no hospital locations in shelf-life range
def full_locations(a_dict):
    """Return a dict with an entry for every location index 0..NR_LOCATIONS-1.

    Locations missing from `a_dict` map to a NEW empty set each.  The
    previous `dict.fromkeys(range(NR_LOCATIONS), set())` aliased one
    single shared set across every missing key, so mutating any of them
    would have silently mutated them all (latent bug).
    """
    return {loc: a_dict.get(loc, set()) for loc in range(NR_LOCATIONS)}
# add missing locations that have 0 hospitals within shelf life
hosp_for_manu = full_locations(hosp_for_manu)
hosp_for_cryo = full_locations(hosp_for_cryo)
### DEFINE HOSPITALS
# Hospital class determines the allocation of hospitals given a set of manufacturing and cryopreservation locations
class Hosp:
    def __init__(self, x, y, demand):
        # NOTE(review): instances are built as Hosp(*row) from hospitals_array,
        # whose rows are [Longitude, Latitude, Demand] - so x is the LONGITUDE
        # and y is the LATITUDE (the original comments had them swapped).
        # haversine(A_lon, A_lat, ...) is always called as (x, y, ...), which
        # is consistent with x = longitude.
        self.x = x # Longitude
        self.y = y # Latitude
        self.demand = demand # Estimated demand for given hospital
        self.time = 0.0  # order travel time (h); filled in by Solution.calc_objectives
        self.cost = 0.0  # order cost; filled in by Solution.calc_objectives
        #self.covered = True
        self.cryo = None  # index of allocated cryopreservation location, if any
        self.manu = None  # index of allocated manufacturing location, if any
    def distance(self, p2, func = haversine):
        # Distance (km by default) to another object exposing .x/.y in the same order.
        return func(self.x, self.y, p2.x, p2.y)
    def __repr__(self):
        return "(" + str(self.x) + "," + str(self.y) + ")"
    # Returns sorted list by preference of manufacturing locations given a candidate manufacturing solution
    def preference(self, loc_list, func = haversine):
        # Candidate location indices sorted by increasing distance from this hospital.
        locs = np.array(loc_list)
        H_x = np.full(len(locs), self.x)
        H_y = np.full(len(locs), self.y)
        L_x = loc_array[locs,0] # longitude
        L_y = loc_array[locs,1] # latitude
        dist = func(H_x,H_y,L_x,L_y)
        return locs[np.argsort(dist)]
# Define list of HOSPITALS
hospitals_list = [Hosp(*row) for row in hospitals_array] #list
# Create array of LxH where cells are d_h when h within cryo shelflife of l, and 0 otherwise
# (row = candidate location, column = hospital; cell = that hospital's demand if it
# is within the cryo shelf-life of the location, else 0)
demand_map_list = []
for loc in hosp_for_cryo:
    demand_map_list.append(np.array([hospitals_array[i,2] if i in hosp_for_cryo[loc] else 0 for i in range(len(hospitals_array))]))
demand_map_array = np.stack(demand_map_list, axis=0)
# demand from hospitals in range for each (cryo) location
cryo_demand = np.sum(demand_map_array,axis = 1)
### DEFINE SOLUTION
class Solution:
    # A candidate facility configuration: a list of manufacturing location
    # indices plus a list of cryopreservation location indices, together with
    # the per-hospital allocation and the three objective values.
    def __init__(self, manu_list, cryo_list):
        self.manu_list = manu_list
        self.cryo_list = cryo_list
        self.allocation = copy.deepcopy(hospitals_list) # list of hospitals (own copies, mutated below)
        self.avgwait_time = 0.0 # objective_1: demand-weighted average waiting time (h)
        self.total_cost = 0.0 # objective_2: construction + order costs
        self.uncovered = 1.0 # objective_3: fraction of demand NOT covered
    def calc_objectives(self): # default is haversine distance function (in km)
        # and 110 km/h speed
        # Computes all three objectives and allocates every covered hospital
        # to its closest facilities.  The avgwait_time == 0 guard makes repeat
        # calls no-ops once a non-zero waiting time has been computed.
        # NOTE(review): if a fully computed solution legitimately ends with
        # avgwait_time == 0 (nothing covered), a second call would accumulate
        # construction costs again - confirm callers invoke this only once.
        if self.avgwait_time == 0:
            manu_cover = set()
            for manu in self.manu_list:
                # add manufacturing constuction cost (loc_array column 3)
                self.total_cost += loc_array[manu,3]
                # reunion of hospitals within shelf-life of manu locations
                manu_cover |= hosp_for_manu[manu]
            cryo_cover = set()
            for cryo in self.cryo_list:
                # add cryo constuction cost (loc_array column 4)
                self.total_cost += loc_array[cryo,4]
                # reunion of hospitals within shelf-life of cryo locations
                cryo_cover |= hosp_for_cryo[cryo]
            # remove hospitals covered by manu (manufacturing coverage takes priority)
            cryo_cover -= manu_cover
            # total coverage (hospitals covered by manufacturing or cryo)
            total_cover = manu_cover | cryo_cover
            total_time = 0
            covered_demand = 0
            # Allocation and objective functions for manu_covered hospitals (MANUFACTURING)
            for hosp in manu_cover:
                hospital = self.allocation[hosp]
                hospital.manu = hospital.preference(self.manu_list)[0] # closest manufacturing
                # order cost = cost/order at the chosen location * hospital demand
                hospital.cost = loc_array[hospital.manu,2] * hospital.demand
                self.total_cost += hospital.cost
                # travel time is a RETURN ROUTE (hospital -> manu -> hospital)
                hospital.time = haversine(hospital.x,hospital.y,
                                          loc_array[hospital.manu,0],loc_array[hospital.manu,1]) / SPEED * 2
                total_time += hospital.time * hospital.demand
                covered_demand += hospital.demand
            # Allocation and objective functions for cryo_covered hospitals (CRYOPRESERVATION)
            for hosp in cryo_cover:
                hospital = self.allocation[hosp]
                hospital.manu = hospital.preference(self.manu_list)[0] # closest manufacturing
                hospital.cryo = hospital.preference(self.cryo_list)[0] # closest cryo
                # multiply cost order (per state) * hospital demand
                hospital.cost = loc_array[hospital.manu,2] * hospital.demand
                self.total_cost += hospital.cost
                # travel time is a H->C->M->H ROUTE (sum of the three legs)
                hospital.time = (haversine(hospital.x,hospital.y,
                                           loc_array[hospital.cryo,0],loc_array[hospital.cryo,1]) +
                                 haversine(loc_array[hospital.cryo,0],loc_array[hospital.cryo,1],
                                           loc_array[hospital.manu,0],loc_array[hospital.manu,1]) +
                                 haversine(hospital.x,hospital.y,
                                           loc_array[hospital.manu,0],loc_array[hospital.manu,1])) / SPEED
                total_time += hospital.time * hospital.demand
                covered_demand += hospital.demand
            # calculate coverage (objective #3)
            self.uncovered = 1 - covered_demand/TOTAL_DEMAND
            if self.uncovered < 1:
                # demand-weighted avg waiting time (objective #1);
                # guard avoids division by zero when nothing is covered
                self.avgwait_time = total_time / covered_demand
    def __repr__(self):
        return '['+str(self.manu_list)+','+str(self.cryo_list)+']'
### PROGRESSIVE COVERAGE WITH CRYO FOR HOSPITALS UNCOVERED BY MANUFACTURING
def best_coverage(total_uncovered_demand):
    # Return the indices of the location(s) tied for the largest uncovered
    # demand, together with that maximum value.
    peak = np.amax(total_uncovered_demand)
    return np.where(total_uncovered_demand == peak)[0], peak
def stochastic_coverage(uncovered_demand):
    # Greedy (randomized) set cover: repeatedly place a cryo facility at a
    # location covering the most still-uncovered demand, until no uncovered
    # demand remains.  Ties on uncovered demand are broken by total in-range
    # demand (global `cryo_demand`); remaining ties are broken at random.
    # MUTATES `uncovered_demand` in place - callers pass a deepcopy.
    # Returns the ordered list of selected cryo location indices.
    solution = []
    # candidate locations with max of uncovered hospitals
    best_pos,max_demand = best_coverage(np.sum(uncovered_demand,axis = 1))
    while max_demand > 0:
        # select total range (covered and uncovered hospitals)
        range_candidates = cryo_demand[best_pos]
        # select locations with max total coverage
        indices = np.where(range_candidates == np.amax(range_candidates))[0]
        # randomly select a location between locations with max coverage
        max_range_pos = best_pos[indices]
        np.random.shuffle(max_range_pos)
        cryo_pos = max_range_pos[0]
        # append newly added cryo
        solution.append(cryo_pos)
        # exclude newly covered hospitals from dictionary of uncovered hospitals
        # (zero their columns everywhere, and zero the chosen location's row)
        exclude = np.where(uncovered_demand[cryo_pos]>0)[0]
        uncovered_demand[:,exclude]=0
        uncovered_demand[cryo_pos,:]=0
        best_pos,max_demand = best_coverage(np.sum(uncovered_demand,axis = 1))
    return solution
def append_CryoSol(manu_list):
    # for a manu solution the function appends the cryo solutions in increasing
    # levels of coverage from base coverage to total coverage
    # Returns a list of Solution objects sharing `manu_list`, whose cryo lists
    # are growing prefixes of a stochastic full-coverage cryo placement; the
    # shortest prefix already satisfies the MAX_UNCOVERED demand cap.
    solutions = []
    # reunion of hospitals within shelf-life of manu locations
    base_cover = set()
    for manu in manu_list:
        base_cover |= hosp_for_manu[manu] # for each manu, add its set of covered hospitals
    # Determine uncovered hospitals from the hosp_for_manu dictionary
    uncovered_set = set(range(len(hospitals_list))) - base_cover
    # create dictionary of uncovered hospitals for each cryo (different from the locations of manu)
    uncovered_dict = {cryo: hosp_for_cryo[cryo] & uncovered_set for cryo in hosp_for_cryo}
    uncovered_demand = copy.deepcopy(demand_map_array)
    uncovered_demand[:,list(base_cover)]=0   # zero out demand already covered by manu
    uncovered_demand[manu_list,:]=0          # cryo cannot be placed where a manu already is
    full_coverage_cryo = stochastic_coverage(copy.deepcopy(uncovered_demand))
    min_cryo_level = 0
    if base_cover == set():
        base_demand = 0
    else:
        base_demand = sum(hospitals_array[np.array(list(base_cover)),2])/sum(hospitals_array[:,2])
    # Grow coverage one cryo at a time until the demand cap is satisfied.
    # NOTE(review): if even full cryo coverage cannot reach 1 - MAX_UNCOVERED
    # (e.g. some hospitals are unreachable within the shelf-life), this loop
    # would raise IndexError at full_coverage_cryo[min_cryo_level] - confirm
    # the input data guarantees reachability.
    while base_demand < 1-MAX_UNCOVERED:
        added_cryo = full_coverage_cryo[min_cryo_level]
        base_cover |= uncovered_dict[added_cryo]
        # exclude newly covered hospitals from dictionary of uncovered hospitals
        exclude = uncovered_dict[added_cryo].copy()
        for pos in uncovered_dict:
            uncovered_dict[pos] -= exclude
        base_demand = sum(hospitals_array[np.array(list(base_cover)),2])/sum(hospitals_array[:,2])
        min_cryo_level += 1
    for stage in range(min_cryo_level,len(full_coverage_cryo)+1):
        # append solution with different levels of coverage minimum coverage levels to full coverage
        cryo_list = full_coverage_cryo[:stage]
        solutions.append(Solution(manu_list,cryo_list))
    return solutions
### MULTI-OBJECTIVE RS
# NOTE(review): this region of the source file was corrupted - every span
# between a '<' and the next '>' was stripped (HTML-tag-style mangling),
# merging the tails of several definitions.  The code below RECONSTRUCTS
# createManuSol / CrowdingDist / RandomSearchFLP / dominates from the
# surviving fragments; confirm against the original source or version control.

# create random solution of manufacturing facilities
def createManuSol(manu_Prop = MANU_PROP):
    """Sample a random manufacturing solution: each of the NR_LOCATIONS
    candidates is selected independently with probability `manu_Prop`."""
    rand_vector = np.random.rand(NR_LOCATIONS)
    sample = [i for i, x in enumerate(rand_vector) if x < manu_Prop]
    return sample

def CrowdingDist(pareto_front, fitness, pf_size):
    """Trim the Pareto archive to `pf_size` members, keeping the solutions
    with the largest NSGA-II crowding distance (extreme points are kept)."""
    fits = np.asarray(fitness, dtype=float)
    n, n_obj = fits.shape
    dist = np.zeros(n)
    for k in range(n_obj):
        order = np.argsort(fits[:, k])
        dist[order[0]] = dist[order[-1]] = np.inf  # boundary points always survive
        span = fits[order[-1], k] - fits[order[0], k]
        if span > 0:
            dist[order[1:-1]] += (fits[order[2:], k] - fits[order[:-2], k]) / span
    keep = np.argsort(-dist)[:pf_size]
    return [pareto_front[i] for i in keep], [fitness[i] for i in keep]

def RandomSearchFLP(N, pf_size = PF_SIZE_MAX):
    """Multi-objective random search over N solution evaluations.

    Repeatedly samples a random manufacturing solution, expands it with
    progressive cryo coverage (append_CryoSol), evaluates each resulting
    Solution and maintains a Pareto archive of non-dominated solutions.
    Every POPULATION evaluations the archive is trimmed to `pf_size` via
    crowding distance and a snapshot of its fitness list is appended to the
    global `gen_fitness`.  Returns the final Pareto front (Solution list).
    """
    pareto_front = []
    fitness = []
    i = 0
    while i < N:
        for sol in append_CryoSol(createManuSol()):
            if i >= N:
                break
            # generation boundary: trim archive and snapshot its fitness
            if i > 0 and i % POPULATION == 0:
                if len(pareto_front) > pf_size:
                    pareto_front, fitness = CrowdingDist(pareto_front, fitness, pf_size)
                gen_fitness.append(fitness)
                fitness = copy.deepcopy(fitness)  # decouple snapshot from future updates
                print(i//POPULATION, end = ' ')
            sol.calc_objectives()
            new_fit = np.array([sol.avgwait_time, sol.total_cost, sol.uncovered])
            dominated = False
            j = 0
            while j < len(fitness):
                fit = fitness[j]
                # if new_fit dominates any of the fits in the pareto front,
                # the dominated old fits are discarded
                if all(fit > new_fit):
                    del pareto_front[j]
                    del fitness[j]
                elif all(fit < new_fit):
                    dominated = True  # an archived fit strictly dominates new_fit
                    break
                else:
                    j += 1
            if not dominated:
                pareto_front.append(sol)
                fitness.append(new_fit)
            i += 1
    # final trim + snapshot so gen_fitness holds one entry per generation
    if len(pareto_front) > pf_size:
        pareto_front, fitness = CrowdingDist(pareto_front, fitness, pf_size)
    gen_fitness.append(fitness)
    return pareto_front

# True if sol1 Pareto-dominates sol2; all objectives are minimised, which the
# default sign = -1 encodes (all components of sol1 strictly smaller)
def dominates(sol1, sol2, sign = -1):
    sol1, sol2 = np.array(sol1), np.array(sol2)
    return all(sign*sol1 > sign*sol2)
def Best_front(fitness):
    """Return the indices (into `fitness`) of the first Pareto front.

    fitness: iterable of objective vectors, e.g. [(1, 2, 3), (2, 2, 2), ...].
    Duplicated objective vectors are grouped so each unique vector is only
    compared once; all carriers of a non-dominated vector are returned.
    (The original also built a `dominated_fits` map that was written but
    never read - removed as dead code.)
    """
    # Use objectives as keys to group indices sharing identical fitness
    map_fit_ind = defaultdict(list)
    for i, f_value in enumerate(fitness):
        map_fit_ind[tuple(f_value)].append(i)
    fits = list(map_fit_ind.keys()) # (unique) fitness values
    current_front = []
    dominating_fits = defaultdict(int) # n: how many unique vectors dominate you
    # Rank first Pareto front: pairwise dominance over the unique vectors.
    for i, fit_i in enumerate(fits):
        for fit_j in fits[i + 1:]:
            # equal vectors affect neither counter
            if dominates(fit_i, fit_j):
                dominating_fits[fit_j] += 1
            elif dominates(fit_j, fit_i):
                dominating_fits[fit_i] += 1
        # fit_i has now been compared against every other vector
        # (earlier iterations handled pairs (j, i) with j < i)
        if dominating_fits[fit_i] == 0:
            current_front.append(fit_i)
    # expand unique front vectors back to the original indices
    front = []
    for fit in current_front:
        front.extend(map_fit_ind[fit])
    return front
### RUN
# Repeat the whole random search EVALUATIONS times; accumulate all final
# Pareto fronts, their fitness values, and per-generation fitness histories.
FINAL_GENERATIONS = []       # Solution objects from every evaluation's final front
FINAL_GENERATIONS_FIT = []   # matching fitness vectors of those solutions
ALL_HYPERVOLUME = []         # per-evaluation list of per-generation fitness snapshots
max_objectives = np.zeros(3) # element-wise max of each objective seen (e.g. for a hypervolume reference point)
for i in range(EVALUATIONS):
    print()
    print('Evaluation',i)
    gen_fitness = []  # global, appended to inside RandomSearchFLP
    pareto_front = RandomSearchFLP(GENERATIONS*POPULATION)
    FINAL_GENERATIONS.extend(pareto_front)
    FINAL_GENERATIONS_FIT.extend(gen_fitness[-1])  # last snapshot = final front fitness
    ALL_HYPERVOLUME.append(gen_fitness)
    eva_max_obj = np.amax(np.array([np.amax(fits,axis = 0) for fits in gen_fitness]),axis = 0)
    max_objectives = np.maximum(eva_max_obj, max_objectives)
# Store all fitness values
# Collect the per-generation frames in a list and concatenate ONCE at the
# end - repeatedly pd.concat-ing inside the loop is quadratic in rows.
frames = []
for ev in range(EVALUATIONS):
    for gen in range(GENERATIONS):
        df = pd.DataFrame(ALL_HYPERVOLUME[ev][gen], columns = ['AvgWaitingTime','TotalCost','%Uncovered'])
        df.insert(0,'Evaluation',ev)
        df.insert(1,'Generation',gen)
        frames.append(df)
    print(ev, end = ' ')
# empty seed frame fixes the column order exactly as before
FITNESS_DF = pd.DataFrame(columns = ['Evaluation','Generation','AvgWaitingTime','TotalCost','%Uncovered'])
FITNESS_DF = pd.concat([FITNESS_DF] + frames, ignore_index=True)
FITNESS_DF.to_csv('OUTPUT/Hypervolume/ProgCoverage_RS_fitness_'+str(MAX_UNCOVERED)+'.csv',index = False)