import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# Apply seaborn's default plotting theme to the figures drawn below.
sns.set()
# Author: Iza Romanowska
# Date: 13/10/2020
# Associated data:
#   Dataset_events: Romanowska, Iza 2020 “Historical events at Palmyra, Syria”, version 1.0. Zenodo. Doi: xxxx
#   Dataset_architecture: Romanowska, Iza 2020 “Major construction projects at Palmyra, Syria”, version 1.0. Zenodo. Doi: 10.5281/zenodo.4669962
# Associated publications:
#   Romanowska, I. et al. in press “Reconstructing the social, economic and demographic trends of Palmyra’s elite from funerary data.” Journal of Archaeological Science: xxx.
#   Bobou, O. et al. 2021 “Historical Trajectories of Palmyra’s elites through the lens of archaeological data.” Journal of Urban Archaeology 4: xxx.
#   Raja, R. et al. in press “300 years of Palmyrene history through deaths. Unlocking the potential of archaeological and historical data for studying social transformation.”
# Description: Data analysis script contrasting the cumulative probability curve derived from Palmyra's funerary data (portraits, burials and tombs) with major historical events in the region and monumental architecture projects in the city.
# Load the construction-project table and order it by ``date_start_func``
# (plotted further down as the "building opening" date), so that the new
# row number can serve as the y coordinate of each project.
df = pd.read_csv("SI_A_data/dataset_architecture.csv")
df.head()
df = df.sort_values(by="date_start_func").reset_index()

# Timeline: one horizontal bar per project running from ``date_start_build``
# to ``date_start_func``, with a dot marking the end of the bar.
plt.figure(figsize=(14, 5), dpi=80)
plt.hlines(
    y=df.index,
    xmin=df["date_start_build"],
    xmax=df["date_start_func"],
    alpha=0.3,
    linewidth=6,
)
plt.scatter(df["date_start_func"], df.index, alpha=0.6)
def _first_slash_value_to_numeric(column):
    """Return *column* converted to numbers, repairing non-numeric entries.

    Entries that ``pd.to_numeric`` cannot parse are split on ``'/'`` and the
    first piece is kept as an ``int``. Missing entries (NaN/None) are left
    missing rather than being sent through the string repair, which would
    fail on a non-string value.

    Raises whatever ``int()`` / ``pd.to_numeric(errors='raise')`` raise when
    an entry still is not numeric after the repair.
    """
    as_numbers = pd.to_numeric(column, errors='coerce')
    # Present-but-unparseable cells only; cells that were empty to begin
    # with also come back as NaN from the coercion and must be excluded.
    needs_repair = as_numbers.isna() & column.notna()
    cleaned = column.copy()
    if needs_repair.any():
        cleaned.loc[needs_repair] = column[needs_repair].apply(
            lambda raw: int(raw.split('/')[0])
        )
    # Final strict conversion: fail loudly if anything is still non-numeric.
    return pd.to_numeric(cleaned, errors='raise')


# Load the funerary dataset and normalise both date columns to numbers.
data = pd.read_excel('SI_A_data/dataset_funerary.xlsx')
data['date_start'] = _first_slash_value_to_numeric(data['date_start'])
data['date_end'] = _first_slash_value_to_numeric(data['date_end'])
def calc_prob(data, multiplier=1):
    """Sum, year by year, the uniform date probability of every object.

    Each row contributes ``multiplier / (date_end - date_start)`` to every
    year in the half-open interval ``[date_start, date_end)``. Rows whose
    start or end date is missing are ignored, as are rows whose interval is
    empty (``date_end <= date_start``).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain numeric ``date_start`` and ``date_end`` columns. A
        ``range`` column holding the per-year weight of each row is written
        to this frame in place (kept for backward compatibility).
    multiplier : number, default 1
        Scale factor applied to every row's weight.

    Returns
    -------
    pandas.Series
        Named ``'Probability'``, indexed by ``'Year'`` from the earliest
        ``date_start`` up to (not including) the latest ``date_end`` among
        the rows that have both dates.
    """
    # Per-year weight of each object, stored on the caller's frame as before.
    data['range'] = (1 / (data['date_end'] - data['date_start'])) * multiplier

    # Drop undated rows BEFORE deriving the year span: a single NaN turns the
    # column into floats, and float bounds cannot be used to build the span.
    dated = data.dropna(subset=['date_start', 'date_end'])
    if dated.empty:
        return pd.Series(
            np.zeros(0),
            index=pd.Index(np.arange(0), name='Year'),
            name='Probability',
        )

    # Convert once, outside the loop, instead of re-casting the whole column
    # for every single year of every object.
    starts = dated['date_start'].astype(int).to_numpy()
    ends = dated['date_end'].astype(int).to_numpy()
    weights = dated['range'].to_numpy(dtype=float)

    first_year = int(starts.min())
    last_year = int(ends.max())
    totals = np.zeros(max(last_year - first_year, 0), dtype=float)

    # Add each object's weight to every year it may date to. Objects are
    # accumulated one after another, so each year's sum is built in row order.
    for start, end, weight in zip(starts, ends, weights):
        if end > start:
            totals[start - first_year:end - first_year] += weight

    return pd.Series(
        totals,
        index=pd.Index(np.arange(first_year, last_year), name='Year'),
        name='Probability',
    )
# Summed yearly probability curve for the whole funerary dataset.
probs = calc_prob(data)

# Curve against construction starts: a translucent vertical band per project
# plus a dot on the zero line at the same date.
plt.figure(figsize=(15, 7))
sns.set()
g = sns.lineplot(data=probs, color='#3182bd')
plt.vlines(
    x=df["date_start_build"],
    ymin=0,
    ymax=50,
    alpha=0.13,
    linewidth=10,
)
plt.scatter(df["date_start_build"], np.zeros(len(df), dtype=int), alpha=0.6)
g.set_title('Probability distribution of all objects against the start of the construction date')
g.set_xlim(-50, 300)
g.set_xlabel('Year')
plt.tight_layout()
# Probability curve against both the construction start (dark red) and the
# opening date (orange) of every building project.
plt.figure(figsize=(15, 7))
sns.set()
g = sns.lineplot(data=probs, color='#3182bd')

# Wide, very transparent bands so that clusters of projects show up as
# darker regions.
plt.vlines(
    x=df.date_start_build,
    ymin=0, ymax=50,
    alpha=0.05,
    linewidth=20,
    color="#C70039",
)
plt.vlines(
    x=df.date_start_func,
    ymin=0, ymax=50,
    alpha=0.05,
    linewidth=20,
    color="#FF5733",
)

# One dot per project: construction starts just under the zero line,
# openings just over it.
plt.scatter(
    df.date_start_build,
    np.full(len(df), -0.5),
    alpha=0.6,
    color="#C70039",
)
plt.scatter(
    df.date_start_func,
    np.full(len(df), 0.5),
    alpha=0.6,
    color="#FF5733",
)

# Title spelling corrected ("againts" -> "against").
g.set_title('Probability distribution of all objects against both start of the construction and opening')
plt.xlim(-50, 300)
plt.xlabel('Year')
plt.tight_layout()
# Probability curve (left axis) overlaid with histograms of the opening and
# construction-start dates (right axis).
fig, ax1 = plt.subplots(figsize=(15, 7))
sns.lineplot(data=probs, color='#3182bd', ax=ax1)

# Secondary y axis sharing the same x axis. Every call below names its axes
# explicitly instead of relying on whichever axes happens to be current.
ax2 = ax1.twinx()
# linewidth 0 hides the KDE curve so that only the histogram bars are visible.
sns.distplot(df.date_start_func, bins=50, kde_kws={'linewidth': 0}, ax=ax2)
sns.distplot(df.date_start_build, bins=50, kde_kws={'linewidth': 0}, ax=ax2)

# Title spelling corrected ("againts" -> "against").
ax1.set_title('Probability distribution of all objects against both start of the construction and opening')
# The x-axis of a twinx axes is hidden, so the label has to go on the primary
# axes to be drawn at all.
ax1.set_xlabel('Year')
ax2.set_ylim(0, 0.1)
ax2.set_xlim(-50, 300)
# Switch off the secondary grid so it does not clash with the primary one.
ax2.grid(False)
plt.tight_layout()
# Table of historical events.
hist = pd.read_csv("SI_A_data/dataset_events.csv")
hist.columns
hist.head()

# Shading colour of each event, chosen from its ``impact`` value (0 or 1).
hist["color"] = hist["impact"].map({0: "#006dc7", 1: "#FF5733"})

# Midpoint of each event's date span, used to place its text label.
hist["date_mid"] = hist["date_start"] + (hist["date_end"] - hist["date_start"]) / 2
# Note: this is figure no. 4 in Bobou et al. 2021.
# Large-format figure: probability curve, shaded historical events with
# rotated text labels, and building activity markers.
sns.set(style="whitegrid", font_scale=2.5)
plt.figure(figsize=(30, 14))
plt.xlim(0, 300)
plt.ylim(-2, 50)
g = sns.lineplot(data=probs, color='#3182bd')

# One shaded span and one vertical label per event.
event_fields = zip(hist.date_start, hist.date_end, hist.date_mid, hist.color, hist.event)
for i, (span_start, span_end, span_mid, shade, caption) in enumerate(event_fields):
    plt.axvspan(span_start, span_end, facecolor=shade, alpha=0.13)
    # The event in row 7 gets its label placed higher up than all the others.
    label_height = 17 if i == 7 else 2
    plt.annotate(
        caption,
        (span_mid - 1, label_height),
        rotation=90,
        color=shade,
        fontsize=24,
    )

# Translucent bands for construction starts and openings, drawn in the same
# colour for both date columns.
for date_column in ("date_start_build", "date_start_func"):
    plt.vlines(
        x=df[date_column],
        ymin=0, ymax=50,
        alpha=0.05,
        linewidth=20,
        color="#FF5733",
    )

# Two rows of dots below the zero line, one per kind of date.
plt.scatter(
    df.date_start_build,
    np.full(len(df), -0.5),
    alpha=0.6,
    color="#C70039",
    label="building activity",
)
plt.scatter(
    df.date_start_func,
    np.full(len(df), -1.5),
    alpha=0.6,
    color="#FF5733",
    label="building opening",
)

plt.legend()
plt.ylabel("Cumulative frequency of all objects")
plt.xlabel('Year')
plt.tight_layout()
plt.savefig("figures/Bobou_hist_correlates.png", dpi=300)
# Note: this is figure 8 in Romanowska et al. 2021.
# Re-colour the events for this figure and attach a text label per impact value.
hist["color"] = hist["impact"].map({0: "#3182bd", 1: "#FF5733"})
hist["label"] = hist["impact"].map({0: "negative impact", 1: "positive impact"})

# Smaller figure: probability curve, shaded events (no text labels) and
# building activity markers.
sns.set(font_scale=1.1)
plt.figure(figsize=(15, 8))
plt.xlim(0, 300)
plt.ylim(0, 50)
g = sns.lineplot(data=probs, color='#3182bd', linewidth=2)

# One shaded span per event.
for i, (span_start, span_end, shade) in enumerate(zip(hist.date_start, hist.date_end, hist.color)):
    plt.axvspan(span_start, span_end, facecolor=shade, alpha=0.1)

# Translucent bands for construction starts and openings, same colour for both.
for date_column in ("date_start_build", "date_start_func"):
    plt.vlines(
        x=df[date_column],
        ymin=0, ymax=50,
        alpha=0.1,
        linewidth=20,
        color="#FF5733",
    )

# Two rows of dots just above the zero line, one per kind of date.
plt.scatter(
    df.date_start_build,
    np.full(len(df), 0.5),
    alpha=0.6,
    color="#C70039",
    label="construction start",
)
plt.scatter(
    df.date_start_func,
    np.full(len(df), 1.2),
    alpha=0.6,
    color="#FF5733",
    label="building opening",
)

plt.legend()
plt.xlabel('Year')
plt.tight_layout()
plt.savefig("figures/Rom_all_correlates_small.png", dpi=300)