In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Activate seaborn's default plot styling (called with no arguments, so all defaults).
sns.set()

Palmyra's timeline: historical events and construction projects

Author: Iza Romanowska

Date: 13/10/2020

Associated data: Dataset_events: Romanowska, Iza 2020 “Historical events at Palmyra, Syria”, version 1.0. Zenodo. Doi: xxxx; Dataset_architecture: Romanowska, Iza 2020 “Major construction projects at Palmyra, Syria”, version 1.0. Zenodo. Doi: 10.5281/zenodo.4669962

Associated publications: Romanowska, I. et al. in press “Reconstructing the social, economic and demographic trends of Palmyra’s elite from funerary data.” Journal of Archaeological Science: xxx.

Bobou, O. et al. 2021 “Historical Trajectories of Palmyra’s elites through the lens of archaeological data.” Journal of Urban Archaeology 4: xxx.

Raja, R. et al. in press “300 years of Palmyrene history through deaths. Unlocking the potential of archaeological and historical data for studying social transformation.”

Description: Data analysis script contrasting the cumulative probability curve derived from Palmyra's funerary data (portraits, burials and tombs) with major historical events in the region and monumental architecture projects in the city.

In [2]:
# Load the architecture (construction projects) table into `df`.
# The path is relative, so it resolves against the notebook's working directory.
df = pd.read_csv("SI_A_data/dataset_architecture.csv")
In [3]:
# Preview the first rows to check which columns were read.
df.head()
Out[3]:
name date_start_build date_start_func date_end_func date_start1 function ref notes Unnamed: 8 Unnamed: 9
0 Temple of Bel 19 32 175.0 NaN religion Smith 2013, 60-62 NaN NaN NaN
1 Temple of Bel portico 75 100 NaN NaN religion NaN NaN NaN NaN
2 Temple of Allat early -50 -50 NaN NaN religion Gawlikowski 2019 NaN NaN NaN
3 Temple of Allat second 140 150 NaN NaN religion Kaizer 2019 NaN NaN NaN
4 Altar of Allat "the lady of the house" 115 115 NaN NaN religion Kaizer 2019 NaN NaN NaN

Data overview: monumental architecture

In [4]:
# Order the buildings by `date_start_func` and renumber the rows 0..n-1, because
# the plots below use `df.index` as the vertical position of each building.
# drop=True discards the old index instead of inserting it as an extra column;
# without it every re-run of this cell adds another column ('index', then
# 'level_0') and a third run raises ValueError.
df = df.sort_values(by=['date_start_func']).reset_index(drop=True)
In [5]:
# One row per building (rows follow the current order of `df`): a bar from
# `date_start_build` to `date_start_func`, with a dot at `date_start_func`.
fig, ax = plt.subplots(figsize=(14, 5), dpi=80)
building_rows = df.index
ax.hlines(
    y=building_rows,
    xmin=df.date_start_build,
    xmax=df.date_start_func,
    linewidth=6,
    alpha=0.3,
)
ax.scatter(df.date_start_func, building_rows, alpha=0.6)
Out[5]:
<matplotlib.collections.PathCollection at 0x21406813df0>

Add the main dataset

In [6]:
# Load the funerary dataset (the "main dataset") from the Excel workbook into `data`.
# The path is relative, so it resolves against the notebook's working directory.
data = pd.read_excel('SI_A_data/dataset_funerary.xlsx')
In [7]:
def _leading_year(value):
    """Return the part of a slash-separated date string before the first '/', as an int."""
    return int(value.split('/')[0])


# Some date cells are not plain numbers but strings containing '/'
# (presumably two alternative years, e.g. "150/160" -- confirm against the data).
# For those cells only the part before the first '/' is kept.
for column in ("date_start", "date_end"):
    parsed = pd.to_numeric(data[column], errors='coerce')
    # Only repair cells that hold a value which failed to parse; genuinely
    # missing cells stay NaN instead of crashing on `.split`.
    unparsed = parsed.isna() & data[column].notna()
    if unparsed.any():
        data.loc[unparsed, column] = data.loc[unparsed, column].apply(_leading_year)
    # errors='raise' makes the cell fail loudly if anything is still non-numeric.
    data[column] = pd.to_numeric(data[column], errors='raise')
In [8]:
def calc_prob(data, multiplier=1):
    """Sum, year by year, the dating probability of every object in ``data``.

    Each row is treated as an object dated to the half-open interval
    ``[date_start, date_end)``. The object adds
    ``multiplier / (date_end - date_start)`` to every year in that interval, so
    with ``multiplier=1`` its contributions add up to 1. Rows whose
    ``date_end`` is not greater than ``date_start`` contribute nothing.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain numeric ``date_start`` and ``date_end`` columns. Rows with
        a missing value in either column are ignored. The frame is not modified.
    multiplier : number, optional
        Factor applied to every object's per-year contribution (default 1).

    Returns
    -------
    pandas.Series
        Named ``'Probability'`` and indexed by ``'Year'``, running from the
        smallest ``date_start`` up to (but excluding) the largest ``date_end``.
        Years not covered by any object hold 0. The Series is empty when either
        column has no values at all.
    """
    # The oldest and youngest dates fix the span of the result. They are taken
    # from the full frame (NaNs are skipped by min/max).
    minimum = data["date_start"].min()
    maximum = data["date_end"].max()
    if pd.isnull(minimum) or pd.isnull(maximum):
        return pd.Series(
            [],
            index=pd.Index([], dtype='int64', name='Year'),
            name='Probability',
            dtype=float,
        )

    # Columns containing NaN are float; range() and slicing need real integers.
    first_year = int(minimum)
    last_year = int(maximum)
    years = list(range(first_year, last_year))
    totals = np.zeros(len(years), dtype=float)

    # Drop NaNs because they cannot be recast into integers.
    valid = data.dropna(subset=['date_start', 'date_end'])
    # Convert each column once, not once per row.
    starts = valid['date_start'].astype(int).to_numpy()
    ends = valid['date_end'].astype(int).to_numpy()
    # Per-year contribution of each object, kept local so the caller's frame
    # does not gain a 'range' column as a side effect.
    weights = ((1 / (valid['date_end'] - valid['date_start'])) * multiplier).to_numpy()

    # For each object, add its contribution to every year of its interval.
    for start, end, weight in zip(starts, ends, weights):
        if end > start:
            totals[start - first_year:end - first_year] += weight

    # Recast into a labelled Series.
    return pd.Series(
        totals,
        index=pd.Index(years, dtype='int64', name='Year'),
        name='Probability',
    )

Different visualisations

In [9]:
# Year-by-year curve for the whole funerary dataset; later cells reuse `probs`.
probs = calc_prob(data)

plt.figure(figsize=(15, 7))
sns.set()
g = sns.lineplot(data=probs, color='#3182bd')

# Mark each construction start with a faint vertical band and a dot on the y=0 line.
construction_start = df.date_start_build
on_zero_line = df.index - df.index  # one zero per building
plt.vlines(x=construction_start, ymin=0, ymax=50, linewidth=10, alpha=0.13)
plt.scatter(construction_start, on_zero_line, alpha=0.6)

g.set_title('Probability distribution of all objects against the start of the construction date')
plt.xlim(-50, 300)
plt.xlabel('Year')
plt.tight_layout()
In [10]:
# Reuses `probs` computed in an earlier cell.
plt.figure(figsize = (15,7))
sns.set()
g = sns.lineplot(data = probs, color ='#3182bd')

# Faint vertical bands: construction start and start of use of each building.
plt.vlines(x=df.date_start_build,
           ymin=0, ymax=50,
           alpha=0.05,
           linewidth=20,
           color = "#C70039")
plt.vlines(x=df.date_start_func,
           ymin=0, ymax=50,
           alpha=0.05,
           linewidth=20,
           color =  "#FF5733")

# `df.index - df.index` is a vector of zeros, one per building.
plt.scatter(df.date_start_build,
            df.index - df.index-.5,  # put the dot just under the 0 line
            alpha=0.6,
            color = "#C70039")
plt.scatter(df.date_start_func,
            df.index - df.index+.5,  # put the dot just over the 0 line
            alpha=0.6,
            color =  "#FF5733")

# Title typo fixed ("againts" -> "against").
g.set_title('Probability distribution of all objects against both start of the construction and opening')
plt.xlim(-50, 300)
plt.xlabel('Year')
plt.tight_layout()
In [11]:
# Funerary curve on the left axis, histograms of the building dates on a twin
# right-hand axis. Reuses `probs` computed in an earlier cell.
fig, ax1 = plt.subplots(figsize = (15,7))
sns.lineplot(data = probs, color ='#3182bd', ax = ax1)

# Axes are passed explicitly so the result does not depend on which axes
# happens to be "current".
ax2 = ax1.twinx()
# linewidth 0 hides the KDE line drawn by distplot, leaving only the bars.
sns.distplot(df.date_start_func, bins = 50, kde_kws={'linewidth':0}, ax = ax2)
sns.distplot(df.date_start_build, bins = 50, kde_kws={'linewidth':0}, ax = ax2)

# Title typo fixed ("againts" -> "against").
ax1.set_title('Probability distribution of all objects against both start of the construction and opening')
ax2.set_ylim(0, 0.1)
ax1.set_xlim(-50, 300)  # the x-axis is shared with ax2
# The twin axes has an invisible x-axis, so the label must go on ax1 to be shown.
ax1.set_xlabel('Year')
# Switch the second grid off explicitly rather than toggling it.
ax2.grid(False)
fig.tight_layout()

Add historical events

In [12]:
# Load the historical-events table into `hist`.
# The path is relative, so it resolves against the notebook's working directory.
hist = pd.read_csv("SI_A_data/dataset_events.csv")
In [13]:
# Preview the events table. The header row of the preview already lists the
# columns, so the former bare `hist.columns` (whose value was never displayed,
# since only the last expression of a cell is shown) is dropped.
hist.head()
Out[13]:
event date_start date_end impact
0 Antonine Plague 166 180 0
1 Cyprian Plague 250 270 0
2 Caravan related inscriptions 130 160 1
3 Nabatean Kingdom defeated 106 107 1
4 Hadrian visit 130 131 1
In [14]:
# We calculate the mid value to better place the labels
hist["color"] = hist.impact.map({0:"#006dc7", 1:"#FF5733"})
hist["date_mid"] = hist.date_start + (hist.date_end - hist.date_start) / 2
#hist.tail(30)

A full picture: funerary data + historical events + construction projects

Note this is figure no. 4 in Bobou et al. 2021.

In [15]:
sns.set(style="whitegrid", font_scale=2.5)
plt.figure(figsize = (30,14))
plt.xlim(0, 300)
plt.ylim(-2, 50)
g = sns.lineplot(data = probs, color ='#3182bd')

# Shade the span of every historical event and write its name at the midpoint.
for i in range(len(hist)):
    event_color = hist.color[i]
    plt.axvspan(hist.date_start[i], hist.date_end[i], facecolor=event_color, alpha=0.13)
    # The event in row 7 gets its label higher up than the others
    # (presumably to avoid overlapping a neighbouring label -- confirm).
    label_y = 17 if i == 7 else 2
    plt.annotate(hist.event[i], (hist.date_mid[i]-1, label_y), rotation = 90, color = event_color, fontsize = 24)

# Faint vertical bands at the construction start and start of use of each building.
plt.vlines(x=df.date_start_build,
           ymin=0, ymax=50,
           alpha=0.05,
           linewidth=20,
           color = "#FF5733")
plt.vlines(x=df.date_start_func,
           ymin=0, ymax=50,
           alpha=0.05,
           linewidth=20,
           color =  "#FF5733")

# `df.index - df.index` is a vector of zeros; the offsets put both rows of dots
# just below the 0 line.
plt.scatter(df.date_start_build,
            df.index - df.index-.5,
            alpha=0.6,
            color = "#C70039",
            label = "building activity")
plt.scatter(df.date_start_func,
            df.index - df.index-1.5,
            alpha=0.6,
            color =  "#FF5733",
            label = "building opening")
plt.legend()
plt.ylabel("Cumulative frequency of all objects")
plt.xlabel('Year')
plt.tight_layout()
# savefig does not create missing directories; make sure the target exists so
# the notebook runs on a fresh checkout.
Path("figures").mkdir(parents=True, exist_ok=True)
plt.savefig("figures/Bobou_hist_correlates.png", dpi = 300)

This is figure 8 in Romanowska et al. 2021

In [16]:
# Re-colour the events for this figure and add a text label for each impact code.
impact_colors = {0: "#3182bd", 1: "#FF5733"}
impact_labels = {0: "negative impact", 1: "positive impact"}
hist["color"] = hist["impact"].map(impact_colors)
hist["label"] = hist["impact"].map(impact_labels)
In [17]:
sns.set(font_scale=1.1)
plt.figure(figsize = (15,8))
plt.xlim(0, 300)
plt.ylim(0, 50)
g = sns.lineplot(data = probs, color ='#3182bd', linewidth=2)

# Shade the span of every historical event in its impact colour.
for i in range(len(hist)):
    plt.axvspan(hist.date_start[i], hist.date_end[i], facecolor=hist.color[i], alpha=0.1)

# Faint vertical bands at the construction start and start of use of each building.
plt.vlines(x=df.date_start_build,
           ymin=0, ymax=50,
           alpha=0.1,
           linewidth=20,
           color = "#FF5733")
plt.vlines(x=df.date_start_func,
           ymin=0, ymax=50,
           alpha=0.1,
           linewidth=20,
           color =  "#FF5733")

# `df.index - df.index` is a vector of zeros; the offsets put both rows of dots
# just above the 0 line.
plt.scatter(df.date_start_build,
            df.index - df.index+.5,
            alpha=0.6,
            color = "#C70039",
            label = "construction start")
plt.scatter(df.date_start_func,
            df.index - df.index+1.2,
            alpha=0.6,
            color =  "#FF5733",
            label = "building opening")
plt.legend()

plt.xlabel('Year')
plt.tight_layout()
# savefig does not create missing directories; make sure the target exists so
# the notebook runs on a fresh checkout.
Path("figures").mkdir(parents=True, exist_ok=True)
plt.savefig("figures/Rom_all_correlates_small.png", dpi = 300)
In [ ]: