# Preview

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import numpy as np
import shutil
import re

# Import the required libraries

# Define the function to extract unique elements from a list
def unique_list(l):
    """Return the distinct elements of *l* in order of first appearance.

    Args:
        l: Any iterable. It is consumed once and never modified.

    Returns:
        A new list holding each element once, in the order it was first seen.
    """
    items = list(l)  # Materialise once so the fallback below sees every element
    try:
        # Dicts keep insertion order, so this de-duplicates in linear time.
        return list(dict.fromkeys(items))
    except TypeError:
        # Unhashable elements (e.g. lists) cannot be dict keys; fall back to
        # the equality-based scan.
        ulist = []
        for x in items:
            if x not in ulist:
                ulist.append(x)
        return ulist

# Define the function for handling multiple selected choices
def multiple_selected_function():
    """Chart a multiple-choice question stored in the global column ``col``.

    Reads the module-level ``df``, ``col`` and ``CB_color_cycle``. Each
    non-missing value of ``df[col]`` is treated as a comma-separated list of
    the options a respondent selected. The distinct options are collected in
    order of first appearance, the number of responses containing each option
    is counted, and a bar chart and a pie chart of those counts are written
    through ``save_the_bar_chart`` and ``save_the_pie_chart``.

    Options are matched as literal substrings of the responses, so an option
    that is contained in a longer option is also counted for the longer one.

    Returns:
        None. Nothing is drawn or saved when the column holds no answers.
    """
    # Make a list of all responses, leaving out missing values (NaN)
    responses = [r for r in df[col].values.tolist() if str(r) != "nan"]

    # Split every response at the commas. Surrounding whitespace is dropped so
    # that "A,B" and "A, B" produce the same labels, and question marks are
    # removed from the labels.
    options = []
    for response in responses:
        for option in response.replace('?', '').split(','):
            option = option.strip()
            if option:
                options.append(option)
    answer_list = unique_list(options)  # Remove duplicates, keep first-seen order

    if not answer_list:
        return  # No answers at all: there is nothing to chart

    # Count the responses that mention each option. regex=False matches the
    # option text literally (brackets, dots, plus signs, ... are not patterns);
    # na=False counts missing responses as "not selected".
    responses_column = df[col]
    totals = [
        responses_column.str.contains(option, regex=False, na=False).sum()
        for option in answer_list
    ]

    # Create a table for the results and sort it
    answer_proportion = pd.DataFrame({'answer': answer_list, 'Total': totals})
    answer_proportion = answer_proportion.sort_values(by=['Total'], ascending=True)

    # Create a bar chart
    plt.figure(figsize=(15, 15))
    plt.xticks(rotation=45)
    sns.barplot(data=answer_proportion, x='answer', y='Total', color='#9e291c')
    plt.title(col, pad=32, wrap=True)
    sns.set(font_scale=2)  # Global seaborn setting; applies to charts drawn from here on
    save_the_bar_chart()

    # Create a pie chart. x/xlabel have no meaning for a pie chart, so they
    # are not passed.
    answer_proportion.plot.pie(
        y="Total",
        ylabel='',
        labels=answer_proportion['answer'],
        autopct='%1.0f%%',
        figsize=(15, 7.5),
        legend=None,
        colors=CB_color_cycle,
    )
    plt.title(col, pad=32, wrap=True)
    sns.set(font_scale=2)
    save_the_pie_chart()

# Define the function for handling single selected choices
def single_selected_function():
    """Chart a single-choice question stored in the global column ``col``.

    Reads the module-level ``df``, ``col`` and ``CB_color_cycle``. The rows of
    ``df`` are grouped by their value in ``col`` and the non-missing ``ID``
    values of each group are counted. The counts are drawn as a bar chart
    (sorted ascending) and as a pie chart, written through
    ``save_the_bar_chart`` and ``save_the_pie_chart``.

    Returns:
        None.
    """
    # Count the number of responses of each type (used by both charts)
    counts = df.groupby(col)["ID"].count()

    # Table for the bar chart: one row per answer, smallest count first
    bar_data = counts.reset_index()
    bar_data.columns = [col, 'Number of Results']
    bar_data = bar_data.sort_values(by=['Number of Results'], ascending=True)

    # Create a bar chart
    plt.figure(figsize=(15, 7.5))
    bar_axes = sns.barplot(data=bar_data, x=col, y='Number of Results', color='#9e291c')
    plt.title(col, pad=32, wrap=True)
    # Show the counts as plain numbers (no scientific notation, no offset)
    y_formatter = bar_axes.yaxis.get_major_formatter()
    y_formatter.set_scientific(False)
    y_formatter.set_useOffset(False)
    save_the_bar_chart()

    # Create a pie chart from the same counts (in group order, not sorted)
    counts.plot(kind='pie', ylabel='', autopct='%1.0f%%', figsize=(15, 7.5), colors=CB_color_cycle)
    plt.title(col, pad=32, wrap=True)
    sns.set(font_scale=2)  # Global seaborn setting; applies to charts drawn from here on
    save_the_pie_chart()

# Shared pieces used by both chart-saving functions

# Prefix common to every "rank the following" column. It is removed from the
# file name so that only the ranked item itself names the chart.
_RANK_QUESTION_PREFIX = "Rank the following in descending order of importance when analysing publicly available data. To rank, please click and drag each item to the desired order. - "

# Character rewrites applied to a column name to obtain a file name: spaces,
# path separators and "*" become underscores, question marks are removed.
_FILE_NAME_TRANSLATION = str.maketrans({" ": "_", "/": "_", "\\": "_", "*": "_", "?": None})

def chart_file_name(column_name):
    """Return the base file name (without extension) for a column's chart.

    The ranking-question prefix is stripped first; then spaces, "/", "\\" and
    "*" are replaced by underscores and "?" is removed. The replacements are
    applied cumulatively to the same string. All other characters (for
    example "-", ".", "(" and ")") are kept unchanged.

    Args:
        column_name: The column title the chart was drawn for.

    Returns:
        The cleaned name as a string.
    """
    without_prefix = column_name.replace(_RANK_QUESTION_PREFIX, "")
    return without_prefix.translate(_FILE_NAME_TRANSLATION)

def _save_current_chart(place_to_save):
    """Save the current matplotlib figure for the global ``col`` as a PNG.

    Args:
        place_to_save: Directory prefix (ending in a path separator) that the
            file name is appended to.
    """
    plt.savefig(place_to_save + chart_file_name(col) + '.png', dpi=300)  # Save the file
    plt.close()  # Close the figure that was just saved
    # NOTE: clf() acts on whichever figure is current after the close (pyplot
    # creates an empty one if none is left). The call order is kept as it was
    # so the next chart still starts from a cleared figure.
    plt.clf()

# Define the function to save the pie chart
def save_the_pie_chart():
    """Save the current figure to ../charts/image/pie/, named after ``col``."""
    _save_current_chart("../charts/image/pie/")

# Define the function to save the bar chart
def save_the_bar_chart():
    """Save the current figure to ../charts/image/bar/, named after ``col``."""
    _save_current_chart("../charts/image/bar/")

# Set the file names
# Two relative paths to the same CSV file name: one under data/backup/ and
# one directly under data/.
# NOTE(review): presumably the backup copy is the source and the data/ copy
# the destination of a later copy/rename step -- confirm where these are used.
old_name = r"data/backup/bioarchaeology_reuse.csv"
new_name = r"data/bioarchaeology_reuse.csv"

# Set the root folder
ROOT_DIR =