import pandas as pd import matplotlib.pyplot as plt import seaborn as sns import os import numpy as np import shutil import re # Import the required libraries # Define the function to extract unique elements from a list def unique_list(l): ulist = [] [ulist.append(x) for x in l if x not in ulist] return ulist # Define the function for handling multiple selected choices def multiple_selected_function(): answers = df[col].values.tolist() # Make a list of all responses # Data cleaning to remove NaN values answers_no_nan = list() for element in answers: if str(element) != "nan": answers_no_nan.append(element) # Clean the answers themselves answers_no_nan = [x.replace(' ', '_') for x in answers_no_nan] # Add space with an underscore answers_no_nan = [x.replace(',', ' ') for x in answers_no_nan] # Replace commas with spaces answers_no_nan = [x.replace('?', '') for x in answers_no_nan] # Remove question marks answers_no_nan = ' '.join(answers_no_nan) # Join the list answers_no_nan = ' '.join(unique_list(answers_no_nan.split())) # Remove duplicates answer_list = answers_no_nan.split(" ") # Split the list at spaces answer_list = [type.replace('_', ' ') for type in answer_list] # Replace underscores with spaces answer_proportion = pd.DataFrame(columns=['answer', 'Total']) # Create a table for the results i = 0 # Iterate over each item in the list of answers for item in answer_list: number_of_answer = df[col].str.contains(item).sum() # Count the occurrences of the item answer_proportion.loc[i] = [item] + [number_of_answer] # Add the item and count to the table i += 1 answer_proportion = answer_proportion.sort_values(by=['Total'], ascending=True) # Sort the results # Create a bar chart x_axis = answer_proportion['answer'] y_axis = answer_proportion['Total'] plt.figure(figsize=(15, 15)) plt.xticks(rotation=45) grouped_df = sns.barplot(data=answer_proportion, x=x_axis, y=y_axis, color='#9e291c') plt.title(col, pad=32, wrap=True) sns.set(font_scale=2) save_the_bar_chart() # Create a pie chart labels = answer_proportion['answer'] chart = answer_proportion.plot.pie(y="Total", ylabel='', labels=labels, x=col, xlabel=item, autopct='%1.0f%%', figsize=(15, 7.5), legend=None, colors=CB_color_cycle) plt.title(col, pad=32, wrap=True) sns.set(font_scale=2) save_the_pie_chart() # Define the function for handling single selected choices def single_selected_function(): grouped_df = df.groupby(col)["ID"].count() # Count the number of responses of each type grouped_df = grouped_df.reset_index() # Reset the index grouped_df.columns = [col, 'Number of Results'] # Name the columns grouped_df = grouped_df.sort_values(by=['Number of Results'], ascending=True) # Sort the results analysing_publicly_available_data = pd.DataFrame({col: [], "Number of Results": []}) x_axis = col y_axis = 'Number of Results' plt.figure(figsize=(15, 7.5)) grouped_df = sns.barplot(data=grouped_df, x=x_axis, y=y_axis, color='#9e291c') plt.title(col, pad=32, wrap=True) grouped_df.yaxis.get_major_formatter().set_scientific(False) grouped_df.yaxis.get_major_formatter().set_useOffset(False) save_the_bar_chart() grouped_df = df.groupby(col)["ID"].count().plot(kind='pie', ylabel='', autopct='%1.0f%%', figsize=(15, 7.5), colors=CB_color_cycle) plt.title(col, pad=32, wrap=True) sns.set(font_scale=2) save_the_pie_chart() # Define the function to save the pie chart def save_the_pie_chart(): place_to_save = "../charts/image/pie/" # Set the location to save the file file_name = col.replace("-", "\-") # Remove any possible name challenges file_name = col.replace("]", "\]") file_name = col.replace("\\", "\\") file_name = col.replace("/", "\/") file_name = col.replace("^", "\^") file_name = col.replace("$", "\$") file_name = col.replace("*", "\*") file_name = col.replace(".", "\.") file_name = col.replace("(", "\(") file_name = col.replace(")", "\)") file_name = col.replace(" ", "_") file_name = col.replace("?", "") if "Rank the following in descending order of importance when analysing publicly available data. To rank, please click and drag each item to the desired order. -" in col: file_name = col.replace("Rank the following in descending order of importance when analysing publicly available data. To rank, please click and drag each item to the desired order. - ", "") plt.savefig(place_to_save+file_name+'.png', dpi=300) # Save the file plt.close() # Clear the chart plt.clf() # Define the function to save the bar chart def save_the_bar_chart(): place_to_save = "../charts/image/bar/" # Set the location to save the file file_name = col.replace("-", "\-") # Remove any possible name challenges file_name = col.replace("]", "\]") file_name = col.replace("\\", "\\") file_name = col.replace("/", "\/") file_name = col.replace("^", "\^") file_name = col.replace("$", "\$") file_name = col.replace("*", "\*") file_name = col.replace(".", "\.") file_name = col.replace("(", "\(") file_name = col.replace(")", "\)") file_name = col.replace(" ", "_") file_name = col.replace("?", "") if "Rank the following in descending order of importance when analysing publicly available data. To rank, please click and drag each item to the desired order. -" in col: file_name = col.replace("Rank the following in descending order of importance when analysing publicly available data. To rank, please click and drag each item to the desired order. - ", "") plt.savefig(place_to_save+file_name+'.png', dpi=300) # Save the file plt.close() # Clear the chart plt.clf() # Set the file names old_name = r"data/backup/bioarchaeology_reuse.csv" new_name = r"data/bioarchaeology_reuse.csv" # Set the root folder ROOT_DIR =