"""
This file sets up the health system resources for each district, each region, and also national level.

It defines 7 levels for facility types, i.e., Facility_Levels = [0,1a,1b,2,3,4,5].

It creates one facility of each level for each district.

It allocates health care workers ('officers') to one of the seven Facility Levels.

The tables generated are listed below:
- capability tables to repo
  Scenario 'actual' -> ResourceFile_Daily_Capabilities (./resources/healthsystem/human_resources/actual/)
  Scenario 'funded' -> ResourceFile_Daily_Capabilities (./resources/healthsystem/human_resources/funded/)
  Scenario 'funded_plus' -> ResourceFile_Daily_Capabilities (./resources/healthsystem/human_resources/funded_plus/)

- definition tables to repo
  ResourceFile_Appt_Time_Table (./resources/healthsystem/human_resources/definitions/)
  ResourceFile_Appt_Types_Table (./resources/healthsystem/human_resources/definitions/)
  ResourceFile_ApptType_By_FacLevel_Table (./resources/healthsystem/human_resources/definitions/)
  ResourceFile_Officer_Types_Table (./resources/healthsystem/human_resources/definitions/)

- organisation tables to repo
  ResourceFile_Master_Facilities_List_Table (./resources/healthsystem/human_resources/organisation/)

- other tables that can be generated by this file
  Scenario 'actual' -> ResourceFile_Staff_Table
  Scenario 'funded' -> ResourceFile_Staff_Table
  Scenario 'funded_plus' -> ResourceFile_Staff_Table
  Scenario 'actual' -> ResourceFile_Staff_Distribution_Assumption
  Scenario 'funded' -> ResourceFile_Staff_Distribution_Assumption
  ResourceFile_Staff_Distribution_Compare
  ResourceFile_Patient_Facing_Time
  ResourceFile_District_Population_Data
  ResourceFile_Facilities_For_Each_District
  Scenario 'actual' -> appt_have_or_miss_capability
  Scenario 'funded' -> appt_have_or_miss_capability
  Scenario 'funded_plus' -> appt_have_or_miss_capability
"""

from pathlib import Path

import numpy as np
import pandas as pd

# Folder holding the resource files, relative to the working directory
resourcefilepath = Path('./resources')

# Local copy of the TLO dropbox  <-- point this to the TLO dropbox on your machine
path_to_dropbox = Path('/Users/jdbb1/Dropbox/Thanzi La Onse')

# Folder in the dropbox under which all the CHAI workbooks used by this script are stored
chai_data_folder = path_to_dropbox / '05 - Resources' / 'Module-healthsystem' / 'chai ehp resource use data'

# CHAI workbook dated 2022-02-11 (the 'Staff' sheet is read from it below)
workingfile = chai_data_folder / 'ORIGINAL' / 'Malawi optimization model import_2022-02-11.xlsx'

# Earlier CHAI workbook (2018-03-15, v10)
working_file_old = chai_data_folder / 'ORIGINAL' / 'Optimization model import_Malawi_20180315 v10.xlsx'

# Auxiliary datasets received from the CHAI HR team
path_to_auxiliaryfiles = chai_data_folder / 'Auxiliary CHAI Data from CHAI HR Team 12 Sep 2021'

# Where the health system resource files are written
outputlocation = resourcefilepath / 'healthsystem'

# ---------------------------------------------------------------------------------------------------------------------
# *** Population by district, together with the region each district belongs to
# Read the 2018 census population sizes
census_file = resourcefilepath / 'demography' / 'ResourceFile_PopulationSize_2018Census.csv'
population = pd.read_csv(census_file)

# Sum 'Count' over all census rows of a district; keep the result as a frame indexed by district
pop_by_district = population.groupby('District')['Count'].sum().to_frame()

# Add the column of Region: the region given on the first census row listed for each district
region_of_district = population.drop_duplicates(subset='District').set_index('District')['Region']
pop_by_district['Region'] = region_of_district

# Save
# pop_by_district.to_csv(outputlocation / 'organisation' / 'ResourceFile_District_Population_Data.csv', index=True)

# ---------------------------------------------------------------------------------------------------------------------
# *** Staffing tables: fund_staffing_table for established staff and curr_staffing_table for current staff.
# They need, first of all, wb_import and officer_types_table, plus assumptions on how current and funded staff
# are distributed across facility levels (derived further below from the Auxiliary CHAI Data).

# --- wb_import for staff information
# The whole 'Staff' sheet is read without a header row, so rows and columns are addressed by integer position.
# The sheet holds the data of both current and funded staff.
wb_import = pd.read_excel(workingfile, sheet_name='Staff', header=None)

# --- officer_types_table
# Rows 2-3 of columns 64-84 hold the officer type names and their codes; turn them into one row per officer type
officer_types_table = wb_import.loc[2:3, 64:84].T.reset_index(drop=True).copy()
officer_types_table.columns = ['Officer_Type', 'Officer_Type_Code']

# Category of each officer type, given as (row label or inclusive row-label slice, category)
officer_category_rows = [
    (slice(0, 2), 'Clinical'),
    (slice(3, 4), 'Nursing_and_Midwifery'),
    (slice(5, 7), 'Pharmacy'),
    (slice(8, 10), 'Laboratory'),
    (11, 'DCSA'),
    (slice(12, 14), 'Dental'),
    (15, 'Mental'),
    (16, 'Nutrition'),
    (slice(17, 20), 'Radiography'),
]
for category_rows, category_name in officer_category_rows:
    officer_types_table.loc[category_rows, 'Officer_Category'] = category_name

# Save
officer_types_file = outputlocation / 'human_resources' / 'definitions' / 'ResourceFile_Officer_Types_Table.csv'
officer_types_table.to_csv(officer_types_file, index=False)

# --- Generate assumptions of current staff distribution at facility levels 0&1a&1b&2
# Read the compiled staff return data from the CHAI auxiliary datasets (the first five rows of the sheet are skipped)
compiled_staff_return = pd.read_excel(
    path_to_auxiliaryfiles / 'Compiled Staff Returns.xlsx',
    sheet_name='Compiled Staff Returns',
    skiprows=range(5),
)

# Keep the relevant columns only, and only the rows that are complete in all of them
staff_return_columns = ['District / Central Hospital', 'MOH/ CHAM', 'Name of Incumbent', 'Cadre',
                        'Health Facility', 'Health Facility Type']
curr_staff_return = compiled_staff_return[staff_return_columns].copy()
curr_staff_return.dropna(inplace=True)

# Drop the rows flagged '_NOT INCLUDED' or '_MISSING', and the rows of 'Home Craft Worker' and
# 'Educ/Environ Health Officer', as these two cadres are not included in 'Time_Base' and 'PFT'.
cadres_to_drop = ['_NOT INCLUDED', '_MISSING', 'Home Craft Worker', 'Educ/Environ Health Officer']
curr_staff_return.drop(
    index=curr_staff_return.index[curr_staff_return['Cadre'].isin(cadres_to_drop)],
    inplace=True,
)

# Rename cadres to be consistent with officer_types_table
curr_cadre_renames = {
    'HSA': 'DCSA',
    'Nutrition Officer': 'Nutrition Staff',
    'Pharmacy Technician': 'Pharm Technician',
    'Pharmacy Assistant': 'Pharm Assistant',
}
for name_in_return, name_in_table in curr_cadre_renames.items():
    curr_staff_return.loc[curr_staff_return['Cadre'] == name_in_return, 'Cadre'] = name_in_table

# The health facility type "Karonga Hospital" is recorded as "District Hospital"
curr_staff_return.loc[
    curr_staff_return['Health Facility Type'] == 'Karonga Hospital', 'Health Facility Type'] = 'District Hospital'

# Zomba Mental Hospital gets its own facility type, instead of 'Central Hospital',
# to differentiate it from the other central hospitals
curr_staff_return.loc[
    curr_staff_return['Health Facility'] == 'Zomba Mental Hospital', 'Health Facility Type'] = 'Zomba Mental Hospital'

# Each remaining row is one member of staff
curr_staff_return['Staff_Count'] = 1

# Renumber the rows after the drops
curr_staff_return.reset_index(drop=True, inplace=True)

# Important definition: Facility_Levels = [0, 1a, 1b, 2, 3, 4, 5]
# 0: Community/Local level - HP, Village Health Committee, Community initiatives
# 1a: Primary level - Dispensary, HC, Clinic, Maternity facility
# 1b: Primary level - Community/Rural Hospital, CHAM (Community) Hospitals
# 2: Second level - District hospital, DHO
# 3: Tertiary/Referral level - KCH, MCH, ZCH + QECH as referral hospitals
# 4: Zomba Mental Hospital, which has very limited data in CHAI dataset
# 5: Headquarter, which has staff data (but no Time_Base or Incidence_Curr data)

# Distinct health facility types and cadres present in the staff return.
# Note three cadres of 'R04 Radiotherapy Technician', 'R03 Sonographer', 'D03 Dental Assistant' have no data
# in the CHAI current and funded staff sheet nor in the compiled staff return dataset.
fac_types_list = pd.unique(curr_staff_return['Health Facility Type'])  # Level_0 Facs and Headquarter not included
cadre_list = pd.unique(curr_staff_return['Cadre'])  # Radiotherapy Technician/Sonographer/Dental Assistant not included

# Add column 'Facility_Level'; HQ is not listed in the compiled staff return table
curr_level_of_facility_type = {
    'Urban Health Center': 'Facility_Level_1a',  # Including CHAM HCs
    'Rural Health Center': 'Facility_Level_1a',  # Including CHAM HCs
    'Community Hospital': 'Facility_Level_1b',  # Including CHAM community hospitals
    'District Hospital': 'Facility_Level_2',
    'Central Hospital': 'Facility_Level_3',
    'Zomba Mental Hospital': 'Facility_Level_4',
}
for facility_type_name, facility_level_name in curr_level_of_facility_type.items():
    curr_staff_return.loc[
        curr_staff_return['Health Facility Type'] == facility_type_name, 'Facility_Level'] = facility_level_name

# Add column 'Cadre_Code': the code of the first officer type whose name equals the cadre name
for cadre_name in cadre_list:
    codes_for_cadre = officer_types_table.loc[officer_types_table['Officer_Type'] == cadre_name, 'Officer_Type_Code']
    curr_staff_return.loc[curr_staff_return['Cadre'] == cadre_name, 'Cadre_Code'] = codes_for_cadre.iloc[0]

# Check no blanks in this table (an unmapped facility type would leave a blank 'Facility_Level')
assert not pd.isnull(curr_staff_return).any().any()

# curr_staff_return ready!

# Distribution of current staff among levels 0, 1a, 1b and 2, i.e., the staff distribution within a district.
# Specifically, only and all DCSAs/HSAs are to be allocated at level 0;
# other cadres are to be allocated at level 1a and above.

curr_staff_district = curr_staff_return[['Facility_Level', 'Cadre_Code', 'Staff_Count']].copy()

# Staff count per (cadre, facility level), ordered by cadre code and then by facility level
curr_staff_distribution = (
    curr_staff_district
    .groupby(by=['Cadre_Code', 'Facility_Level'], sort=False)
    .sum()
    .sort_index(level=[0, 1])
    .reset_index(drop=False)
)

# Template with one row for every officer code at every facility level (0,1a,1b,2,3,4), so that
# combinations absent from the staff return also appear in curr_staff_distribution
curr_level_names = ['Facility_Level_0', 'Facility_Level_1a', 'Facility_Level_1b',
                    'Facility_Level_2', 'Facility_Level_3', 'Facility_Level_4']
cadre_faclevel = pd.DataFrame(columns=['Cadre_Code'] + curr_level_names)
cadre_faclevel['Cadre_Code'] = officer_types_table['Officer_Type_Code']
cadre_faclevel = pd.melt(cadre_faclevel, id_vars='Cadre_Code', value_vars=curr_level_names,
                         var_name='Facility_Level')

# Right-merge onto the template, count missing combinations as 0 staff, re-sort, and
# discard the placeholder column 'value' created by the melt
curr_staff_distribution = (
    curr_staff_distribution
    .merge(cadre_faclevel, how='right')
    .fillna(0)
    .set_index(['Cadre_Code', 'Facility_Level'])
    .sort_index(level=[0, 1])
    .reset_index(drop=False)
    .drop(columns=['value'])
)

# Save the complete current staff distribution table
# curr_staff_distribution_complete = curr_staff_distribution.copy()

# Keep and focus on rows of levels 0, 1a, 1b, and 2
levels_kept_for_curr = ['Facility_Level_0', 'Facility_Level_1a', 'Facility_Level_1b', 'Facility_Level_2']
idx_keep = curr_staff_distribution.index[curr_staff_distribution['Facility_Level'].isin(levels_kept_for_curr)]
curr_staff_distribution = curr_staff_distribution.loc[idx_keep, :].copy()
curr_staff_distribution.reset_index(drop=True, inplace=True)

# Add column 'Proportion': the share of a cadre's staff found at level_1a, level_1b and level_2.
# The rows now come in runs of four per cadre (levels 0, 1a, 1b, 2); for the cadre at position k, row 4k is
# level 0 and rows 4k+1..4k+3 are levels 1a, 1b, 2. Cadres except DCSA are allocated at level 1a and above,
# so the denominator is the total over these last three rows only.
for cadre_position in range(21):
    first_upper_row = 4 * cadre_position + 1
    last_upper_row = 4 * cadre_position + 3
    upper_level_total = curr_staff_distribution.loc[first_upper_row:last_upper_row, 'Staff_Count'].sum()
    if upper_level_total > 0:
        for upper_row in range(first_upper_row, last_upper_row + 1):
            curr_staff_distribution.loc[upper_row, 'Proportion'] = (
                curr_staff_distribution.loc[upper_row, 'Staff_Count'] / upper_level_total
            )

# Rows that received no proportion (level 0, and cadres with no staff at levels 1a-2) get 0
curr_staff_distribution.fillna(0, inplace=True)

# For DCSA individually, reassign their proportions since we assume all DCSAs are located at level 0
idx_dcsa = curr_staff_distribution.index[curr_staff_distribution['Cadre_Code'] == 'E01']
curr_staff_distribution.loc[idx_dcsa[0], 'Proportion'] = 1.00
curr_staff_distribution.loc[idx_dcsa[1:4], 'Proportion'] = 0.00
# Alternatively, DCSAs 50% at level 0 and 50% at level 1a?

# curr_staff_distribution ready!

# Save
# curr_staff_distribution.to_csv(
#     outputlocation / 'human_resources' / 'actual' / 'ResourceFile_Staff_Distribution_Assumption.csv',
#     index=False)

# --- Generate assumptions of established/funded staff distribution at facility levels 0&1a&1b&2
# Read 2018-03-09 Facility-level establishment MOH & CHAM from the CHAI auxiliary datasets
fund_staff_2018_raw = pd.read_excel(
    path_to_auxiliaryfiles / '2018-03-09 Facility-level establishment MOH & CHAM.xlsx',
    sheet_name='Establishment listing',
)

# Keep the relevant columns only, and only the rows that are complete in all of them
fund_staff_2018 = fund_staff_2018_raw[['Number of positions', 'Facility', 'Facility Type', 'WFOM Cadre']].copy()
fund_staff_2018.dropna(inplace=True)

# Drop rows that associate to '_NOT INCLUDED' and rows for 'Training Institution', then renumber the rows
not_included_or_training = (
    (fund_staff_2018['WFOM Cadre'] == '_NOT INCLUDED') |
    (fund_staff_2018['Facility Type'] == 'Training Institution')
)
fund_staff_2018.drop(index=fund_staff_2018.index[not_included_or_training], inplace=True)
fund_staff_2018.reset_index(drop=True, inplace=True)

# Split column 'WFOM Cadre' into its four parts.
# Note 'Cadre_Extra' records 'Clinical ' or 'Nursing ' for C01 and C02.
# We combine C01 and C02 into C01 denoting mental health staff cadre to be consistent with 'curr_staff_return'.
wfom_cadre_parts = fund_staff_2018['WFOM Cadre'].str.split(pat='-| - ', expand=True)
fund_staff_2018[['Cadre_No.', 'Cadre_Code', 'Cadre', 'Cadre_Extra']] = wfom_cadre_parts
fund_staff_2018.loc[fund_staff_2018['Cadre_Code'] == 'C02', 'Cadre_Code'] = 'C01'
# The original column and the two parts that are not used any further can go
fund_staff_2018.drop(columns=['WFOM Cadre', 'Cadre_No.', 'Cadre_Extra'], inplace=True)

# Drop rows that associate to 'Home Craft Worker', 'Educ/Environ Health Officer', and 'Community Midwife Assistant'
# as these cadres are not included in 'Time_Base' and 'PFT'; then renumber the rows.
fund_cadres_to_drop = ['Home Craft Worker', 'Educ/Environ Health Officer', 'Community Midwife Assistant']
fund_staff_2018.drop(
    index=fund_staff_2018.index[fund_staff_2018['Cadre'].isin(fund_cadres_to_drop)],
    inplace=True,
)
fund_staff_2018.reset_index(drop=True, inplace=True)

# Rename cadres (and recode 'E02' as 'E01') to be consistent with officer_types_table
fund_cadre_renames = {
    'HSA': 'DCSA',
    'Medical Assistant': 'Med. Assistant',
    'Laboratory Officer': 'Lab Officer',
    'Laboratory Technician': 'Lab Technician',
    'Laboratory Assistant': 'Lab Assistant',
    'Nursing Officer/Registered Nurse': 'Nurse Officer',
    'Dentist': 'Dental Officer',
    'Nutrition Officer': 'Nutrition Staff',
    'Pharmacy Technician': 'Pharm Technician',
    'Pharmacy Assistant': 'Pharm Assistant',
    'Pharmacy Officer': 'Pharmacist',
}
for name_in_listing, name_in_table in fund_cadre_renames.items():
    fund_staff_2018.loc[fund_staff_2018['Cadre'] == name_in_listing, 'Cadre'] = name_in_table
fund_staff_2018.loc[fund_staff_2018['Cadre_Code'] == 'E02', 'Cadre_Code'] = 'E01'

# Note that Dental Assistant, Radiotherapy Technician and Sonographer are not included in this dataset.
# This is OK because CHAI current and funded staff sheet has no data regarding the three cadres.

# Reassign the facility type of Zomba Mental Hospital as 'Zomba Mental Hospital'.
fund_staff_2018.loc[fund_staff_2018['Facility'] == 'Zomba Mental Hospital', 'Facility Type'] = 'Zomba Mental Hospital'

# Important definition: Facility_Levels = [0, 1a, 1b, 2, 3, 4, 5]
# 0: Community/Local level - HP, Village Health Committee, Community initiatives
# 1a: Primary level - Dispensary, HC, Clinic, Maternity facility
# 1b: Primary level - Community/Rural Hospital, CHAM (Community) Hospitals
# 2: Second level - District hospital, DHO
# 3: Tertiary/Referral level - KCH, MCH, ZCH + QECH as referral hospitals
# 4: Zomba Mental Hospital, which has very limited data in CHAI dataset
# 5: Headquarter, which has staff data (but no Time_Base or Incidence_Curr data)

# Get the Health Facility Type list
# fac_types_list = pd.unique(fund_staff_2018['Facility Type']) # Level_0 Facs not included

# Add column 'Facility_Level', derived from the facility type of each row
fund_level_of_facility_type = {
    'Urban Health Center': 'Facility_Level_1a',
    'Rural Health Center': 'Facility_Level_1a',
    'Health Center (with maternity)': 'Facility_Level_1a',
    'Health Center (without maternity)': 'Facility_Level_1a',
    'Rural/Community Hospital': 'Facility_Level_1b',
    'District Hospital': 'Facility_Level_2',
    'DHO': 'Facility_Level_2',
    'Central Hospital': 'Facility_Level_3',
    'Zomba Mental Hospital': 'Facility_Level_4',
    'Headquarters': 'Facility_Level_5',
}
for fund_facility_type, fund_facility_level in fund_level_of_facility_type.items():
    fund_staff_2018.loc[fund_staff_2018['Facility Type'] == fund_facility_type, 'Facility_Level'] = fund_facility_level

# Check no blanks in this table (an unmapped facility type would leave a blank 'Facility_Level')
assert not pd.isnull(fund_staff_2018).any().any()

# fund_staff_2018 ready!

# Distribution of funded staff among levels 0, 1a, 1b and 2, i.e., the staff distribution within a district.
# Specifically, only and all DCSAs/HSAs are to be allocated at level 0;
# other cadres are to be allocated at level 1a and above.

fund_staff_district = fund_staff_2018[['Facility_Level', 'Cadre_Code', 'Number of positions']].copy()

# Number of positions per (cadre, facility level), ordered by cadre code and then by facility level
fund_staff_distribution = (
    fund_staff_district
    .groupby(by=['Cadre_Code', 'Facility_Level'], sort=False)
    .sum()
    .sort_index(level=[0, 1])
    .reset_index(drop=False)
)

# Template with one row for every officer code at every facility level (0,1a,1b,2,3,4,5), so that
# combinations absent from the establishment listing also appear in fund_staff_distribution
fund_level_names = ['Facility_Level_0', 'Facility_Level_1a', 'Facility_Level_1b', 'Facility_Level_2',
                    'Facility_Level_3', 'Facility_Level_4', 'Facility_Level_5']
fund_cadre_faclevel = pd.DataFrame(columns=['Cadre_Code'] + fund_level_names)
fund_cadre_faclevel['Cadre_Code'] = officer_types_table['Officer_Type_Code']
fund_cadre_faclevel = pd.melt(fund_cadre_faclevel, id_vars='Cadre_Code', value_vars=fund_level_names,
                              var_name='Facility_Level')

# Right-merge onto the template, count missing combinations as 0 positions, re-sort, and
# discard the placeholder column 'value' created by the melt
fund_staff_distribution = (
    fund_staff_distribution
    .merge(fund_cadre_faclevel, how='right')
    .fillna(0)
    .set_index(['Cadre_Code', 'Facility_Level'])
    .sort_index(level=[0, 1])
    .reset_index(drop=False)
    .drop(columns=['value'])
)

# Save the complete funded staff distribution table
# fund_staff_distribution_complete = fund_staff_distribution.copy()

# Keep and focus on rows of levels 0, 1a, 1b, and 2
levels_kept_for_fund = ['Facility_Level_0', 'Facility_Level_1a', 'Facility_Level_1b', 'Facility_Level_2']
fund_idx_keep = fund_staff_distribution.index[fund_staff_distribution['Facility_Level'].isin(levels_kept_for_fund)]
fund_staff_distribution = fund_staff_distribution.loc[fund_idx_keep, :].copy()
fund_staff_distribution.reset_index(drop=True, inplace=True)

# Add column 'Proportion_Fund': the share of a cadre's positions found at level_1a, level_1b and level_2.
# The rows now come in runs of four per cadre (levels 0, 1a, 1b, 2); for the cadre at position k, row 4k is
# level 0 and rows 4k+1..4k+3 are levels 1a, 1b, 2. Cadres except DCSA are allocated at level 1a and above,
# so the denominator is the total over these last three rows only.
for fund_cadre_position in range(21):
    fund_first_upper_row = 4 * fund_cadre_position + 1
    fund_last_upper_row = 4 * fund_cadre_position + 3
    fund_upper_level_total = fund_staff_distribution.loc[
        fund_first_upper_row:fund_last_upper_row, 'Number of positions'].sum()
    if fund_upper_level_total > 0:
        for fund_upper_row in range(fund_first_upper_row, fund_last_upper_row + 1):
            fund_staff_distribution.loc[fund_upper_row, 'Proportion_Fund'] = (
                fund_staff_distribution.loc[fund_upper_row, 'Number of positions'] / fund_upper_level_total
            )

# Rows that received no proportion (level 0, and cadres with no positions at levels 1a-2) get 0
fund_staff_distribution.fillna(0, inplace=True)

# For DCSA individually, reassign their proportions since we assume all DCSAs are located at level 0
fund_idx_dcsa = fund_staff_distribution.index[fund_staff_distribution['Cadre_Code'] == 'E01']
fund_staff_distribution.loc[fund_idx_dcsa[0], 'Proportion_Fund'] = 1.00
fund_staff_distribution.loc[fund_idx_dcsa[1:4], 'Proportion_Fund'] = 0.00
# Alternatively, DCSAs 50% at level 0 and 50% at level 1a?

# fund_staff_distribution ready!

# Save
# fund_staff_distribution.to_csv(
#     outputlocation / 'human_resources' / 'funded' / 'ResourceFile_Staff_Distribution_Assumption.csv',
#     index=False)

# We read info from CHAI estimates of optimal and immediately needed workforce for comparison wherever possible
# --- CHAI WFOM optimal workforce and immediately needed staff distribution

# Preparing optimal workforce from CHAI auxiliary datasets
opt_workforce = pd.read_excel(path_to_auxiliaryfiles / 'MalawiOptimization_OUTPUT2022 SH 2019-10-19.xlsx',
                              sheet_name='Sums by facility type')
# Drop redundant row
opt_workforce.drop(0, inplace=True)
opt_workforce.reset_index(drop=True, inplace=True)

# Add column 'Facility_Level' as the third column; one entry per remaining row of the sheet, in sheet order
opt_workforce.insert(2, 'Facility_Level', ['Facility_Level_3',
                                           'Facility_Level_1b',
                                           'Facility_Level_2',
                                           'Facility_Level_1a',
                                           'Facility_Level_1a'])

# Get staff distribution between level_1a, level_1b and level_2 per cadre
# cols_matter = 'Facility_Level' followed by the cadre columns (columns 2-23 after the insert)
cols_matter = opt_workforce.columns[2:24]
opt_workforce_distribution = opt_workforce.loc[1:4, cols_matter].copy()  # drop row Facility_Level_3
opt_workforce_distribution = pd.DataFrame(opt_workforce_distribution.groupby(by=['Facility_Level'], sort=False).sum())
opt_workforce_distribution.sort_index(inplace=True)
# Reset index
opt_workforce_distribution.reset_index(drop=False, inplace=True)

# Transform to long format: one row per (facility level, cadre column)
opt_workforce_distribution = pd.melt(opt_workforce_distribution, id_vars='Facility_Level', value_vars=cols_matter[1:],
                                     var_name='Cadre_Opt', value_name='Staff_Count_Opt')

# Add column 'Cadre_Code': characters 7-9 of the cadre column name.
# Done for the whole column at once, so it does not depend on the table having a fixed number of rows.
opt_workforce_distribution['Cadre_Code'] = opt_workforce_distribution['Cadre_Opt'].astype(str).str[7:10]

# Sort to be consistent with curr_staff_distribution
# Drop unnecessary column
opt_workforce_distribution.set_index(['Cadre_Code', 'Facility_Level'], inplace=True)
opt_workforce_distribution.sort_index(level=[0, 1], inplace=True)
opt_workforce_distribution.reset_index(drop=False, inplace=True)
opt_workforce_distribution.drop(columns=['Cadre_Opt'], inplace=True)

# Add column 'Proportion_Opt', denoting the percents of staff per cadre between level_1a, level_1b and level_2.
# After the sort, each cadre occupies three consecutive rows (levels 1a, 1b, 2); walk through the table in
# steps of three rows, however many cadres there are.
for opt_first_row in range(0, len(opt_workforce_distribution), 3):
    opt_cadre_rows = opt_workforce_distribution.index[opt_first_row:opt_first_row + 3]
    opt_cadre_total = opt_workforce_distribution.loc[opt_cadre_rows, 'Staff_Count_Opt'].sum()
    if opt_cadre_total > 0:  # cadres without any staff are left blank here and set to 0 below
        for opt_row in opt_cadre_rows:
            opt_workforce_distribution.loc[opt_row, 'Proportion_Opt'] = (
                opt_workforce_distribution.loc[opt_row, 'Staff_Count_Opt'] / opt_cadre_total
            )

# fillna
opt_workforce_distribution.fillna(0, inplace=True)

# opt_workforce_distribution ready!

# Preparing immediately needed estimates from CHAI auxiliary datasets
immed_need = pd.read_excel(path_to_auxiliaryfiles / 'MalawiOptimization_OUTPUT_ALLYEARS_Curr.xlsx',
                           sheet_name='CurrBase Output')

# Select relevant data: rows of output year 2016; columns 1 and 3 plus the cadre columns 49-69.
# A copy is taken because the selection is modified below.
idx_year = immed_need[immed_need['OutputYear'] == 2016].index
immed_need_cadre_columns = list(immed_need.columns[49:70])
immed_need_distribution = immed_need.loc[idx_year, immed_need.columns[np.r_[1, 3, 49:70]]].copy()
immed_need_distribution.dropna(inplace=True)

# Add column 'Facility_Level', derived from the facility type of each row
immed_level_of_facility_type = {
    'UrbHC': 'Facility_Level_1a',
    'RurHC': 'Facility_Level_1a',
    'ComHos': 'Facility_Level_1b',
    'DisHos': 'Facility_Level_2',
    'CenHos': 'Facility_Level_3',
}
for immed_facility_type, immed_facility_level in immed_level_of_facility_type.items():
    immed_need_distribution.loc[
        immed_need_distribution['FacilityType'] == immed_facility_type, 'Facility_Level'] = immed_facility_level

# Group staff by levels.
# Only the cadre columns are summed: text columns such as 'FacilityType' cannot be meaningfully added up, and
# whether pandas silently leaves them out of a groupby sum depends on the pandas version.
immed_need_distribution = immed_need_distribution.groupby(
    by=['Facility_Level'], sort=False)[immed_need_cadre_columns].sum()
# Drop level 3
immed_need_distribution.drop(index='Facility_Level_3', inplace=True)
# Reset index
immed_need_distribution.reset_index(inplace=True)

# Transform to long format; the cadre columns must be the same as those of the optimal workforce table
assert set(immed_need_distribution.columns[1:]) == set(cols_matter[1:])
immed_need_distribution = pd.melt(immed_need_distribution, id_vars='Facility_Level', value_vars=cols_matter[1:],
                                  var_name='Cadre_ImmedNeed', value_name='Staff_Count_ImmedNeed')

# Add column 'Cadre_Code': characters 7-9 of the cadre column name.
# Done for the whole column at once, so it does not depend on the table having a fixed number of rows.
immed_need_distribution['Cadre_Code'] = immed_need_distribution['Cadre_ImmedNeed'].astype(str).str[7:10]

# Sort to be consistent with curr_staff_distribution
# Drop unnecessary column
immed_need_distribution.set_index(['Cadre_Code', 'Facility_Level'], inplace=True)
immed_need_distribution.sort_index(level=[0, 1], inplace=True)
immed_need_distribution.reset_index(drop=False, inplace=True)
immed_need_distribution.drop(columns=['Cadre_ImmedNeed'], inplace=True)

# Add column 'Proportion_ImmedNeed', denoting the percents of staff per cadre among level_1a, level_1b, and level_2.
# After the sort, each cadre occupies three consecutive rows (levels 1a, 1b, 2); walk through the table in
# steps of three rows, however many cadres there are.
for immed_first_row in range(0, len(immed_need_distribution), 3):
    immed_cadre_rows = immed_need_distribution.index[immed_first_row:immed_first_row + 3]
    immed_cadre_total = immed_need_distribution.loc[immed_cadre_rows, 'Staff_Count_ImmedNeed'].sum()
    if immed_cadre_total > 0:  # cadres without any staff are left blank here and set to 0 below
        for immed_row in immed_cadre_rows:
            immed_need_distribution.loc[immed_row, 'Proportion_ImmedNeed'] = (
                immed_need_distribution.loc[immed_row, 'Staff_Count_ImmedNeed'] / immed_cadre_total
            )

# fillna
immed_need_distribution.fillna(0, inplace=True)

# immed_need_distribution ready!

# --- Combine curr_staff_distribution, fund_staff_distribution, opt_workforce_distribution, and immed_need_distribution
# Compare if possible

# Merge curr and opt data
# First, drop rows of level_0 of curr_staff_distribution, for compare_staff_distribution
idx_level0 = curr_staff_distribution[curr_staff_distribution['Facility_Level'] == 'Facility_Level_0'].index
compare_staff_distribution = curr_staff_distribution.drop(idx_level0, axis=0, inplace=False).copy()
# Merge the level-0-free table (previously the unfiltered curr_staff_distribution was merged,
# which silently discarded the frame prepared on the line above)
compare_staff_distribution = compare_staff_distribution.merge(opt_workforce_distribution, how='right')

# Check before adding ImmedNeed data: both tables must list the same cadre/level pairs in the same order
assert (compare_staff_distribution['Cadre_Code'] == immed_need_distribution['Cadre_Code']).all()
assert (compare_staff_distribution['Facility_Level'] == immed_need_distribution['Facility_Level']).all()
# Add Staff_Count_ImmedNeed and Proportion_ImmedNeed to the merged table
compare_staff_distribution['Staff_Count_ImmedNeed'] = immed_need_distribution['Staff_Count_ImmedNeed'].copy()
compare_staff_distribution['Proportion_ImmedNeed'] = immed_need_distribution['Proportion_ImmedNeed'].copy()

# Add fund data
# Work on a copy of fund_staff_distribution without its level_0 rows, renumbered from 0
fund_idx_level0 = fund_staff_distribution.index[
    (fund_staff_distribution['Facility_Level'] == 'Facility_Level_0').values]
fund_staff_distribution_nolevel0 = fund_staff_distribution.drop(index=fund_idx_level0).reset_index(drop=True)

# Check before combination: rows must line up one-to-one on cadre and facility level
for key_col in ('Cadre_Code', 'Facility_Level'):
    assert (compare_staff_distribution[key_col] == fund_staff_distribution_nolevel0[key_col]).all()

# Add Number of positions and Proportion_Fund to the merged table, as the 5th and 6th columns
compare_staff_distribution.insert(4, 'Staff_Count_Fund', fund_staff_distribution_nolevel0['Number of positions'].values)
compare_staff_distribution.insert(5, 'Proportion_Fund', fund_staff_distribution_nolevel0['Proportion_Fund'].values)

# Calculate the relative difference (observed - baseline) / baseline for each pair of proportions.
# A value is only defined where the baseline proportion is positive; other rows are left as NaN.
# The whole column is computed at once, so the calculation does not depend on the table having
# exactly 63 rows, and all five columns are always created, in the order listed here.
for diff_col, observed_col, baseline_col in (
    # Current data compared with Fund, Opt, and ImmedNeed
    ('Curr_vs_Fund', 'Proportion', 'Proportion_Fund'),
    ('Curr_vs_Opt', 'Proportion', 'Proportion_Opt'),
    ('Curr_vs_ImmedNeed', 'Proportion', 'Proportion_ImmedNeed'),
    # Funded data compared with Opt and ImmedNeed
    ('Fund_vs_Opt', 'Proportion_Fund', 'Proportion_Opt'),
    ('Fund_vs_ImmedNeed', 'Proportion_Fund', 'Proportion_ImmedNeed'),
):
    observed = compare_staff_distribution[observed_col].astype(float)
    baseline = compare_staff_distribution[baseline_col].astype(float)
    compare_staff_distribution[diff_col] = ((observed - baseline) / baseline).where(baseline > 0)

# Save
# compare_staff_distribution.to_csv(outputlocation / 'ResourceFile_Staff_Distribution_Compare.csv', index=False)

# ***
# --- fund_staffing_table for established staff
# Extract just the section about 'Funded TOTAL Staff':
# rows 3-39 and columns 64-84 of the imported sheet, without rows 4 and 5
wb_extract = wb_import.loc[3:39, 64:84].drop([4, 5])
# Row 3 holds the column headers: promote it to the column index, then remove it from the data
wb_extract.columns = wb_extract.iloc[0]
wb_extract = wb_extract.drop([3]).reset_index(drop=True)
wb_extract.fillna(0, inplace=True)  # replace all null values with zero values

# Build the label column (first column of the sheet, rows 6-39) and a flag that distinguishes whether
# the officers are allocated to the district-or-lower levels or to one of the key hospitals.
labels = wb_import.loc[6:39, 0].reset_index(drop=True)
is_distlevel = labels.copy()
is_distlevel.iloc[:28] = True  # the first 28 rows: district-or-lower levels
is_distlevel.iloc[28:] = False  # the remaining rows: CenHos-or-above levels

wb_extract.loc[:, 'District_Or_Hospital'] = labels
wb_extract.loc[:, 'Is_DistrictLevel'] = is_distlevel

# Finished import from the CHAI excel:
fund_staffing_table = wb_extract.copy()

# The imported staffing table suggests that there are some Dental officers (D01) in most districts,
# but the Time_Curr data (below) suggest that D01 is only needed at central hospitals (not yet validated by CHAI).
# This potential inconsistency could be resolved by re-allocating D01 from districts to central hospitals, but
# currently we do not do such a reallocation, in order to reduce the number of assumptions we have to make.
# Also, the central/referral hospitals already have Dental officers allocated to meet dental service demand,
# so there is no risk of being unable to meet such demand at level 3.

# *** Only for funded_plus ********************************************************************************************
# Districts Balaka/Machinga/Mwanza/Neno (4 in South), Nkhata Bay (1 in North), Ntchisi/ Salima (2 in Central)
# have 0 mental health staff C01 in establishment,
# whereas C01 is required by mental health appts at level 1b, level 2 and level 3.
# To fix this inconsistency, we have to move at least 1 C01 to each of these districts from the referral hospitals.
# (QECH and ZCH in South, MCH in North, KCH in Central; ZCH has no C01)
# non_c01_district_idx = fund_staffing_table[(fund_staffing_table['C01'] == 0) &
#                                            (fund_staffing_table['Is_DistrictLevel'])].index
# non_c01_districts = pd.DataFrame(fund_staffing_table.loc[non_c01_district_idx, 'District_Or_Hospital'])
# non_c01_districts['Region'] = pop_by_district.loc[non_c01_districts['District_Or_Hospital'], 'Region'].values
# fund_staffing_table.loc[non_c01_district_idx, 'C01'] = 1
# fund_staffing_table.loc[fund_staffing_table['District_Or_Hospital'] == 'QECH', 'C01'] = (
#     fund_staffing_table.loc[fund_staffing_table['District_Or_Hospital'] == 'QECH', 'C01'] - 4
# )
# fund_staffing_table.loc[fund_staffing_table['District_Or_Hospital'] == 'MCH', 'C01'] = (
#     fund_staffing_table.loc[fund_staffing_table['District_Or_Hospital'] == 'MCH', 'C01'] - 1
# )
# fund_staffing_table.loc[fund_staffing_table['District_Or_Hospital'] == 'KCH', 'C01'] = (
#     fund_staffing_table.loc[fund_staffing_table['District_Or_Hospital'] == 'KCH', 'C01'] - 2
# )
# *********************************************************************************************************************

# *** Only for funded_plus ********************************************************************************************
# We find that the total DCSA count in the establishment/funded scenario is less than that in the actual scenario,
# which is abnormal. Therefore, we use the actual DCSA count data for the funded_plus scenario, to avoid the possible
# negative impact of a reduced DCSA count when switching from the actual to the funded_plus scenario in tlo simulation.
# First, extract the section about 'Current TOTAL Staff'
# hcw_curr_extract = wb_import.loc[3:39, 1:21]
# hcw_curr_extract = hcw_curr_extract.drop([4, 5])
# hcw_curr_extract.columns = hcw_curr_extract.iloc[0]
# hcw_curr_extract = hcw_curr_extract.drop([3])
# hcw_curr_extract = hcw_curr_extract.reset_index(drop=True)
# hcw_curr_extract.fillna(0, inplace=True)
# hcw_curr_extract.loc[:, 'District_Or_Hospital'] = labels
# hcw_curr_extract.loc[:, 'Is_DistrictLevel'] = is_distlevel
# curr_staffing_table = hcw_curr_extract.copy()
# Then, replace the DCSA E01 data for funded_plus
# assert (curr_staffing_table.District_Or_Hospital == fund_staffing_table.District_Or_Hospital).all()
# fund_staffing_table['E01'] = curr_staffing_table['E01'].copy()
# Further, it does not make sense that Likoma has no DCSA staff. As CHAI indicates Likoma's data is mostly bundled into
# Nkhata Bay, we draw some DCSA from Nkhata Bay to Likoma using population as the weight
# idx_likoma = fund_staffing_table[fund_staffing_table['District_Or_Hospital'] == 'Likoma'].index
# assert fund_staffing_table.loc[idx_likoma, 'E01'].values == 0
# idx_nkhatabay = fund_staffing_table[fund_staffing_table['District_Or_Hospital'] == 'Nkhata Bay'].index
# fund_staffing_table.loc[idx_likoma, 'E01'] = fund_staffing_table.loc[idx_nkhatabay, 'E01'].values[0] * (
#      pop_by_district.loc['Likoma', 'Count'] / pop_by_district.loc['Nkhata Bay', 'Count'])
# fund_staffing_table.loc[idx_nkhatabay, 'E01'] = (
#      fund_staffing_table.loc[idx_nkhatabay, 'E01'].values[0] - fund_staffing_table.loc[idx_likoma, 'E01'].values[0])
# *********************************************************************************************************************

# Sort out which are district allocations and which are central hospitals and above

# Replace the CHAI abbreviations with full facility names.
# We assign HQ to HQ; KCH as RefHos in Central region; MCH as RefHos in Northern region;
# QECH and ZCH both as RefHos in Southern region (QECH is in Southwest and ZCH is in Southeast;
# an alternative would be to keep them apart as 'Referral Hospital_Southwest' / 'Referral Hospital_Southeast').
for chai_label, facility_name in (
    ('HQ', 'Headquarter'),
    ('KCH', 'Referral Hospital_Central'),
    ('MCH', 'Referral Hospital_Northern'),
    ('QECH', 'Referral Hospital_Southern'),
    ('ZCH', 'Referral Hospital_Southern'),
    ('ZMH', 'Zomba Mental Hospital'),
):
    fund_staffing_table.loc[
        fund_staffing_table['District_Or_Hospital'] == chai_label, 'District_Or_Hospital'] = facility_name

# Group the referral hospitals QECH and ZCH as Referral Hospital_Southern by summing rows that share a name
Is_DistrictLevel = fund_staffing_table['Is_DistrictLevel'].values  # Save the column 'Is_DistrictLevel' first
fund_staffing_table = fund_staffing_table.groupby(by=['District_Or_Hospital'], sort=False).sum().reset_index()
# Put the flag back as the second column; all but the last saved flag are used
# (presumably because the grouped table is one row shorter after QECH and ZCH are combined - TODO confirm)
fund_staffing_table.insert(1, 'Is_DistrictLevel', Is_DistrictLevel[:-1])

# Check that in fund_staffing_table every staff count entry >= 0
assert (fund_staffing_table.loc[:, 'M01':'R04'].values >= 0).all()

# The following districts are not in the CHAI data because they are included within other districts.
# For now, we will say that the division of staff between these cities and the wider district (where they are
# included) is consistent with the population recorded for them (Malawi 2018 census),
# i.e., to use population-based weights to reallocate staff

# Add in Lilongwe City (part of Lilongwe)
# Add in Mzuzu City (part of Mzimba) ASSUMED
# Add in Zomba City (part of Zomba)
# Add in Blantyre City (part of Blantyre)

# create mapping: the new districts : super_district
split_districts = (
    ('Lilongwe City', 'Lilongwe'),
    ('Mzuzu City', 'Mzimba'),
    ('Zomba City', 'Zomba'),
    ('Blantyre City', 'Blantyre')
)

# The officer type codes identify which columns of the staffing table hold staff counts
officer_codes = set(officer_types_table.Officer_Type_Code)

# reallocating staff to the new districts
for new_district, super_district in split_districts:
    # copy an existing row so that the new record has every column of the staffing table
    record = fund_staffing_table.iloc[0].copy()

    # make the record for the new district
    record['District_Or_Hospital'] = new_district
    record['Is_DistrictLevel'] = True

    # staff-count columns, as a list in table order
    # (a set is no longer accepted as a .loc indexer from pandas 2.0)
    cols = [col for col in fund_staffing_table.columns if col in officer_codes]

    # get total staff level from the super district
    total_staff = fund_staffing_table.loc[
        fund_staffing_table['District_Or_Hospital'] == super_district, cols].values.squeeze()

    # get the weight; The original weights w0 for the 4 new districts in order are 0.60,0.24,0.14,1.77(> 1)
    # A ratio of 1 or more cannot be used as a share, so the staff are split half and half instead
    w0 = pop_by_district.loc[new_district, 'Count'] / pop_by_district.loc[super_district, 'Count']
    w = w0 if w0 < 1 else 0.5

    # assign w * 100% staff to the new district and add the record as a new last row
    # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0; infer_objects restores
    # the column dtypes of the one-row frame, as DataFrame.append did internally)
    record.loc[cols] = w * total_staff
    fund_staffing_table = pd.concat(
        [fund_staffing_table, record.to_frame().T.infer_objects()], ignore_index=True)

    # take staff away from the super district
    fund_staffing_table.loc[fund_staffing_table['District_Or_Hospital'] == super_district, cols] = (
        fund_staffing_table.loc[fund_staffing_table['District_Or_Hospital'] == super_district, cols]
        - record.loc[cols])

# Confirm the merging will be perfect: the district-level rows of the staffing table must be
# exactly the districts of the population table, with the same number of entries
pop = pop_by_district.reset_index()
fund_district_names = fund_staffing_table.loc[fund_staffing_table['Is_DistrictLevel'], 'District_Or_Hospital']
assert set(pop['District'].values) == set(fund_district_names)
assert len(pop['District'].values) == len(fund_district_names)

# ... double check by doing the merge explicitly
pop_districts = pd.DataFrame({'District': pd.unique(pop['District'])})  # data frame
chai_districts = pd.DataFrame({'District': fund_district_names})

merge_result = pop_districts.merge(chai_districts, how='inner', indicator=True)
assert (merge_result['_merge'] == 'both').all()
assert len(merge_result) == len(pop_districts)

# Split staff within each district to level 0 (All DCSAs at HP), level 1a (Disp, HC, etc.),
# level 1b (ComHos, CHAM ComHos), and level 2 (DisHos, etc.), according to fund_staff_distribution.

# First, generate a df with one row per (district, facility level) for levels 0 - 2:
# start from a wide table with one (empty) column per level, melt it to long format,
# sort by district and level, and discard the empty value column
district_faclevel = pd.DataFrame(columns=['District_Or_Hospital', 'Facility_Level_0', 'Facility_Level_1a',
                                          'Facility_Level_1b', 'Facility_Level_2'])
district_faclevel['District_Or_Hospital'] = pop['District'].values.copy()
district_faclevel = (
    pd.melt(district_faclevel, id_vars='District_Or_Hospital', value_vars=district_faclevel.columns[1:],
            var_name='Facility_Level')
    .set_index(['District_Or_Hospital', 'Facility_Level'])
    .sort_index(level=[0, 1])
    .reset_index(drop=False)
    .drop(columns=['value'])
)
# Merge: every row of both tables is kept (outer merge)
fund_staffing_table = district_faclevel.merge(fund_staffing_table, how='outer')

# Split staff among levels

# *** Only for funded_plus ********************************************************************************************
# Before split, update the funded C01 distributions at levels 1a, 1b and 2 using CHAI Optimal Workforce estimates. \
# This is because funded C01 are all at level 1b (100%), meanwhile appt time base requires C01 at level 2. \
# CHAI Optimal Workforce locates C01 47.92% at level 1b and 52.08% at level 2, which seems more sensible.
# idx_c01_level_1b = fund_staff_distribution[
#     (fund_staff_distribution['Cadre_Code'] == 'C01') &
#     (fund_staff_distribution['Facility_Level'] == 'Facility_Level_1b')].index
# fund_staff_distribution.loc[idx_c01_level_1b, 'Proportion_Fund'] = 0.4792
#
# idx_c01_level_2 = fund_staff_distribution[
#     (fund_staff_distribution['Cadre_Code'] == 'C01') &
#     (fund_staff_distribution['Facility_Level'] == 'Facility_Level_2')].index
# fund_staff_distribution.loc[idx_c01_level_2, 'Proportion_Fund'] = 0.5208
# *********************************************************************************************************************

# Split
# The proportions per facility level for each cadre; they do not depend on the district, so look them up once
fund_weights_by_cadre = {
    cadre: fund_staff_distribution.loc[fund_staff_distribution['Cadre_Code'] == cadre,
                                       ['Facility_Level', 'Proportion_Fund']]
    for cadre in set(fund_staffing_table.columns[3:])
}

for district in pop['District']:
    in_district = fund_staffing_table['District_Or_Hospital'] == district

    for cadre, weight in fund_weights_by_cadre.items():
        # The staff count before splitting, on the rows of this district
        old_count = fund_staffing_table.loc[in_district, ['Facility_Level', cadre]]

        # Check that Facility levels of weight and old_count are consistent
        assert (weight['Facility_Level'].values == old_count['Facility_Level'].values).all()

        # Check that if old_count is not 0, then weight is not 0, guaranteeing that staff are split
        if (old_count[cadre] > 0).any():
            assert (weight['Proportion_Fund'] > 0).any()

        # Split: multiply the count on each row by the proportion of the matching level
        fund_staffing_table.loc[in_district, cadre] = (
            old_count[cadre].values * weight['Proportion_Fund'].values)

# Add facility levels for HQ, CenHos and ZMH.
# The rows are located by facility name instead of by the fixed row positions 128-132, so the
# assignment stays correct if the number of districts (and hence the row numbering) changes.
for facility_name, facility_level in (
    ('Headquarter', 'Facility_Level_5'),
    ('Referral Hospital_Central', 'Facility_Level_3'),
    ('Referral Hospital_Northern', 'Facility_Level_3'),
    ('Referral Hospital_Southern', 'Facility_Level_3'),
    ('Zomba Mental Hospital', 'Facility_Level_4'),
):
    is_this_facility = fund_staffing_table['District_Or_Hospital'] == facility_name
    assert is_this_facility.any(), facility_name + ' is missing from fund_staffing_table'
    fund_staffing_table.loc[is_this_facility, 'Facility_Level'] = facility_level

# Check that in fund_staffing_table every staff count entry >= 0
assert (fund_staffing_table.loc[:, 'M01':'R04'].values >= 0).all()
# fund_staffing_table ready!

# Save the table without column 'Is_DistrictLevel'; staff counts in floats
fund_staffing_table_to_save = fund_staffing_table.drop(columns='Is_DistrictLevel', inplace=False)
# fund_staffing_table_to_save.to_csv(
#     outputlocation / 'human_resources' / 'funded' / 'ResourceFile_Staff_Table.csv', index=False)
# fund_staffing_table_to_save.to_csv(
#     outputlocation / 'human_resources' / 'funded_plus' / 'ResourceFile_Staff_Table.csv', index=False)

# ***
# --- Creating curr_staffing_table and curr_staff_list for current staff
# Extract the section about 'Current TOTAL Staff':
# rows 3-39 and columns 1-21 of the imported sheet, without rows 4 and 5
hcw_curr_extract = wb_import.loc[3:39, 1:21].drop([4, 5])
# Row 3 holds the column headers: promote it to the column index, then remove it from the data
hcw_curr_extract.columns = hcw_curr_extract.iloc[0]
hcw_curr_extract = hcw_curr_extract.drop([3]).reset_index(drop=True)
hcw_curr_extract.fillna(0, inplace=True)

# Add in the columns to the dataframe for the labels that distinguish whether
# these officers are allocated to the district-or-lower levels or one of the key hospitals.
hcw_curr_extract.loc[:, 'District_Or_Hospital'] = labels
hcw_curr_extract.loc[:, 'Is_DistrictLevel'] = is_distlevel

# Finished import from the CHAI excel
curr_staffing_table = hcw_curr_extract.copy()

# Check that the first 21 columns of curr_staffing_table (the cadre columns) are the same set
# as the last 21 columns of fund_staffing_table
curr_cadre_columns = set(curr_staffing_table.columns[:21])
fund_cadre_columns = set(fund_staffing_table.columns[-21:])
assert curr_cadre_columns == fund_cadre_columns

# For curr_staffing_table, do not re-allocate Dental officer with the same reason above for established staff

# The operation of reallocating E01 in HQ to districts is not needed for curr_staffing_table,
# as no. of E01 in curr_staffing_table at HQ is zero.

# For curr_staffing_table, sort out the districts and central hospitals:
# replace the CHAI abbreviations with full facility names (QECH and ZCH share one name)
for chai_label, facility_name in (
    ('HQ', 'Headquarter'),
    ('KCH', 'Referral Hospital_Central'),
    ('MCH', 'Referral Hospital_Northern'),
    ('QECH', 'Referral Hospital_Southern'),
    ('ZCH', 'Referral Hospital_Southern'),
    ('ZMH', 'Zomba Mental Hospital'),
):
    curr_staffing_table.loc[
        curr_staffing_table['District_Or_Hospital'] == chai_label, 'District_Or_Hospital'] = facility_name

# Group the referral hospitals QECH and ZCH as Referral Hospital_Southern by summing rows that share a name
Is_DistrictLevel = curr_staffing_table['Is_DistrictLevel'].values  # Save the column 'Is_DistrictLevel' first
curr_staffing_table = curr_staffing_table.groupby(by=['District_Or_Hospital'], sort=False).sum().reset_index()
# Put the flag back as the second column; all but the last saved flag are used
# (presumably because the grouped table is one row shorter after QECH and ZCH are combined - TODO confirm)
curr_staffing_table.insert(1, 'Is_DistrictLevel', Is_DistrictLevel[:-1])

# No need to add a row for Zomba Mental Hospital, as the updated CHAI data has this row for ZMH.
# Check that in curr_staffing_table each staff count entry >=0
assert (curr_staffing_table.loc[:, 'M01':'R04'].values >= 0).all()

# Split staff to 5 special districts;
# for current staff, we include Likoma here because CHAI has no current staff allocated in Likoma
# (the CHAI team say they will allocate some staff to Likoma but this is not yet done)
split_districts = (
    ('Likoma', 'Nkhata Bay'),
    ('Lilongwe City', 'Lilongwe'),
    ('Mzuzu City', 'Mzimba'),
    ('Zomba City', 'Zomba'),
    ('Blantyre City', 'Blantyre')
)

# drop the original placeholder row for Likoma; it is found by name rather than by its row number (9),
# so the right row is removed even if the order of districts in the source data changes
is_likoma_placeholder = curr_staffing_table['District_Or_Hospital'] == 'Likoma'
curr_staffing_table = curr_staffing_table.loc[~is_likoma_placeholder].reset_index(drop=True)

# The officer type codes identify which columns of the staffing table hold staff counts
officer_codes = set(officer_types_table.Officer_Type_Code)

for new_district, super_district in split_districts:
    # copy an existing row so that the new record has every column of the staffing table
    record = curr_staffing_table.iloc[0].copy()

    # make the record for the new district
    record['District_Or_Hospital'] = new_district
    record['Is_DistrictLevel'] = True

    # staff-count columns, as a list in table order
    # (a set is no longer accepted as a .loc indexer from pandas 2.0)
    cols = [col for col in curr_staffing_table.columns if col in officer_codes]

    # get total staff level from the super district
    total_staff = curr_staffing_table.loc[
        curr_staffing_table['District_Or_Hospital'] == super_district, cols].values.squeeze()

    # get the weight; The values in order are 0.05,0.60,0.24,0.14,1.77
    # A ratio of 1 or more cannot be used as a share, so the staff are split half and half instead
    w0 = pop_by_district.loc[new_district, 'Count'] / pop_by_district.loc[super_district, 'Count']
    w = w0 if w0 < 1 else 0.5

    # assign w * 100% staff to the new district and add the record as a new last row
    # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0; infer_objects restores
    # the column dtypes of the one-row frame, as DataFrame.append did internally)
    record.loc[cols] = w * total_staff
    curr_staffing_table = pd.concat(
        [curr_staffing_table, record.to_frame().T.infer_objects()], ignore_index=True)

    # take staff away from the super district
    curr_staffing_table.loc[curr_staffing_table['District_Or_Hospital'] == super_district, cols] = (
        curr_staffing_table.loc[curr_staffing_table['District_Or_Hospital'] == super_district, cols]
        - record.loc[cols])

# Confirm the merging will be perfect: the district-level rows of the staffing table must be
# exactly the districts of the population table, with the same number of entries
# (pop and pop_districts are reused from the check on the funded table)
curr_district_names = curr_staffing_table.loc[curr_staffing_table['Is_DistrictLevel'], 'District_Or_Hospital']
assert set(pop['District'].values) == set(curr_district_names)
assert len(pop['District'].values) == len(curr_district_names)

# ... double check by doing the merge explicitly
chai_districts = pd.DataFrame({'District': curr_district_names})

merge_result = pop_districts.merge(chai_districts, how='inner', indicator=True)
assert (merge_result['_merge'] == 'both').all()
assert len(merge_result) == len(pop_districts)

# Split staff within each district to level 0 (All DCSAs at HP), level 1a (Disp, HC, etc.),
# level 1b (ComHos, CHAM ComHos), and level 2 (DisHos, etc.), according to curr_staff_distribution.

# First, make the table including all districts and facility levels 0 - 2 per district,
# by merging with district_faclevel defined previously (outer merge: every row of both tables is kept).
curr_staffing_table = district_faclevel.merge(curr_staffing_table, how='outer')

# Split staff among levels
# The proportions per facility level for each cadre; they do not depend on the district, so look them up once
curr_weights_by_cadre = {
    cadre: curr_staff_distribution.loc[curr_staff_distribution['Cadre_Code'] == cadre,
                                       ['Facility_Level', 'Proportion']]
    for cadre in set(curr_staffing_table.columns[3:])
}

for district in pop['District']:
    in_district = curr_staffing_table['District_Or_Hospital'] == district

    for cadre, weight in curr_weights_by_cadre.items():
        # The staff count before splitting, on the rows of this district
        old_count = curr_staffing_table.loc[in_district, ['Facility_Level', cadre]]

        # Check that Facility levels of weight and old_count are consistent
        assert (weight['Facility_Level'].values == old_count['Facility_Level'].values).all()

        # Check that if old_count is not 0, then weight is not 0, guaranteeing that staff are split
        if (old_count[cadre] > 0).any():
            assert (weight['Proportion'] > 0).any()

        # Split: multiply the count on each row by the proportion of the matching level
        curr_staffing_table.loc[in_district, cadre] = (
            old_count[cadre].values * weight['Proportion'].values)

# Add facility levels for HQ, CenHos and ZMH.
# The rows are located by facility name instead of by the fixed row positions 128 onwards, so the
# assignment stays correct if the number of districts (and hence the row numbering) changes.
for facility_name, facility_level in (
    ('Headquarter', 'Facility_Level_5'),
    ('Referral Hospital_Central', 'Facility_Level_3'),
    ('Referral Hospital_Northern', 'Facility_Level_3'),
    ('Referral Hospital_Southern', 'Facility_Level_3'),
    ('Zomba Mental Hospital', 'Facility_Level_4'),
):
    is_this_facility = curr_staffing_table['District_Or_Hospital'] == facility_name
    assert is_this_facility.any(), facility_name + ' is missing from curr_staffing_table'
    curr_staffing_table.loc[is_this_facility, 'Facility_Level'] = facility_level

# Save the table without column 'Is_DistrictLevel'; staff counts in floats
curr_staffing_table_to_save = curr_staffing_table.drop(columns='Is_DistrictLevel', inplace=False)
# curr_staffing_table_to_save.to_csv(
#     outputlocation / 'human_resources' / 'actual' / 'ResourceFile_Staff_Table.csv', index=False)

# ---------------------------------------------------------------------------------------------------------------------
# *** Create the Master Facilities List
# This will be a listing of each facility and the district(s) to which they attach
# The different Facility Types are notional at this stage
# The Facility Level is the important variable for the staffing: staff are assumed to be allocated
# to a particular level within a district, or a referral hospital, or others
# They do not associate with a particular type of Facility

Facility_Levels = [0, '1a', '1b', 2, 3, 4, 5]
# 0: Community/Local level - HP, Village Health Committee, Community initiatives
# 1a: Primary level - Dispensary, HC, Clinic, Maternity facility
# 1b: Primary level - Community/Rural Hospital, CHAM (Community) Hospitals
# 2: Second level - District hospital, DHO
# 3: Tertiary/Referral level - KCH, MCH, ZCH + QECH as referral hospitals
# 4: Zomba Mental Hospital, which has very limited data in CHAI dataset
# 5: Headquarter, which has staff data (but no Time_Base or Incidence_Curr data)

# declare the Facility_Type variable
# Facility_Types = ['Health Post', 'Dispensary', 'Health Centre', 'Community or Rural Hospital', 'CHAM Hospital',
#                   'District Hospital', 'DHO', 'Referral Hospital', 'Zomba Mental Hospital']
# Facility_Types_Levels = dict(zip(Facility_Types, Facility_Levels))

pop_districts = pop['District'].values  # array; the 'pop_districts' used in previous lines is a DataFrame
pop_regions = pd.unique(pop['Region'])

# The Master Facilities List (mfl) is assembled from a list of small frames that are concatenated once
# at the end (DataFrame.append was removed in pandas 2.0, and one concat avoids re-copying the table
# on every addition). The list starts with an empty frame that fixes the three columns.
mfl_parts = [pd.DataFrame(columns=['Facility_Level', 'District', 'Region'])]

# Each district is assigned with a set of community level facs (0), a set of primary level facs (1a, 1b),
# and a set of second level facs (2).
# Therefore, the total sets of facs is 4 * no. of districts + 3 (RefHos per Region) + 1 (HQ) + 1 (ZMH) \
# = 4 * 32 + 5 = 133
for d in pop_districts:
    mfl_parts.append(pd.DataFrame({'Facility_Level': Facility_Levels[0:4], 'District': d,
                                   'Region': pop.loc[pop['District'] == d, 'Region'].values[0]}))

# Add in the Referral Hospitals, one for each region
for r in pop_regions:
    mfl_parts.append(pd.DataFrame({
        'Facility_Level': Facility_Levels[4], 'District': None, 'Region': r
    }, index=[0]))

# Add the ZMH
mfl_parts.append(pd.DataFrame({
    'Facility_Level': Facility_Levels[5], 'District': None, 'Region': None
}, index=[0]))

# Add the HQ
mfl_parts.append(pd.DataFrame({
    'Facility_Level': Facility_Levels[6], 'District': None, 'Region': None
}, index=[0]))

# Stack all parts with a fresh 0..n-1 index; sort=True is kept from the original append calls
mfl = pd.concat(mfl_parts, ignore_index=True, sort=True)

# Create the Facility_ID, equal to the row number
mfl.loc[:, 'Facility_ID'] = mfl.index

# Create a unique name for each Facility:
# 'Facility_Level_<level>_<district>' by default, with special names for levels 3, 4 and 5
name = 'Facility_Level_' + mfl['Facility_Level'].astype(str) + '_' + mfl['District']
name.loc[mfl['Facility_Level'] == 3] = 'Referral Hospital' + '_' + mfl.loc[
    mfl['Facility_Level'] == 3, 'Region']
name.loc[mfl['Facility_Level'] == 4] = 'Zomba Mental Hospital'
name.loc[mfl['Facility_Level'] == 5] = 'Headquarter'

mfl.loc[:, 'Facility_Name'] = name

# Save
mfl.to_csv(outputlocation / 'organisation' / 'ResourceFile_Master_Facilities_List.csv', index=False)

# ---------------------------------------------------------------------------------------------------------------------
# *** Create a simple mapping of all the facilities that persons in a district can access
facilities_by_district = pd.DataFrame(columns=mfl.columns)

# Facilities that are not attached to a district are shared; the masks that locate them
# are the same for every district, so they are built once
lacks_district = mfl['District'].isnull()
lacks_region = mfl['Region'].isnull()
is_zmh = lacks_district & lacks_region & (mfl['Facility_Name'] == 'Zomba Mental Hospital')
is_headquarter = lacks_district & lacks_region & (mfl['Facility_Name'] == 'Headquarter')

# Each district in pop_districts has access to its own facilities at levels 0 to 2,
# plus the referral hospital of its region, the ZMH and the HQ.
for d in pop_districts:
    the_region = pop.loc[pop['District'] == d, 'Region'].values[0]

    district_facs = mfl.loc[mfl['District'] == d]  # Include facs from level 0 to level 2

    # For each shared facility, take a copy of its row and record this district against it
    region_fac = mfl.loc[lacks_district & (mfl['Region'] == the_region)].reset_index(drop=True)
    region_fac.loc[0, 'District'] = d  # Level 3, referral hospital

    zmh_fac = mfl.loc[is_zmh].reset_index(drop=True)
    zmh_fac.loc[0, 'District'] = d  # Level 4, Zomba Mental Hospital

    headquarter_fac = mfl.loc[is_headquarter].reset_index(drop=True)
    headquarter_fac.loc[0, 'District'] = d  # Level 5, Headquarter

    facilities_by_district = pd.concat(
        [facilities_by_district, district_facs, region_fac, zmh_fac, headquarter_fac], ignore_index=True)

# check that the no. of facs is no. of districts times no. of fac levels = 32 * 7 = 224
assert len(facilities_by_district) == len(pop_districts) * len(Facility_Levels)

# Save
# facilities_by_district.to_csv(outputlocation / 'organisation' / 'ResourceFile_Facilities_For_Each_District.csv',
#                               index=False)

# ---------------------------------------------------------------------------------------------------------------------
# *** Now look at the types of appointments from the sheet 'Time_Curr'
sheet = pd.read_excel(workingfile, sheet_name='Time_Curr', header=None)

# get rid of the junk rows: keep row 7 (used as the header) and the listed data rows
header_row = [7]
data_rows = [8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23, 24, 26, 27]
trimmed = sheet.loc[header_row + data_rows]
# Column 1 supplies the row labels; the columns from 2 onwards supply the header and the values
data_import = pd.DataFrame(data=trimmed.iloc[1:, 2:].values, columns=trimmed.iloc[0, 2:], index=trimmed.iloc[1:, 1])

# get rid of the 'spacer' columns (entirely empty), then treat remaining blanks as 0
data_import = data_import.dropna(axis='columns', how='all').fillna(0)

# get rid of records for which there is no call on time of any type of officer at any fac type
columns_without_time = data_import.columns[data_import.sum() == 0]
data_import = data_import.drop(columns=columns_without_time)

# Note that in the updated 'Time_Curr', Disp has no time requirements at all for medical assistant M03,
# which is different from the previous version
assert data_import.loc['Disp', :].sum() == 0

# Note that the DCSA (CHW) never has a time requirement and that no appointments can be serviced at the HealthPost.
# We remedy this by inserting a new type of appointment, which only the DCSA can service, \
# and the time taken is 10 minutes.
new_appt_for_CHW = pd.Series(index=data_import.index,
                             name='E01_ConWithDCSA',
                             # New appointment type is a consultation with the DCSA (community health worker)
                             data=[
                                 0,  # Central Hosp - Time
                                 0,  # Central Hosp - Percent
                                 0,  # District Hosp - Time
                                 0,  # District Hosp - Percent
                                 0,  # Comm Hosp - Time
                                 0,  # Comm Hosp - Percent
                                 0,  # Urban Health Centre - Time     #10 mins
                                 0,  # Urban Health Centre - Percent  #100%
                                 0,  # Rural Health Centre - Time     #10 mins
                                 0,  # Rural Health Centre - Percent  #100%
                                 10.0,  # Health Post - Time
                                 1.0,  # Health Post - Percent
                                 0,  # Dispensary - Time              #10 mins
                                 0,  # Dispensary - Percent           #100%
                             ])

data_import = pd.concat([data_import, new_appt_for_CHW], axis=1)
assert data_import.loc['HP', :].sum() == 10.0

# We further create a new appointment type for pharmacy dispensing service that requires 5-minute working time
# from P03-Pharm Assist at facilities at levels 1a, 1b and 2, and 5 minutes from P02-Pharm Technician at
# central hospitals at level 3
new_appt_for_pharmdispensing_p03 = pd.Series(index=data_import.index,
                                             name='P03_PharmDispensing',
                                             data=[
                                                   0,  # Central Hosp - Time
                                                   0,  # Central Hosp - Percent
                                                   5.0,  # District Hosp - Time
                                                   1.0,  # District Hosp - Percent
                                                   5.0,  # Comm Hosp - Time
                                                   1.0,  # Comm Hosp - Percent
                                                   5.0,  # Urban Health Centre - Time
                                                   1.0,  # Urban Health Centre - Percent
                                                   5.0,  # Rural Health Centre - Time
                                                   1.0,  # Rural Health Centre - Percent
                                                   0.0,  # Health Post - Time
                                                   0.0,  # Health Post - Percent
                                                   5.0,  # Dispensary - Time
                                                   1.0,  # Dispensary - Percent
                                                   ])
new_appt_for_pharmdispensing_p02 = pd.Series(index=data_import.index,
                                             name='P02_PharmDispensing',
                                             data=[
                                                   5.0,  # Central Hosp - Time
                                                   1.0,  # Central Hosp - Percent
                                                   0.0,  # District Hosp - Time
                                                   0.0,  # District Hosp - Percent
                                                   0.0,  # Comm Hosp - Time
                                                   0.0,  # Comm Hosp - Percent
                                                   0.0,  # Urban Health Centre - Time
                                                   0.0,  # Urban Health Centre - Percent
                                                   0.0,  # Rural Health Centre - Time
                                                   0.0,  # Rural Health Centre - Percent
                                                   0.0,  # Health Post - Time
                                                   0.0,  # Health Post - Percent
                                                   0.0,  # Dispensary - Time
                                                   0.0,  # Dispensary - Percent
                                                   ])
data_import = pd.concat([data_import, new_appt_for_pharmdispensing_p03, new_appt_for_pharmdispensing_p02], axis=1)

# We now do not add service time for DHO as we think DHO does not deliver services directly
# Also, DHO itself in both DHIS2 and CHAI updated data does not have service record

# Add service times for Zomba Mental Hospital, by copying mental health appointment data of CenHos
# (Assuming ZMH only provide mental health services)
new_rows_for_ZMH = pd.DataFrame(index=['ZMH', 'ZMH_Per'], columns=data_import.columns.copy(),
                                data=0)
new_rows_for_ZMH.loc[:, ['C01_MentOPD', 'C01_MentClinic']] = data_import.loc[
    ['CenHos', 'CenHos_Per'], ['C01_MentOPD', 'C01_MentClinic']].copy().values

data_import = pd.concat([data_import, new_rows_for_ZMH])

# data_import ready!

# Break apart composite to give the appt_type and the officer_type
# This is used to know which column to read below...
# The column names of data_import are composite codes of the form '<officer type code>_<appt type code>',
# e.g. 'E01_ConWithDCSA'.
chai_composite_code = pd.Series(data_import.columns)
chai_code = chai_composite_code.str.split(pat='_', expand=True).reset_index(drop=True)
chai_code = chai_code.rename(columns={0: 'Officer_Type_Code', 1: 'Appt_Type_Code'})

# check that officer codes line up with the officer codes already imported
assert set(chai_code['Officer_Type_Code']).issubset(set(officer_types_table['Officer_Type_Code']))

# Make dataframe summarising the types of appointments

retained_appt_type_code = pd.unique(chai_code['Appt_Type_Code'])

# Rows 1, 2 and 6 of the raw sheet are taken as appointment category, appointment type and appointment type code.
# The row labels are passed as an explicit list, which is the documented list-like selector for .loc.
appt_types_table_import = sheet.loc[[1, 2, 6], 2:].transpose().reset_index(drop=True).copy()
appt_types_table_import = appt_types_table_import.rename(columns={1: 'Appt_Cat', 2: 'Appt_Type', 6: 'Appt_Type_Code'})
# Forward-fill the blank cells of each description column.
# .ffill() is the supported spelling of the deprecated fillna(method='ffill').
for label_column in ('Appt_Cat', 'Appt_Type', 'Appt_Type_Code'):
    appt_types_table_import[label_column] = appt_types_table_import[label_column].ffill()
appt_types_table_import = appt_types_table_import.drop_duplicates().reset_index(drop=True)

# starting with the retained appt codes, merge in these descriptions
appt_types_table = pd.DataFrame(data={'Appt_Type_Code': retained_appt_type_code}).merge(appt_types_table_import,
                                                                                        on='Appt_Type_Code', how='left',
                                                                                        indicator=True)

# Fill in the missing information about the appointment types that were added above:
# the appointment type code itself is used as both the category and the type description.
# (P02_PharmDispensing shares its appointment type code with P03_PharmDispensing.)
for added_appt in (new_appt_for_CHW, new_appt_for_pharmdispensing_p03):
    added_code = added_appt.name.split('_')[1]
    appt_types_table.loc[appt_types_table['Appt_Type_Code'] == added_code, ['Appt_Cat', 'Appt_Type']] = added_code

# drop the merge check column
appt_types_table = appt_types_table.drop(columns='_merge')

# Replace space with underscore in the Appt_Cat (double spaces first, so that they become a single underscore)
# The result is assigned back to the column: an in-place replace on the selected column is a chained assignment,
# which does not update the table when pandas Copy-on-Write is active.
appt_types_table['Appt_Cat'] = appt_types_table['Appt_Cat'].replace(to_replace='  ', value='_', regex=True)
appt_types_table['Appt_Cat'] = appt_types_table['Appt_Cat'].replace(to_replace=' ', value='_', regex=True)

# Check no holes
assert not pd.isnull(appt_types_table).any().any()

# Save
appt_types_table.to_csv(outputlocation / 'human_resources' / 'definitions' / 'ResourceFile_Appt_Types_Table.csv',
                        index=False)

# ---------------------------------------------------------------------------------------------------------------------
# *** Now, make the ApptTimeTable
# (Table that gives, for each appointment type occurring at each facility level, the time required of each
# type of officer)

# The sheet gives the % of appointments that require a particular type of officer and the time taken if it does
# So, turn that into an Expectation of the time taken for each type of officer (multiplying together)

# This sheet distinguished between different types of facility in terms of the time taken by appointments occurring
# at each.
# But the CHAI data do not distinguish how many officers work at each different level of facility
# (Available staff counts for only districts (level = 0,1a,1b,2), CenHos (level = 3), and HQ (level = 5))
# Therefore, we will map these to the facility level that have been defined.
# NB. In doing this, we:
# - assume that the time taken for all appointments at each level is modelled by that for the average of
#     facility types at that level

# CHAI: Headquarter ---> our "headquarter" (level = 5); no time data, so NaN is assigned
# CHAI: Zomba Mental Hospital ---> our 'Zomba Mental Hospital' / 'ZMH' (level = 4)
# CHAI: Central_Hospital ---> our "Referral Hospital" (level = 3)
# CHAI: District_Hospital ---> our "second level" facilities (level = 2)
# CHAI: Community_Hospital ---> our "primary level" facilities (level = 1b)
# CHAI: Urban_HealthCentre ---> averaged into our "primary level" facilities (level = 1a)
# CHAI: Rural_HealthCentre ---> averaged into our "primary level" facilities (level = 1a)
# CHAI: HealthPost ---> our "community level" facilities (level = 0)
# NB. No DHO rows and no Dispensary ('Disp') rows of data_import are read in the calculation below.

# level 4
ZMH_ExpectTime = data_import.loc['ZMH'] * data_import.loc['ZMH_Per']

# Level 3
Central_Hospital_ExpecTime = data_import.loc['CenHos'] * data_import.loc['CenHos_Per']

# level 5; No data available for Headquarter; we assign NAN to it
# (copying the level-3 series only to obtain a series with the same index)
HQ_ExpecTime = Central_Hospital_ExpecTime.copy()
HQ_ExpecTime.loc[:] = np.nan

# level 2
District_Hospital_ExpecTime = data_import.loc['DisHos'] * data_import.loc['DisHos_Per']

# level 1b
Community_Hospital_ExpecTime = data_import.loc['ComHos'] * data_import.loc['ComHos_Per']

# level 1a
Urban_HealthCentre_ExpecTime = data_import.loc['UrbHC'] * data_import.loc['UrbHC_Per']
Rural_HealthCentre_ExpecTime = data_import.loc['RurHC'] * data_import.loc['RurHC_Per']

# level 0
HealthPost_ExpecTime = data_import.loc['HP'] * data_import.loc['HP_Per']

# Average time for level 1a, which has data for more than one facility type
Avg_Level1a_ExpectTime = (Urban_HealthCentre_ExpecTime + Rural_HealthCentre_ExpecTime) / 2

# Assemble: one column per facility level, one row per composite '<officer>_<appt type>' code
X = pd.DataFrame({
    5: HQ_ExpecTime,  # (Headquarter)
    4: ZMH_ExpectTime,  # (Zomba Mental Hospital)
    3: Central_Hospital_ExpecTime,  # (our "Referral Hospital" at region level)
    2: District_Hospital_ExpecTime,  # (DisHos at second level )
    '1b': Community_Hospital_ExpecTime,  # (ComHos at primary level)
    '1a': Avg_Level1a_ExpectTime,  # (UrbHC,RurHC at primary level)
    0: HealthPost_ExpecTime  # (HP at community level)
})

assert set(X.columns) == set(Facility_Levels)

# Split out the index into appointment type and officer type
labels = pd.Series(X.index, index=X.index).str.split(pat='_', expand=True)
labels = labels.rename(columns={0: 'Officer_Type_Code', 1: 'Appt_Type_Code'})
Y = pd.concat([X, labels], axis=1)
# Long format: one row per (officer type, appointment type, facility level)
ApptTimeTable = pd.melt(Y, id_vars=['Officer_Type_Code', 'Appt_Type_Code'],
                        var_name='Facility_Level', value_name='Time_Taken_Mins')

# Confirm that Facility_Level is an int ---> No longer needed, as level 1a and 1b are not integers
# ApptTimeTable['Facility_Level'] = ApptTimeTable['Facility_Level'].astype(int)

# Merge in Officer_Type
ApptTimeTable = ApptTimeTable.merge(officer_types_table, on='Officer_Type_Code')

# confirm that we have the same number of entries as we were expecting
assert len(ApptTimeTable) == len(Facility_Levels) * len(data_import.columns)

# drop the rows that contain no call on resources, including NAN values
ApptTimeTable = ApptTimeTable.drop(ApptTimeTable[ApptTimeTable['Time_Taken_Mins'] == 0].index)
ApptTimeTable = ApptTimeTable.drop(ApptTimeTable[pd.isnull(ApptTimeTable['Time_Taken_Mins'])].index)
# reset index
ApptTimeTable.reset_index(drop=True, inplace=True)

# Generate appt_time_table_coarse with officer_category, instead of officer_type
# Only the time column is summed. Without the explicit selection, pandas >= 2.0 (numeric_only=False by default)
# would also "sum" the remaining text columns by concatenating their strings and write them to the resource file.
appt_time_table_coarse = ApptTimeTable.groupby(
    ['Appt_Type_Code', 'Facility_Level', 'Officer_Category']
)[['Time_Taken_Mins']].sum().reset_index()

# Save
# ApptTimeTable.to_csv(
#     outputlocation / 'human_resources' / 'definitions' / 'ResourceFile_Appt_Time_Table.csv',
#     index=False)
appt_time_table_coarse.to_csv(
    outputlocation / 'human_resources' / 'definitions' / 'ResourceFile_Appt_Time_Table.csv',
    index=False)

# ---------------------------------------------------------------------------------------------------------------------
# *** Create a table that determines what kind of appointment can be serviced in each Facility Level
# An appointment type can happen at a facility level exactly when ApptTimeTable holds at least one row
# (i.e. some time requirement) for that (appointment type, facility level) pair.
fine_pairs = set(zip(ApptTimeTable['Appt_Type_Code'], ApptTimeTable['Facility_Level']))

ApptType_By_FacLevel = pd.DataFrame(
    data=[[(code, level) in fine_pairs for level in Facility_Levels]
          for code in appt_types_table['Appt_Type_Code']],
    index=appt_types_table['Appt_Type_Code'],
    columns=Facility_Levels,
    dtype=bool,
).add_prefix('Facility_Level_')

# Generate appt_type_by_level_coarse consider officer_category, instead of officer_type
# Same construction, but reading the pairs from appt_time_table_coarse.
coarse_pairs = set(zip(appt_time_table_coarse['Appt_Type_Code'], appt_time_table_coarse['Facility_Level']))

appt_type_by_level_coarse = pd.DataFrame(
    data=[[(code, level) in coarse_pairs for level in Facility_Levels]
          for code in appt_types_table['Appt_Type_Code']],
    index=appt_types_table['Appt_Type_Code'],
    columns=Facility_Levels,
    dtype=bool,
).add_prefix('Facility_Level_')

# Check; The two tables should be equal
assert (appt_type_by_level_coarse == ApptType_By_FacLevel).all().all()

# Save
ApptType_By_FacLevel.to_csv(
    outputlocation / 'human_resources' / 'definitions' / 'ResourceFile_ApptType_By_FacLevel.csv', index=True)

# --- check
# Look to see where different types of staff member need to be located:
# This is just a reverse reading of where there are non-zero requests for time of particular officer-types

# One record per (appointment type, facility level). The records are gathered in a list and turned into a
# table once, because DataFrame.append no longer exists in pandas >= 2.0.
officer_need_records = []

for a in appt_types_table['Appt_Type_Code'].values:
    for f in Facility_Levels:

        # get the staff types required for this appt

        block = ApptTimeTable.loc[(ApptTimeTable['Appt_Type_Code'] == a) & (ApptTimeTable['Facility_Level'] == f)]

        if len(block) == 0:
            # no requirement expressed => The appt is not possible at this location
            need_officer_types = False
        else:
            need_officer_types = list(block['Officer_Type_Code'])

        officer_need_records.append({'Facility_Level': f,
                                     'Appt_Type_Code': a,
                                     'Officer_Type_Codes': need_officer_types})

Officers_Need_For_Appt = pd.DataFrame(officer_need_records,
                                      columns=['Facility_Level', 'Appt_Type_Code', 'Officer_Type_Codes'])

# Turn this into the set of staff that are required for each type of appointment
# The table starts as all False; built directly as booleans instead of relying on fillna to downcast an
# empty object table.
FacLevel_By_Officer = pd.DataFrame(data=False,
                                   columns=Facility_Levels,
                                   index=officer_types_table['Officer_Type_Code'].values,
                                   dtype=bool)

# Walk through the records once and flag every officer type that is called at the record's facility level
for fac_level, officer_types in zip(Officers_Need_For_Appt['Facility_Level'],
                                    Officers_Need_For_Appt['Officer_Type_Codes']):

    if not isinstance(officer_types, list):
        # False marker: such an appointment at such a facility is not possible
        continue

    for o in officer_types:
        # the mask only touches officer types present in the table (no new rows are ever created here)
        FacLevel_By_Officer.loc[(FacLevel_By_Officer.index == o), fac_level] = True

# We note that three officer_types ("T01: Nutrition Staff", "R03: Sonographer" and "R04: Radiotherapy technician") are
#  apparently not called by any appointment type

# Assign that the Nutrition Staff will go to the Referral Hospitals (level = 3)
FacLevel_By_Officer.loc['T01', 3] = True

# Assign that the Sonographer will go to the Referral Hospitals (level = 3)
FacLevel_By_Officer.loc['R03', 3] = True

# Assign that the Radiotherapist will go to the Referral Hospitals (level = 3)
FacLevel_By_Officer.loc['R04', 3] = True

# As an option, we could assign staff at HQ to level 5 according to the info of staff
# Get the sets of officers of funded and current staff
fund_staff_HQ = fund_staffing_table[fund_staffing_table['District_Or_Hospital'] == 'Headquarter'].copy()
curr_staff_HQ = curr_staffing_table[curr_staffing_table['District_Or_Hospital'] == 'Headquarter'].copy()
fund_staff_HQ.drop(columns=['District_Or_Hospital', 'Facility_Level', 'Is_DistrictLevel'], inplace=True)
curr_staff_HQ.drop(columns=['District_Or_Hospital', 'Facility_Level', 'Is_DistrictLevel'], inplace=True)
# keep only the officer columns with a positive staff count at Headquarter
fund_staff_HQ_Positive = fund_staff_HQ.loc[:, (fund_staff_HQ > 0).any(axis=0)]
curr_staff_HQ_Positive = curr_staff_HQ.loc[:, (curr_staff_HQ > 0).any(axis=0)]
# The union of the two sets
staff_call_at_HQ = fund_staff_HQ_Positive.columns.union(curr_staff_HQ_Positive.columns)
# Assign true value to staff_call_at_HQ
for s in staff_call_at_HQ:
    FacLevel_By_Officer.loc[s, 5] = True

# Check that all types of officer are allocated to at least one type of facility excl. HQ/Level_5
# (the check reads the first six columns of the table)
assert (FacLevel_By_Officer.iloc[:, 0:6].sum(axis=1) > 0).all()

# Change columns names: 0 -> Facility_Level_0
FacLevel_By_Officer = FacLevel_By_Officer.add_prefix('Facility_Level_')

# ---------------------------------------------------------------------------------------------------------------------
# *** Get Hours and Minutes Worked Per Staff Member, i.e., the daily capabilities
# First, read-in the number of working hours and days for each type of officer

pft_sheet = pd.read_excel(workingfile, sheet_name='PFT', header=None)

# Every quantity below is one row of the sheet, read over the same columns (positions 2 to 22)
pft_cols = np.arange(2, 23)

officer_types_import = pft_sheet.iloc[3, pft_cols]

# The sheet must list exactly the officer types of officer_types_table
assert set(officer_types_import) == set(officer_types_table['Officer_Type_Code'])
assert len(officer_types_import) == len(officer_types_table['Officer_Type_Code'])

# Total working days per year
days_per_year_men = pft_sheet.iloc[16, pft_cols]
days_per_year_women = pft_sheet.iloc[17, pft_cols]
days_per_year_pregwomen = pft_sheet.iloc[18, pft_cols]

# Percents of men, nonpregnant women, and pregnant women
# (nonpregnant women = row 68 minus the pregnant women of row 71)
fr_men = pft_sheet.iloc[66, pft_cols]
fr_pregwomen = pft_sheet.iloc[71, pft_cols]
fr_nonpregwomen = pft_sheet.iloc[68, pft_cols] - fr_pregwomen

# Total average working days: working days of each group weighted by the share of that group
workingdays = (fr_men * days_per_year_men) + (fr_nonpregwomen * days_per_year_women) + (
    fr_pregwomen * days_per_year_pregwomen)

# patient facing (i.e. non-admin working) minutes daily at
# district hospitals, community hospitals, health centres
mins_daily_dishos = pft_sheet.iloc[37, pft_cols]
mins_daily_comhos = pft_sheet.iloc[42, pft_cols]
mins_daily_hc = pft_sheet.iloc[46, pft_cols]

# ... and the same in hours
hrs_daily_dishos = mins_daily_dishos / 60
hrs_daily_comhos = mins_daily_comhos / 60
hrs_daily_hc = mins_daily_hc / 60

# Total mins per year at district hospitals, community hospitals, health centres
mins_yearly_dishos = mins_daily_dishos * workingdays
mins_yearly_comhos = mins_daily_comhos * workingdays
mins_yearly_hc = mins_daily_hc * workingdays

# Average number of mins per day;
# note that the average is calculated on 365.25 days (not the working days) per year
av_mins_daily_dishos = mins_yearly_dishos / 365.25
av_mins_daily_comhos = mins_yearly_comhos / 365.25
av_mins_daily_hc = mins_yearly_hc / 365.25

# PFT - dishos, comhos, hc individual columns; one row per officer type
HosHC_patient_facing_time = pd.DataFrame(
    {
        'Officer_Type_Code': officer_types_import,
        'DisHos_Av_Mins_Per_Day': av_mins_daily_dishos,
        'ComHos_Av_Mins_Per_Day': av_mins_daily_comhos,
        'HC_Av_Mins_Per_Day': av_mins_daily_hc,
        'Total_Av_Working_Days': workingdays,
        'DisHos_Hrs_Per_Day': hrs_daily_dishos,
        'ComHos_Hrs_Per_Day': hrs_daily_comhos,
        'HC_Hrs_Per_Day': hrs_daily_hc,
    }
).reset_index(drop=True)

# The new PFT has no minutes for M01 at health centres,
# but in Time_Curr, IPAdmissions/RMNCH/... appointments at Urban HCs all need time from M01.
# To solve this inconsistency between PFT and Time_Curr, the health-centre minutes of the first row of the table
# are set to the average of that row's DisHos and ComHos minutes.
# NOTE(review): this assumes that M01 is the officer type in row 0 - confirm against the PFT sheet.
m01_dishos_mins = HosHC_patient_facing_time.loc[0, 'DisHos_Av_Mins_Per_Day']
m01_comhos_mins = HosHC_patient_facing_time.loc[0, 'ComHos_Av_Mins_Per_Day']
HosHC_patient_facing_time.loc[0, 'HC_Av_Mins_Per_Day'] = (m01_dishos_mins + m01_comhos_mins) / 2

# How to deal with cadres (DCSA, Dental, Mental, Radiography) that do not have minutes at all in PFT,
# whereas they have time requirements in Time_Curr?
# (Compared to old PFT sheet,
# the new PFT has updated all info on available working days/non-admin daily minutes/portion of male/female/pregfemale)
# A quick fix is to use the average daily minutes of those cadres from old PFT table;
# The info required to calculate these minutes will be from the old PFT table.
pft_old = pd.read_excel(working_file_old, sheet_name='PFT', header=None)

# As for the new sheet, every quantity is one row of the old sheet read over columns at positions 2 to 22
pft_old_cols = np.arange(2, 23)

officer_types_old = pft_old.iloc[2, pft_old_cols]
# The old sheet must list exactly the officer types of officer_types_table
assert set(officer_types_old) == set(officer_types_table['Officer_Type_Code'])
assert len(officer_types_old) == len(officer_types_table['Officer_Type_Code'])

# Total working days per year
days_men_old = pft_old.iloc[15, pft_old_cols]
days_women_old = pft_old.iloc[16, pft_old_cols]
days_pregwomen_old = pft_old.iloc[17, pft_old_cols]

# Percents of men, nonpregnant women, and pregnant women
# (row 55 is split into pregnant / nonpregnant women using the fraction in row 57)
fr_men_old = pft_old.iloc[53, pft_old_cols]
fr_pregwomen_old = pft_old.iloc[55, pft_old_cols] * pft_old.iloc[57, pft_old_cols]
fr_nonpregwomen_old = pft_old.iloc[55, pft_old_cols] * (1 - pft_old.iloc[57, pft_old_cols])

# Total average working days: working days of each group weighted by the share of that group
working_days_old = (fr_men_old * days_men_old) + (fr_nonpregwomen_old * days_women_old) + (
    fr_pregwomen_old * days_pregwomen_old)

# patient facing (i.e. non-admin working) minutes daily at hospitals and health centres
# (for health centres: row 26 minus row 34)
mins_daily_hos_old = pft_old.iloc[36, pft_old_cols]
mins_daily_hc_old = pft_old.iloc[26, pft_old_cols] - pft_old.iloc[34, pft_old_cols]

# ... and the same in hours
hrs_daily_hos_old = mins_daily_hos_old / 60
hrs_daily_hc_old = mins_daily_hc_old / 60

# Total mins per year at hospitals and health centres
mins_yearly_hos_old = mins_daily_hos_old * working_days_old
mins_yearly_hc_old = mins_daily_hc_old * working_days_old

# Average number of mins per day, calculated on 365.25 days per year
av_mins_daily_hos_old = mins_yearly_hos_old / 365.25
av_mins_daily_hc_old = mins_yearly_hc_old / 365.25

# PFT - DisHos, ComHos, HC individually
# DisHos and ComHos both use hos data
HosHC_patient_facing_time_old = pd.DataFrame(
    {
        'Officer_Type_Code': officer_types_old,
        'DisHos_Av_Mins_Per_Day': av_mins_daily_hos_old,
        'ComHos_Av_Mins_Per_Day': av_mins_daily_hos_old,
        'HC_Av_Mins_Per_Day': av_mins_daily_hc_old,
        'Total_Av_Working_Days': working_days_old,
        'DisHos_Hrs_Per_Day': hrs_daily_hos_old,
        'ComHos_Hrs_Per_Day': hrs_daily_hos_old,
        'HC_Hrs_Per_Day': hrs_daily_hc_old,
    }
).reset_index(drop=True)

# check the new and old tables have same columns and officers (in the same order)
assert (HosHC_patient_facing_time_old['Officer_Type_Code'] == HosHC_patient_facing_time['Officer_Type_Code']).all()
assert (HosHC_patient_facing_time_old.columns == HosHC_patient_facing_time.columns).all()

# check new and old pft difference:
# relative change (new - old) / old of every column except the first one (Officer_Type_Code)
HosHC_pft_diff = pd.DataFrame(columns=HosHC_patient_facing_time.columns)
HosHC_pft_diff['Officer_Type_Code'] = HosHC_patient_facing_time['Officer_Type_Code'].values
HosHC_pft_diff.iloc[:, 1:] = (
    (HosHC_patient_facing_time.iloc[:, 1:].values -
     HosHC_patient_facing_time_old.iloc[:, 1:].values) /
    HosHC_patient_facing_time_old.iloc[:, 1:].values
)
# Add a final row holding the mean of each difference column (Officer_Type_Code is left missing in that row).
# pd.concat is used because DataFrame.append no longer exists in pandas >= 2.0.
pft_diff_mean_row = HosHC_pft_diff.iloc[:, 1:].mean(axis=0).to_frame().T
HosHC_pft_diff = pd.concat([HosHC_pft_diff, pft_diff_mean_row], ignore_index=True)

# save
# HosHC_pft_diff.to_csv(
#     outputlocation / 'human_resources' / 'definitions' / 'New_Old_PFT_Difference.csv',
#     index=False)

# now add the old data of those blanks cadres to the updated PFT table
# (all rows from position 11 onwards are overwritten with the corresponding rows of the old table)
HosHC_patient_facing_time.iloc[11:, :] = HosHC_patient_facing_time_old.iloc[11:, :].copy()

# PFT table ready!

# Create final tables of daily time available at each facility by officer type: Facility_ID, Facility_Type,
# Facility_Level, Officer_Type, Officer_Type_Code, Total Average Minutes Per Day, Staff_Count

# --- Daily capability for funded staff; staff counts in floats
# For float staff counts, calculate total minutes per day
funded_staff_floats = fund_staffing_table_to_save.copy()  # staff counts
funded_daily_minutes = funded_staff_floats.copy()  # total minutes per day

for i in funded_daily_minutes.index:
    the_level = funded_daily_minutes.loc[i, 'Facility_Level']

    # The facility level of the row decides which patient-facing-time column applies
    if the_level in ['Facility_Level_0', 'Facility_Level_1a']:  # Levels 0, 1a; HC minutes
        pft_column = 'HC_Av_Mins_Per_Day'
    elif the_level == 'Facility_Level_1b':  # Level 1b; ComHos minutes
        pft_column = 'ComHos_Av_Mins_Per_Day'
    else:  # Levels 2 and above; DisHos and CenHos minutes
        pft_column = 'DisHos_Av_Mins_Per_Day'

    # minutes = staff count * average daily minutes of one officer of that type
    for officer in officer_types_table['Officer_Type_Code']:
        is_officer = HosHC_patient_facing_time['Officer_Type_Code'] == officer
        t = funded_staff_floats.loc[i, officer] * HosHC_patient_facing_time.loc[is_officer, pft_column]
        funded_daily_minutes.loc[i, officer] = t.values[0]

# Long format
funded_staff_floats = pd.melt(funded_staff_floats, id_vars=['District_Or_Hospital', 'Facility_Level'],
                              var_name='Officer_Type_Code', value_name='Staff_Count')
funded_daily_minutes = pd.melt(funded_daily_minutes, id_vars=['District_Or_Hospital', 'Facility_Level'],
                               var_name='Officer_Type_Code', value_name='Total_Mins_Per_Day')
# Merge into daily capability table
funded_daily_capability = funded_daily_minutes.merge(funded_staff_floats, how='left')

# Reset facility level column to exclude 'Facility_Level_'
# (keep the third piece of the underscore-separated label)
funded_level_parts = funded_daily_capability['Facility_Level'].str.split(pat='_', expand=True)
funded_daily_capability['Facility_Level'] = funded_level_parts.iloc[:, 2]

# Check that zero-minute rows are also zero staff count rows, making sure that any zero capability is
# due to no staff (but not due to zero patient facing time while having some staff)
assert not funded_daily_capability['Total_Mins_Per_Day'].isnull().values.any()
funded_zero_minute_rows = funded_daily_capability[funded_daily_capability['Total_Mins_Per_Day'] == 0].index
funded_zero_staff_rows = funded_daily_capability[funded_daily_capability['Staff_Count'] == 0].index
assert funded_zero_minute_rows.equals(funded_zero_staff_rows)
# Reset index
funded_daily_capability.reset_index(drop=True, inplace=True)

# Add 'District' and 'Facility_Name' columns
for i in funded_daily_capability.index:
    the_level = funded_daily_capability.loc[i, 'Facility_Level']
    if the_level in ['0', '1a', '1b', '2']:
        # for these levels 'District_Or_Hospital' is used as the district, and the facility name is composed
        the_district = funded_daily_capability.loc[i, 'District_Or_Hospital']
        funded_daily_capability.loc[i, 'District'] = the_district
        funded_daily_capability.loc[i, 'Facility_Name'] = 'Facility_Level_' + str(the_level) + '_' + the_district
    else:
        # for the other levels 'District_Or_Hospital' is used directly as the facility name
        funded_daily_capability.loc[i, 'Facility_Name'] = funded_daily_capability.loc[i, 'District_Or_Hospital']
# Drop column 'District_Or_Hospital'
funded_daily_capability.drop(columns='District_Or_Hospital', inplace=True)

# Add info from mfl: Region and Facility ID, looked up by facility name
for i in funded_daily_capability.index:
    the_facility_name = funded_daily_capability.loc[i, 'Facility_Name']
    is_facility = mfl['Facility_Name'] == the_facility_name
    the_ID = mfl.loc[is_facility, 'Facility_ID']
    the_region = mfl.loc[is_facility, 'Region']

    funded_daily_capability.loc[i, 'Facility_ID'] = the_ID.values
    funded_daily_capability.loc[i, 'Region'] = the_region.values

# Add 'officer_category' info
funded_daily_capability = funded_daily_capability.merge(officer_types_table, on='Officer_Type_Code', how='left')

# Group by officer categories; consider coarse officers
# (dropna=False keeps the groups whose District or Region is missing)
funded_daily_capability_coarse = funded_daily_capability.groupby(
    ['Facility_ID', 'Facility_Name', 'Facility_Level', 'District', 'Region', 'Officer_Category'],
    dropna=False
)[['Total_Mins_Per_Day', 'Staff_Count']].sum().reset_index()

# Since the zero-minute rows have not been dropped above,
# check that there are entries for all coarse cadres and all facility id
assert set(mfl.Facility_ID) == set(funded_daily_capability_coarse.Facility_ID.drop_duplicates())
assert len(funded_daily_capability_coarse) == len(mfl) * len(officer_types_table.Officer_Category.drop_duplicates())

# --- Daily capability for current staff; staff counts in floats
# Start from the float staff counts and turn every count into total patient-facing minutes per day
curr_staff_floats = curr_staffing_table_to_save.copy()  # staff counts
curr_daily_minutes = curr_staff_floats.copy()  # total minutes per day

for i in curr_daily_minutes.index:
    the_level = curr_daily_minutes.loc[i, 'Facility_Level']

    # The facility level of the row decides which average-minutes column applies to all its officers
    if the_level == 'Facility_Level_1b':  # Level 1b; ComHos minutes
        minutes_column = 'ComHos_Av_Mins_Per_Day'
    elif the_level in ['Facility_Level_0', 'Facility_Level_1a']:  # Levels 0, 1a; HC minutes
        minutes_column = 'HC_Av_Mins_Per_Day'
    else:  # Levels 2 and above; DisHos and CenHos minutes
        minutes_column = 'DisHos_Av_Mins_Per_Day'

    for officer in officer_types_table['Officer_Type_Code']:
        # minutes = staff count of this officer type * average patient-facing minutes of this officer type
        is_officer = HosHC_patient_facing_time['Officer_Type_Code'] == officer
        t = curr_staff_floats.loc[i, officer] * HosHC_patient_facing_time.loc[is_officer, minutes_column]
        curr_daily_minutes.loc[i, officer] = t.values[0]

# Long format
# Both tables are reshaped to one row per ('District_Or_Hospital', 'Facility_Level', 'Officer_Type_Code')
curr_staff_floats = pd.melt(curr_staff_floats, id_vars=['District_Or_Hospital', 'Facility_Level'],
                            var_name='Officer_Type_Code', value_name='Staff_Count')
curr_daily_minutes = pd.melt(curr_daily_minutes, id_vars=['District_Or_Hospital', 'Facility_Level'],
                             var_name='Officer_Type_Code', value_name='Total_Mins_Per_Day')
# Merge into daily capability table
# (no `on` is given, so the merge keys are the columns the two long tables have in common)
curr_daily_capability = curr_daily_minutes.merge(curr_staff_floats, how='left')

# Reset facility level column to exclude 'Facility_Level_'
# (the value is split on '_' and the third piece is kept, e.g. 'Facility_Level_1a' -> '1a')
curr_daily_capability['Facility_Level'] = \
    curr_daily_capability['Facility_Level'].str.split(pat='_', expand=True).iloc[:, 2]
# Check that zero-minute rows are also zero staff count rows, making sure that any zero capability is
# due to no staff (but not due to zero patient facing time while having some staff)
assert not curr_daily_capability['Total_Mins_Per_Day'].isnull().values.any()
assert curr_daily_capability[curr_daily_capability['Total_Mins_Per_Day'] == 0].index.equals(
    curr_daily_capability[curr_daily_capability['Staff_Count'] == 0].index)
# Reset index
curr_daily_capability.reset_index(drop=True, inplace=True)

# Add 'District' and 'Facility_Name' columns
# For levels '0', '1a', '1b' and '2' the 'District_Or_Hospital' value is used as the district and the facility
# name is built as 'Facility_Level_<level>_<district>'. For every other level the 'District_Or_Hospital' value
# is used directly as the facility name and 'District' is not set for that row.
for i in curr_daily_capability.index:
    the_level = curr_daily_capability.loc[i, 'Facility_Level']
    if the_level in ['0', '1a', '1b', '2']:
        the_district = curr_daily_capability.loc[i, 'District_Or_Hospital']
        curr_daily_capability.loc[i, 'District'] = the_district
        curr_daily_capability.loc[i, 'Facility_Name'] = 'Facility_Level_' + str(the_level) + '_' + the_district
    else:
        curr_daily_capability.loc[i, 'Facility_Name'] = curr_daily_capability.loc[i, 'District_Or_Hospital']
# Drop column 'District_Or_Hospital'
curr_daily_capability.drop(columns='District_Or_Hospital', inplace=True)

# Add info from mfl: Region and Facility ID
# Each row is matched to the master facilities list by 'Facility_Name'.
# NOTE(review): `.values` is an array, so these assignments presumably rely on every facility name matching
# exactly one row of mfl - confirm.
for i in curr_daily_capability.index:
    the_facility_name = curr_daily_capability.loc[i, 'Facility_Name']
    the_ID = mfl.loc[mfl['Facility_Name'] == the_facility_name, 'Facility_ID']
    the_region = mfl.loc[mfl['Facility_Name'] == the_facility_name, 'Region']

    curr_daily_capability.loc[i, 'Facility_ID'] = the_ID.values
    curr_daily_capability.loc[i, 'Region'] = the_region.values

# Add 'officer_category' info
# (left merge on 'Officer_Type_Code' keeps every capability row)
curr_daily_capability = curr_daily_capability.merge(officer_types_table, on='Officer_Type_Code', how='left')

# Group by officer categories; consider coarse officers
# Minutes and staff counts are summed within each facility and officer category.
# dropna=False keeps the groups whose key has a missing value (e.g. rows for which 'District' was not set above).
curr_daily_capability_coarse = pd.DataFrame(
    curr_daily_capability.groupby(
        ['Facility_ID', 'Facility_Name', 'Facility_Level', 'District', 'Region', 'Officer_Category'],
        dropna=False)[['Total_Mins_Per_Day', 'Staff_Count']].sum()
).reset_index()

# Zero-minute rows were checked above but not dropped, so check that every facility id in mfl is present
# and that there is exactly one row per facility and coarse officer category
assert set(mfl.Facility_ID) == set(curr_daily_capability_coarse.Facility_ID.drop_duplicates())
assert len(curr_daily_capability_coarse) == len(mfl) * len(officer_types_table.Officer_Category.drop_duplicates())

# Save
# Daily capabilities of current staff -> 'actual' scenario folder
curr_daily_capability_coarse.to_csv(
    outputlocation / 'human_resources' / 'actual' / 'ResourceFile_Daily_Capabilities.csv', index=False)

# Daily capabilities of funded staff -> 'funded' scenario folder
# Comment out the two lines below when generating the funded_plus capability
funded_daily_capability_coarse.to_csv(
    outputlocation / 'human_resources' / 'funded' / 'ResourceFile_Daily_Capabilities.csv', index=False)

# *** Only for funded_plus ********************************************************************************************
# (uncomment to write the same table to the 'funded_plus' scenario folder instead)
# funded_daily_capability_coarse.to_csv(
#     outputlocation / 'human_resources' / 'funded_plus' / 'ResourceFile_Daily_Capabilities.csv', index=False)
# *********************************************************************************************************************

# ---------------------------------------------------------------------------------------------------------------------
# Final check: for every appointment required at a particular level (in Appt_Time_Table),
# the staff capabilities must be available to satisfy it, for a person in any district
# (including the regional and national facilities).
# Also, find the HCW who are not there (i.e., with 0 capability) when required by an appt type for each facility id
# (i.e., each pair of facility level and district).


# Define the check function
def all_appts_can_run(capability):
    """
    Report, for every appointment requirement, the sites where the required officer category has no capability.

    Each row of the module-level `appt_time_table_coarse` (an appointment type at a facility level needing an
    officer category) is checked against `capability`. A site meets the requirement when `capability` holds at
    least one row for that site, that level (compared as `str(level)`) and that officer category with
    'Total_Mins_Per_Day' > 0.

    - Levels in `Facility_Levels[0:4]` (levels 0, 1a, 1b, 2) are checked for every district in `pop_districts`.
    - Level 3 is checked for every region in `pop_regions`.
    - Level 4 (Zomba Mental Hospital) is checked once.

    :param capability: DataFrame with the columns 'District', 'Region', 'Facility_Level', 'Officer_Category'
        and 'Total_Mins_Per_Day'.
    :return: a copy of `appt_time_table_coarse` without 'Time_Taken_Mins' and with a 'fail_district' column that
        holds either 'All districts pass' or the failing sites, each one followed by a comma.
    :raises AssertionError: if an appointment row has a non-positive time, or a facility level that is none of
        the levels listed above.
    """
    # Create a table storing whether the appts have consistent requirements/demand and capabilities/supply
    appt_have_or_miss_capability = appt_time_table_coarse.copy()

    # Ensure that each row/appt is really required before deleting the column of minutes.
    # Raised explicitly (not via `assert`) so that the check also runs under `python -O`.
    if not (appt_have_or_miss_capability['Time_Taken_Mins'] > 0).all():
        raise AssertionError("Every row of appt_time_table_coarse must have 'Time_Taken_Mins' > 0.")
    appt_have_or_miss_capability.drop(columns=['Time_Taken_Mins'], inplace=True)

    # Store the info of district (including central hospital, ZMH) that fails
    appt_have_or_miss_capability.loc[:, 'fail_district'] = ''

    # Only rows with positive minutes count as available capability; filter them once for all lookups
    staffed = capability[capability['Total_Mins_Per_Day'] > 0]

    for _I in appt_have_or_miss_capability.index:  # Loop through all appts
        # Get the info of required level and officer_category
        L = appt_have_or_miss_capability.loc[_I, 'Facility_Level']
        the_officer_category = appt_have_or_miss_capability.loc[_I, 'Officer_Category']

        # Available capability of the required officer_category at the required level, over all sites
        matching = staffed[
            (staffed['Facility_Level'] == str(L)) &
            (staffed['Officer_Category'] == the_officer_category)]

        # Collect the sites (district, central hospital, ZMH) that fail to provide the officer_category
        if L in Facility_Levels[0:4]:  # Levels 0, 1a, 1b, 2: one facility per district
            districts_served = set(matching['District'])
            failures = [D + ',' for D in pop_districts if D not in districts_served]
        elif L == 3:  # Level 3 central hospital: one per region
            regions_served = set(matching['Region'])
            failures = ['Referral Hospital_' + region + ','
                        for region in pop_regions if region not in regions_served]
        elif L == 4:  # Zomba Mental Hospital
            failures = [] if len(matching) > 0 else ['Zomba Mental Hospital,']
        else:
            # There should be no other level; otherwise, the tables generated above are incorrect
            raise AssertionError(
                f'Unexpected Facility_Level {L!r} in appt_time_table_coarse; expected levels 0, 1a, 1b, 2, 3 or 4.')

        appt_have_or_miss_capability.loc[_I, 'fail_district'] = \
            ''.join(failures) if failures else 'All districts pass'

    return appt_have_or_miss_capability


def find_districts_with_no_required_hcw(capability, scenario):
    """
    List, one per row, the districts/hospitals that lack a health care worker required by an appointment.

    :param capability: daily capability table, passed unchanged to `all_appts_can_run`.
    :param scenario: name given to the output column that holds the failing district or hospital.
    :return: long-format DataFrame with the columns 'Appt_Type_Code', 'Facility_Level', 'Officer_Category' and
        `scenario`; it has one row per failing site and no rows with missing values.
    """
    # get appts have or miss capability data
    appt_miss_cap = all_appts_can_run(capability)

    # keep rows where required appts have no requested hcw (i.e. with 0-capability);
    # take an explicit copy so that the changes below never act on a slice of the full table
    has_failure = appt_miss_cap['fail_district'] != 'All districts pass'
    appt_miss_cap = appt_miss_cap.loc[has_failure].copy()

    # split fail districts into one column per failing site; the trailing comma of 'fail_district' gives
    # empty strings, which are turned into missing values so that all-empty columns can be dropped
    fail_district_split = appt_miss_cap['fail_district'].str.split(',', expand=True)
    fail_district_split.replace({'': None}, inplace=True)
    fail_district_split.dropna(axis='columns', how='all', inplace=True)

    # swap the combined 'fail_district' column for the split columns (aligned on the row index)
    appt_miss_cap = appt_miss_cap.drop(columns='fail_district')
    appt_miss_cap = appt_miss_cap.merge(fail_district_split, how='outer', left_index=True, right_index=True)

    # melt to one failing site per row, then discard the helper column and the empty slots
    appt_miss_cap = pd.melt(appt_miss_cap, id_vars=['Appt_Type_Code', 'Facility_Level', 'Officer_Category'],
                            value_name=scenario)
    appt_miss_cap = appt_miss_cap.drop(columns='variable')
    appt_miss_cap = appt_miss_cap.dropna(axis='index', how='any')

    return appt_miss_cap


# Required-but-absent health care workers under the 'actual' and the 'funded' HR scenarios
df_actual = find_districts_with_no_required_hcw(capability=curr_daily_capability_coarse, scenario='actual')
df_funded = find_districts_with_no_required_hcw(capability=funded_daily_capability_coarse, scenario='funded')

# Put the two scenarios side by side, stack them again into one long table with a scenario column,
# then remove duplicated rows and rows with missing values
appts_with_no_required_hcw = (
    df_actual
    .merge(df_funded, on=['Appt_Type_Code', 'Facility_Level', 'Officer_Category'], how='outer')
    .melt(id_vars=['Appt_Type_Code', 'Facility_Level', 'Officer_Category'],
          var_name='HR_Scenario', value_name='Fail_District_Or_CenHos')
    .drop_duplicates(ignore_index=True)
    .dropna(axis='index', how='any')
)

# *** Only for funded_plus ********************************************************************************************
# df_funded_plus = find_districts_with_no_required_hcw(funded_daily_capability_coarse, 'funded_plus')
# assert len(df_funded_plus) == 0
# *********************************************************************************************************************

# Write the result for the actual and funded HR scenarios only;
# the funded_plus scenario is left out because it has no failing district or CenHos
appts_with_no_required_hcw.to_csv(
    outputlocation / 'human_resources' / 'definitions' / 'ResourceFile_Appts_That_Require_HCW_Who_Are_Not_Present.csv',
    index=False)

# Save results for funded
# Keep the following lines commented out when generating the funded_plus capability
# appt_have_or_miss_capability_funded = all_appts_can_run(funded_daily_capability_coarse)
# appt_have_or_miss_capability_funded.to_csv(
#     outputlocation / 'human_resources' / 'funded' / 'appt_have_or_miss_capability.csv', index=False)

# *** Only for funded_plus ********************************************************************************************
# appt_have_or_miss_capability_funded = all_appts_can_run(funded_daily_capability_coarse)
# appt_have_or_miss_capability_funded.to_csv(
#     outputlocation / 'human_resources' / 'funded_plus' / 'appt_have_or_miss_capability.csv', index=False)
# *********************************************************************************************************************

# Save results for actual
# appt_have_or_miss_capability_actual = all_appts_can_run(curr_daily_capability_coarse)
# appt_have_or_miss_capability_actual.to_csv(
#     outputlocation / 'human_resources' / 'actual' / 'appt_have_or_miss_capability.csv', index=False)


# Compare actual and funded capabilities: give both coarse tables the same
# (level, district, region, officer category) index and scenario-specific column names
funded_daily_capability_compare = (
    funded_daily_capability_coarse
    .drop(columns=['Facility_ID', 'Facility_Name'])
    .set_index(['Facility_Level', 'District', 'Region', 'Officer_Category'])
    .rename(columns={'Total_Mins_Per_Day': 'Funded_Total_Mins_Per_Day', 'Staff_Count': 'Funded_Staff_Count'})
)
curr_daily_capability_compare = (
    curr_daily_capability_coarse
    .drop(columns=['Facility_ID', 'Facility_Name'])
    .set_index(['Facility_Level', 'District', 'Region', 'Officer_Category'])
    .rename(columns={'Total_Mins_Per_Day': 'Curr_Total_Mins_Per_Day', 'Staff_Count': 'Curr_Staff_Count'})
)

# The outer join keeps index entries found in either table; missing values are replaced by 0
diff_capability = funded_daily_capability_compare.join(curr_daily_capability_compare, how='outer').fillna(0)

# Differences are funded minus current
diff_capability['Diff_Staff_Count'] = diff_capability['Funded_Staff_Count'].sub(diff_capability['Curr_Staff_Count'])
diff_capability['Diff_Total_Mins_Per_Day'] = diff_capability['Funded_Total_Mins_Per_Day'].sub(
    diff_capability['Curr_Total_Mins_Per_Day'])

# Turn the index levels back into ordinary columns
diff_capability = diff_capability.reset_index(drop=False)
