"""A subpackage to define and organize PUDL database tables by data group."""

import importlib
import pkgutil

from pudl.metadata.helpers import build_foreign_keys

RESOURCE_METADATA = {}
for module_info in pkgutil.iter_modules(__path__):
    module = importlib.import_module(f"{__name__}.{module_info.name}")
    resources = module.RESOURCE_METADATA
    RESOURCE_METADATA.update(resources)

FOREIGN_KEYS: dict[str, list[dict]] = build_foreign_keys(RESOURCE_METADATA)
"""Generated foreign key constraints by resource name.

See :func:`pudl.metadata.helpers.build_foreign_keys`.
"""

ENTITIES: dict[str, dict[str, list[str] | dict[str, str]]] = {
    "plants": {
        "id_cols": ["plant_id_eia"],
        "static_cols": [
            "city",
            "county",
            "latitude",
            "longitude",
            "plant_name_eia",
            "state",
            "street_address",
            "zip_code",
        ],
        "annual_cols": [
            "ash_impoundment",
            "ash_impoundment_lined",
            "ash_impoundment_status",
            "balancing_authority_code_eia",
            "balancing_authority_name_eia",
            "datum",
            "energy_storage",
            "ferc_cogen_docket_no",
            "ferc_cogen_status",
            "ferc_exempt_wholesale_generator_docket_no",
            "ferc_exempt_wholesale_generator",
            "ferc_small_power_producer_docket_no",
            "ferc_small_power_producer",
            "ferc_qualifying_facility_docket_no",
            "grid_voltage_1_kv",
            "grid_voltage_2_kv",
            "grid_voltage_3_kv",
            "iso_rto_code",
            "liquefied_natural_gas_storage",
            "natural_gas_local_distribution_company",
            "natural_gas_storage",
            "natural_gas_pipeline_name_1",
            "natural_gas_pipeline_name_2",
            "natural_gas_pipeline_name_3",
            "nerc_region",
            "has_net_metering",
            "pipeline_notes",
            "primary_purpose_id_naics",
            "regulatory_status_code",
            "reporting_frequency_code",
            "sector_id_eia",
            "sector_name_eia",
            "service_area",
            "transmission_distribution_owner_id",
            "transmission_distribution_owner_name",
            "transmission_distribution_owner_state",
            "utility_id_eia",
            "water_source",
            "data_maturity",
        ],
        "mapped_schemas": [
            {"operator_utility_id_eia": "utility_id_eia"},
            {"plant_id_eia_direct_support_1": "plant_id_eia"},
            {"plant_id_eia_direct_support_2": "plant_id_eia"},
            {"plant_id_eia_direct_support_3": "plant_id_eia"},
        ],
    },
    "generators": {
        "id_cols": ["plant_id_eia", "generator_id"],
        "static_cols": [
            "duct_burners",
            "generator_operating_date",
            "topping_bottoming_code",
            "solid_fuel_gasification",
            "pulverized_coal_tech",
            "fluidized_bed_tech",
            "subcritical_tech",
            "supercritical_tech",
            "ultrasupercritical_tech",
            "stoker_tech",
            "other_combustion_tech",
            "bypass_heat_recovery",
            "rto_iso_lmp_node_id",
            "rto_iso_location_wholesale_reporting_id",
            "associated_combined_heat_power",
            "original_planned_generator_operating_date",
            "can_switch_when_operating",
            "previously_canceled",
        ],
        "annual_cols": [
            "capacity_mw",
            "fuel_type_code_pudl",
            "can_burn_multiple_fuels",
            "ownership_code",
            "owned_by_non_utility",
            "deliver_power_transgrid",
            "summer_capacity_mw",
            "winter_capacity_mw",
            "summer_capacity_estimate",
            "winter_capacity_estimate",
            "minimum_load_mw",
            "distributed_generation",
            "technology_description",
            "reactive_power_output_mvar",
            "prime_mover_code",
            "energy_source_code_1",
            "energy_source_code_2",
            "energy_source_code_3",
            "energy_source_code_4",
            "energy_source_code_5",
            "energy_source_code_6",
            "energy_source_1_transport_1",
            "energy_source_1_transport_2",
            "energy_source_1_transport_3",
            "energy_source_2_transport_1",
            "energy_source_2_transport_2",
            "energy_source_2_transport_3",
            "startup_source_code_1",
            "startup_source_code_2",
            "startup_source_code_3",
            "startup_source_code_4",
            "time_cold_shutdown_full_load_code",
            "synchronized_transmission_grid",
            "turbines_num",
            "operational_status_code",
            "operational_status",
            "planned_modifications",
            "planned_net_summer_capacity_uprate_mw",
            "planned_net_winter_capacity_uprate_mw",
            "planned_new_capacity_mw",
            "planned_uprate_date",
            "planned_net_summer_capacity_derate_mw",
            "planned_net_winter_capacity_derate_mw",
            "planned_derate_date",
            "planned_new_prime_mover_code",
            "planned_energy_source_code_1",
            "planned_repower_date",
            "other_planned_modifications",
            "other_modifications_date",
            "planned_generator_retirement_date",
            "carbon_capture",
            "can_cofire_fuels",
            "can_switch_oil_gas",
            "turbines_inverters_hydrokinetics",
            "nameplate_power_factor",
            "uprate_derate_during_year",
            "uprate_derate_completed_date",
            "current_planned_generator_operating_date",
            "summer_estimated_capability_mw",
            "winter_estimated_capability_mw",
            "generator_retirement_date",
            "utility_id_eia",
            "ferc_qualifying_facility",
            "data_maturity",
            "energy_storage_capacity_mwh",
            "net_capacity_mwdc",
        ],
        "mapped_schemas": [
            {"operator_utility_id_eia": "utility_id_eia"},
            {
                "plant_id_eia_direct_support_1": "plant_id_eia",
                "generator_id_direct_support_1": "generator_id",
            },
            {
                "plant_id_eia_direct_support_2": "plant_id_eia",
                "generator_id_direct_support_2": "generator_id",
            },
            {
                "plant_id_eia_direct_support_3": "plant_id_eia",
                "generator_id_direct_support_3": "generator_id",
            },
        ],
    },
    "boilers": {
        "id_cols": ["plant_id_eia", "boiler_id"],
        "static_cols": [
            "boiler_manufacturer",
            "boiler_manufacturer_code",
        ],
        "annual_cols": [
            "boiler_operating_date",
            "boiler_status",
            "boiler_retirement_date",
            "boiler_type",
            "firing_type_1",
            "firing_type_2",
            "firing_type_3",
            "firing_rate_using_coal_tons_per_hour",
            "firing_rate_using_oil_bbls_per_hour",
            "firing_rate_using_gas_mcf_per_hour",
            "firing_rate_using_other_fuels",
            "boiler_fuel_code_1",
            "boiler_fuel_code_2",
            "boiler_fuel_code_3",
            "boiler_fuel_code_4",
            "waste_heat_input_mmbtu_per_hour",
            "wet_dry_bottom",
            "fly_ash_reinjection",
            "hrsg",
            "max_steam_flow_lbs_per_hour",
            "turndown_ratio",
            "efficiency_100pct_load",
            "efficiency_50pct_load",
            "air_flow_100pct_load_cubic_feet_per_minute",
            "new_source_review",
            "new_source_review_date",
            "new_source_review_permit",
            "regulation_particulate",
            "regulation_so2",
            "regulation_nox",
            "standard_particulate_rate",
            "standard_so2_rate",
            "standard_nox_rate",
            "unit_particulate",
            "unit_so2",
            "unit_nox",
            "compliance_year_particulate",
            "compliance_year_nox",
            "compliance_year_so2",
            "particulate_control_out_of_compliance_strategy_1",
            "particulate_control_out_of_compliance_strategy_2",
            "particulate_control_out_of_compliance_strategy_3",
            "so2_control_out_of_compliance_strategy_1",
            "so2_control_out_of_compliance_strategy_2",
            "so2_control_out_of_compliance_strategy_3",
            "so2_control_existing_caaa_compliance_strategy_1",
            "so2_control_existing_caaa_compliance_strategy_2",
            "so2_control_existing_caaa_compliance_strategy_3",
            "so2_control_planned_caaa_compliance_strategy_1",
            "so2_control_planned_caaa_compliance_strategy_2",
            "so2_control_planned_caaa_compliance_strategy_3",
            "nox_control_out_of_compliance_strategy_1",
            "nox_control_out_of_compliance_strategy_2",
            "nox_control_out_of_compliance_strategy_3",
            "nox_control_existing_caaa_compliance_strategy_1",
            "nox_control_existing_caaa_compliance_strategy_2",
            "nox_control_existing_caaa_compliance_strategy_3",
            "nox_control_planned_caaa_compliance_strategy_1",
            "nox_control_planned_caaa_compliance_strategy_2",
            "nox_control_planned_caaa_compliance_strategy_3",
            "compliance_year_mercury",
            "mercury_control_existing_strategy_1",
            "mercury_control_existing_strategy_2",
            "mercury_control_existing_strategy_3",
            "mercury_control_existing_strategy_4",
            "mercury_control_existing_strategy_5",
            "mercury_control_existing_strategy_6",
            "mercury_control_proposed_strategy_1",
            "mercury_control_proposed_strategy_2",
            "mercury_control_proposed_strategy_3",
            "nox_control_existing_strategy_1",
            "nox_control_existing_strategy_2",
            "nox_control_existing_strategy_3",
            "nox_control_manufacturer",
            "nox_control_manufacturer_code",
            "nox_control_proposed_strategy_1",
            "nox_control_proposed_strategy_2",
            "nox_control_proposed_strategy_3",
            "nox_control_status_code",
            "regulation_mercury",
            "so2_control_existing_strategy_1",
            "so2_control_existing_strategy_2",
            "so2_control_existing_strategy_3",
            "so2_control_proposed_strategy_1",
            "so2_control_proposed_strategy_2",
            "so2_control_proposed_strategy_3",
            "standard_so2_percent_scrubbed",
            "data_maturity",
        ],
    },
    "utilities": {
        "id_cols": ["utility_id_eia"],
        "static_cols": ["utility_name_eia"],
        "annual_cols": [
            "street_address",
            "city",
            "state",
            "zip_code",
            "entity_type",
            "plants_reported_owner",
            "plants_reported_operator",
            "plants_reported_asset_manager",
            "plants_reported_other_relationship",
            "attention_line",
            "address_2",
            "zip_code_4",
            "contact_firstname",
            "contact_lastname",
            "contact_title",
            "contact_firstname_2",
            "contact_lastname_2",
            "contact_title_2",
            "phone_extension",
            "phone_extension_2",
            "phone_number",
            "phone_number_2",
            "data_maturity",
        ],
        "mapped_schemas": [
            {
                "owner_utility_id_eia": "utility_id_eia",
                "owner_utility_name_eia": "utility_name_eia",
                "owner_state": "state",
                "owner_country": "country",
                "owner_street_address": "street_address",
                "owner_zip_code": "zip_code",
                "owner_city": "city",
            },
            {
                "operator_utility_id_eia": "utility_id_eia",
                "operator_utility_name_eia": "utility_name_eia",
                "operator_state": "state",
            },
        ],
    },
}
"""Columns kept for either entity or annual EIA tables in the harvesting process.

For each entity type (key), the ID columns, static columns, annual columns, and mapped
columns.

The order of the entities matters. Plants must be harvested before utilities, since
plant location must be removed before the utility locations are harvested.

``mapped_schemas`` allows for harvesting an entity ID / value relationship
from multiple columns in the same input dataframe. Each item in ``mapped_schemas`` is a
dictionary mapping column names in one of the cleaned tables to the standard column names
for that entity.  This is useful if a table has entities that should be harvested,
but whose column names don't have the same name as those in the ``id_cols``, ``static_cols``,
or ``annual_cols`` list. For example, in the ownership table the owner and operator utility
columns map to different column names in the other tables, i.e. "owner_utility_id_eia": "utility_id_eia".
In the harvesting process, a copy of the clean dataframe is made, and these columns are
renamed so the relationship can be harvested and added to the normalized entity tables.
"""
