Module pii_codex.utils.file_util

File utils

Expand source code
"""
File utils
"""
import json
import os
from pathlib import Path
import pandas as pd

from .logging import logger

# Directory that contains this module file
dirname = os.path.dirname(__file__)


def get_relative_path(path_to_file: str) -> Path:
    """
    Returns the given path anchored at the directory that contains this module

    The path is only joined: ".." segments are kept as written and no check
    is made that the target exists.

    @param path_to_file: str, path relative to this module's directory
    @return: Path
    """
    # One join is enough. Joining the module directory a second time (as was
    # done before) duplicates it whenever __file__ is a relative path.
    return Path(__file__).parent / path_to_file


def write_json_file(folder_name: str, file_name: str, json_data):
    """
    Writes json file given json data, a folder name, and a file name.

    folder_name is only used to make sure the directory exists; file_name is
    opened exactly as given, so it must already include the folder part.

    @param folder_name: str, directory that is created when it is missing
    @param file_name: str, full path of the JSON file to write
    @param json_data: JSON text (str, bytes or bytearray) or an already
        parsed, JSON-serialisable object such as a dict or a list
    @raise ValueError: if json_data is text that is not valid JSON
    """
    # Parse and serialise before touching the file system, so that bad input
    # cannot leave behind an empty or half-written file.
    if isinstance(json_data, (str, bytes, bytearray)):
        payload = json.loads(json_data)
    else:
        payload = json_data
    serialised = json.dumps(payload, ensure_ascii=False, indent=4)

    # Create a new directory because it does not exist
    if not os.path.exists(folder_name):
        # exist_ok covers the case where the directory appears between the
        # check above and this call
        os.makedirs(folder_name, exist_ok=True)

        logger.info(f"A new version directory has been created: {folder_name}")

    with open(file_name, "w", encoding="utf-8") as json_file:
        json_file.write(serialised)


def delete_file(
    file_path: str = "pii_type_mappings",
):
    """
    Deletes a version file and raises if there is nothing to delete

    @param file_path: str
    @raise FileNotFoundError: if no file exists at file_path
    """

    # Attempt the removal directly instead of checking first, so the file
    # cannot disappear between the check and the delete.
    try:
        os.remove(file_path)
    except FileNotFoundError as error:
        # FileNotFoundError is an Exception subclass, so callers that catch
        # Exception keep working; the message is unchanged.
        raise FileNotFoundError(f"The file {file_path} does not exist") from error

    logger.info(f"The file {file_path} has been deleted")


def delete_folder(
    folder_path: str,
):
    """
    Deletes an empty folder and raises if the folder does not exist

    @param folder_path: str
    @raise FileNotFoundError: if no folder exists at folder_path
    @raise OSError: propagated from os.rmdir, e.g. when the folder is not empty
    """

    # Attempt the removal directly instead of checking first, so the folder
    # cannot disappear between the check and the delete.
    try:
        os.rmdir(folder_path)
    except FileNotFoundError as error:
        # FileNotFoundError is an Exception subclass, so callers that catch
        # Exception keep working; the message is unchanged.
        raise FileNotFoundError(
            f"The folder {folder_path} does not exist"
        ) from error

    logger.info(f"The folder {folder_path} has been deleted")


# region MAPPING FILE UTILS


def open_pii_type_mapping_csv(
    mapping_file_version: str = "v1", mapping_file_name: str = "pii_type_mappings"
):
    """
    Loads the PII type mappings CSV of the requested version into a Pandas DataFrame

    The file is looked up at ../data/<version>/<name>.csv via get_relative_path.

    @param mapping_file_version: str
    @param mapping_file_name: str
    @return: DataFrame
    """
    csv_location = f"../data/{mapping_file_version}/{mapping_file_name}.csv"

    with get_relative_path(csv_location).open() as csv_handle:
        mappings = pd.read_csv(csv_handle)

    return mappings


def open_pii_type_mapping_json(
    mapping_file_version: str = "v1", mapping_file_name: str = "pii_type_mappings"
):
    """
    Opens the PII type mappings JSON file and returns a Pandas DataFrame

    The "index" column is removed from the result when it is present.

    @param mapping_file_name: str
    @param mapping_file_version: str
    @return: DataFrame
    """

    file_path = get_relative_path(
        f"../data/{mapping_file_version}/{mapping_file_name}.json"
    )
    # write_json_file stores the data as UTF-8 with ensure_ascii=False, so
    # read it back with the same encoding instead of the platform default.
    with file_path.open(encoding="utf-8") as file:
        json_file_dataframe = pd.read_json(file)

    # errors="ignore" keeps files without an "index" column loadable
    return json_file_dataframe.drop(columns="index", errors="ignore")


def convert_pii_type_mapping_csv_to_json(
    data_frame: pd.DataFrame,
    mapping_file_version: str = "v1",
    json_file_name: str = "pii_type_mappings",
):
    """
    Saves a mappings dataframe as a JSON file inside the versioned data folder

    The dataframe index is turned into a regular column (reset_index) and the
    rows are exported as a list of records before being handed to
    write_json_file. Used primarily to update data folder with new versions.

    @param data_frame: DataFrame
    @param mapping_file_version: str
    @param json_file_name: str
    """
    version_dir = f"../data/{mapping_file_version}"
    target_folder = get_relative_path(version_dir)
    target_file = get_relative_path(f"{version_dir}/{json_file_name}.json")

    records_json = data_frame.reset_index().to_json(orient="records")

    write_json_file(
        folder_name=target_folder,
        file_name=target_file,
        json_data=records_json,
    )


def delete_json_mapping_file(
    mapping_file_version: str = "v1",
    json_file_name: str = "pii_type_mappings",
):
    """
    Removes the JSON mapping file of the given name from a data version folder

    The path ../data/<version>/<name>.json is built with get_relative_path
    and passed to delete_file.

    @param mapping_file_version: str
    @param json_file_name: str
    """
    relative_location = f"../data/{mapping_file_version}/{json_file_name}.json"
    delete_file(get_relative_path(relative_location))


def delete_json_mapping_folder(
    mapping_file_version: str,
):
    """
    Removes the folder of the given version from the data folder

    The path ../data/<version> is built with get_relative_path and passed to
    delete_folder.

    @param mapping_file_version: str
    """
    delete_folder(get_relative_path(f"../data/{mapping_file_version}"))


# endregion

Functions

def convert_pii_type_mapping_csv_to_json(data_frame: pandas.core.frame.DataFrame, mapping_file_version: str = 'v1', json_file_name: str = 'pii_type_mappings')

Writes JSON mapping file given a dataframe. Used primarily to update data folder with new versions

@param data_frame: DataFrame @param mapping_file_version: str @param json_file_name: str

Expand source code
def convert_pii_type_mapping_csv_to_json(
    data_frame: pd.DataFrame,
    mapping_file_version: str = "v1",
    json_file_name: str = "pii_type_mappings",
):
    """
    Saves a mappings dataframe as a JSON file inside the versioned data folder

    The dataframe index is turned into a regular column (reset_index) and the
    rows are exported as a list of records before being handed to
    write_json_file. Used primarily to update data folder with new versions.

    @param data_frame: DataFrame
    @param mapping_file_version: str
    @param json_file_name: str
    """
    version_dir = f"../data/{mapping_file_version}"
    target_folder = get_relative_path(version_dir)
    target_file = get_relative_path(f"{version_dir}/{json_file_name}.json")

    records_json = data_frame.reset_index().to_json(orient="records")

    write_json_file(
        folder_name=target_folder,
        file_name=target_file,
        json_data=records_json,
    )
def delete_file(file_path: str = 'pii_type_mappings')

Deletes a version file; raises an exception if the file does not exist

@param file_path: str

Expand source code
def delete_file(
    file_path: str = "pii_type_mappings",
):
    """
    Deletes a version file and raises if there is nothing to delete

    @param file_path: str
    @raise FileNotFoundError: if no file exists at file_path
    """

    # Attempt the removal directly instead of checking first, so the file
    # cannot disappear between the check and the delete.
    try:
        os.remove(file_path)
    except FileNotFoundError as error:
        # FileNotFoundError is an Exception subclass, so callers that catch
        # Exception keep working; the message is unchanged.
        raise FileNotFoundError(f"The file {file_path} does not exist") from error

    logger.info(f"The file {file_path} has been deleted")
def delete_folder(folder_path: str)

Deletes a folder if it exists and is empty; raises an exception if the folder does not exist

@param folder_path: str

Expand source code
def delete_folder(
    folder_path: str,
):
    """
    Deletes an empty folder and raises if the folder does not exist

    @param folder_path: str
    @raise FileNotFoundError: if no folder exists at folder_path
    @raise OSError: propagated from os.rmdir, e.g. when the folder is not empty
    """

    # Attempt the removal directly instead of checking first, so the folder
    # cannot disappear between the check and the delete.
    try:
        os.rmdir(folder_path)
    except FileNotFoundError as error:
        # FileNotFoundError is an Exception subclass, so callers that catch
        # Exception keep working; the message is unchanged.
        raise FileNotFoundError(
            f"The folder {folder_path} does not exist"
        ) from error

    logger.info(f"The folder {folder_path} has been deleted")
def delete_json_mapping_file(mapping_file_version: str = 'v1', json_file_name: str = 'pii_type_mappings')

Deletes a version file within a data version folder

@param mapping_file_version: str @param json_file_name: str

Expand source code
def delete_json_mapping_file(
    mapping_file_version: str = "v1",
    json_file_name: str = "pii_type_mappings",
):
    """
    Removes the JSON mapping file of the given name from a data version folder

    The path ../data/<version>/<name>.json is built with get_relative_path
    and passed to delete_file.

    @param mapping_file_version: str
    @param json_file_name: str
    """
    relative_location = f"../data/{mapping_file_version}/{json_file_name}.json"
    delete_file(get_relative_path(relative_location))
def delete_json_mapping_folder(mapping_file_version: str)

Deletes a version folder within the data folder

@param mapping_file_version: str

Expand source code
def delete_json_mapping_folder(
    mapping_file_version: str,
):
    """
    Removes the folder of the given version from the data folder

    The path ../data/<version> is built with get_relative_path and passed to
    delete_folder.

    @param mapping_file_version: str
    """
    delete_folder(get_relative_path(f"../data/{mapping_file_version}"))
def get_relative_path(path_to_file: str)

Returns the given path joined onto the directory that contains this module

@param path_to_file: str @return: Path

Expand source code
def get_relative_path(path_to_file: str) -> Path:
    """
    Returns the given path anchored at the directory that contains this module

    The path is only joined: ".." segments are kept as written and no check
    is made that the target exists.

    @param path_to_file: str, path relative to this module's directory
    @return: Path
    """
    # One join is enough. Joining the module directory a second time (as was
    # done before) duplicates it whenever __file__ is a relative path.
    return Path(__file__).parent / path_to_file
def open_pii_type_mapping_csv(mapping_file_version: str = 'v1', mapping_file_name: str = 'pii_type_mappings')

Opens the PII type mappings CSV file and returns a Pandas DataFrame

@param mapping_file_name: str @param mapping_file_version: str

Expand source code
def open_pii_type_mapping_csv(
    mapping_file_version: str = "v1", mapping_file_name: str = "pii_type_mappings"
):
    """
    Loads the PII type mappings CSV of the requested version into a Pandas DataFrame

    The file is looked up at ../data/<version>/<name>.csv via get_relative_path.

    @param mapping_file_version: str
    @param mapping_file_name: str
    @return: DataFrame
    """
    csv_location = f"../data/{mapping_file_version}/{mapping_file_name}.csv"

    with get_relative_path(csv_location).open() as csv_handle:
        mappings = pd.read_csv(csv_handle)

    return mappings
def open_pii_type_mapping_json(mapping_file_version: str = 'v1', mapping_file_name: str = 'pii_type_mappings')

Opens the PII type mappings JSON file and returns a Pandas DataFrame

@param mapping_file_name: str @param mapping_file_version: str

Expand source code
def open_pii_type_mapping_json(
    mapping_file_version: str = "v1", mapping_file_name: str = "pii_type_mappings"
):
    """
    Opens the PII type mappings JSON file and returns a Pandas DataFrame

    The "index" column is removed from the result when it is present.

    @param mapping_file_name: str
    @param mapping_file_version: str
    @return: DataFrame
    """

    file_path = get_relative_path(
        f"../data/{mapping_file_version}/{mapping_file_name}.json"
    )
    # write_json_file stores the data as UTF-8 with ensure_ascii=False, so
    # read it back with the same encoding instead of the platform default.
    with file_path.open(encoding="utf-8") as file:
        json_file_dataframe = pd.read_json(file)

    # errors="ignore" keeps files without an "index" column loadable
    return json_file_dataframe.drop(columns="index", errors="ignore")
def write_json_file(folder_name: str, file_name: str, json_data)

Writes json file given json data, a folder name, and a file name.

@param folder_name: str @param file_name: str @param json_data:

Expand source code
def write_json_file(folder_name: str, file_name: str, json_data):
    """
    Writes json file given json data, a folder name, and a file name.

    folder_name is only used to make sure the directory exists; file_name is
    opened exactly as given, so it must already include the folder part.

    @param folder_name: str, directory that is created when it is missing
    @param file_name: str, full path of the JSON file to write
    @param json_data: JSON text (str, bytes or bytearray) or an already
        parsed, JSON-serialisable object such as a dict or a list
    @raise ValueError: if json_data is text that is not valid JSON
    """
    # Parse and serialise before touching the file system, so that bad input
    # cannot leave behind an empty or half-written file.
    if isinstance(json_data, (str, bytes, bytearray)):
        payload = json.loads(json_data)
    else:
        payload = json_data
    serialised = json.dumps(payload, ensure_ascii=False, indent=4)

    # Create a new directory because it does not exist
    if not os.path.exists(folder_name):
        # exist_ok covers the case where the directory appears between the
        # check above and this call
        os.makedirs(folder_name, exist_ok=True)

        logger.info(f"A new version directory has been created: {folder_name}")

    with open(file_name, "w", encoding="utf-8") as json_file:
        json_file.write(serialised)