Module pii_codex.services.assessment_service

Expand source code
from typing import List, Tuple
from collections import Counter
from itertools import chain

from ..config import PII_MAPPER
from ..models.analysis import RiskAssessment, AnalysisResult
from ..utils.statistics_util import get_mean, get_sum


class PIIAssessmentService:
    """
    Service for mapping detected PII type names to risk assessments and for
    summarising detections across a collection of analyses.
    """

    def assess_pii_type(self, detected_pii_type: str) -> RiskAssessment:
        """
        Assesses a singular detected PII type by delegating to PII_MAPPER.map_pii_type

        @param detected_pii_type: type name string from common.PIIType enum
        @return: RiskAssessment
        """
        return PII_MAPPER.map_pii_type(detected_pii_type)

    def assess_pii_type_list(
        self, detected_pii_types: List[str]
    ) -> List[RiskAssessment]:
        """
        Assesses a list of detected PII types, one PII_MAPPER.map_pii_type call per entry.
        The result has the same order and length as the input.

        @param detected_pii_types: array of type name strings from common.PIIType
        enum (e.g. ["PHONE_NUMBER", "US_SOCIAL_SECURITY_NUMBER"])
        @return: List[RiskAssessment]
        """
        return [PII_MAPPER.map_pii_type(pii_type) for pii_type in detected_pii_types]

    @staticmethod
    def calculate_risk_assessment_score_average(
        risk_assessments: List[RiskAssessment],
    ) -> float:
        """
        Returns the mean of the risk_level values of the given assessments,
        as computed by get_mean

        @param risk_assessments: assessments whose risk_level values are averaged
        @return: float
        """
        return get_mean([assessment.risk_level for assessment in risk_assessments])

    @staticmethod
    def get_detected_pii_count(analyses: List[AnalysisResult]) -> int:
        """
        Returns the count of detected PII for analyses performed on a collection.
        Only analyses whose get_detected_types() result is truthy contribute; each
        of those contributes the length of its `analysis` attribute.

        @param analyses: List[AnalysisResult]
        @return: int
        """
        return get_sum(
            [
                len(analysis.analysis)
                for analysis in analyses
                if analysis.get_detected_types()
            ]
        )

    @staticmethod
    def get_detected_pii_types(
        analyses: List[AnalysisResult],
    ) -> Tuple[set[str], Counter]:
        """
        Returns the set of detected PII types and their frequencies for analyses
        performed on a collection

        @param analyses: List[AnalysisResult]
        @return: Tuple[set[str], Counter]
        """
        # Count in a single pass over the flattened detections; the Counter's keys
        # are exactly the distinct detected types, so no intermediate list is needed.
        type_frequencies = Counter(
            chain.from_iterable(analysis.get_detected_types() for analysis in analyses)
        )

        return set(type_frequencies), type_frequencies

Classes

class PIIAssessmentService

Class for mapping PII types to categories and extracting them.

Expand source code
class PIIAssessmentService:
    """
    Service for mapping detected PII type names to risk assessments and for
    summarising detections across a collection of analyses.
    """

    def assess_pii_type(self, detected_pii_type: str) -> RiskAssessment:
        """
        Assesses a singular detected PII type by delegating to PII_MAPPER.map_pii_type

        @param detected_pii_type: type name string from common.PIIType enum
        @return: RiskAssessment
        """
        return PII_MAPPER.map_pii_type(detected_pii_type)

    def assess_pii_type_list(
        self, detected_pii_types: List[str]
    ) -> List[RiskAssessment]:
        """
        Assesses a list of detected PII types, one PII_MAPPER.map_pii_type call per entry.
        The result has the same order and length as the input.

        @param detected_pii_types: array of type name strings from common.PIIType
        enum (e.g. ["PHONE_NUMBER", "US_SOCIAL_SECURITY_NUMBER"])
        @return: List[RiskAssessment]
        """
        return [PII_MAPPER.map_pii_type(pii_type) for pii_type in detected_pii_types]

    @staticmethod
    def calculate_risk_assessment_score_average(
        risk_assessments: List[RiskAssessment],
    ) -> float:
        """
        Returns the mean of the risk_level values of the given assessments,
        as computed by get_mean

        @param risk_assessments: assessments whose risk_level values are averaged
        @return: float
        """
        return get_mean([assessment.risk_level for assessment in risk_assessments])

    @staticmethod
    def get_detected_pii_count(analyses: List[AnalysisResult]) -> int:
        """
        Returns the count of detected PII for analyses performed on a collection.
        Only analyses whose get_detected_types() result is truthy contribute; each
        of those contributes the length of its `analysis` attribute.

        @param analyses: List[AnalysisResult]
        @return: int
        """
        return get_sum(
            [
                len(analysis.analysis)
                for analysis in analyses
                if analysis.get_detected_types()
            ]
        )

    @staticmethod
    def get_detected_pii_types(
        analyses: List[AnalysisResult],
    ) -> Tuple[set[str], Counter]:
        """
        Returns the set of detected PII types and their frequencies for analyses
        performed on a collection

        @param analyses: List[AnalysisResult]
        @return: Tuple[set[str], Counter]
        """
        # Count in a single pass over the flattened detections; the Counter's keys
        # are exactly the distinct detected types, so no intermediate list is needed.
        type_frequencies = Counter(
            chain.from_iterable(analysis.get_detected_types() for analysis in analyses)
        )

        return set(type_frequencies), type_frequencies

Static methods

def calculate_risk_assessment_score_average(risk_assessments: List[RiskAssessment]) ‑> float

Returns the average risk score per token

@param risk_assessments: @return: float

Expand source code
@staticmethod
def calculate_risk_assessment_score_average(
    risk_assessments: List[RiskAssessment],
) -> float:
    """
    Computes the mean of the risk_level values of the supplied assessments
    by handing them to get_mean

    @param risk_assessments: assessments whose risk_level values are averaged
    @return: float
    """
    risk_levels = [item.risk_level for item in risk_assessments]
    return get_mean(risk_levels)
def get_detected_pii_count(analyses: List[AnalysisResult]) ‑> int

Returns the count of detected PII for analyses performed on a collection

@param analyses: List[AnalysisResult] @return: int

Expand source code
@staticmethod
def get_detected_pii_count(analyses: List[AnalysisResult]) -> int:
    """
    Totals the detected PII across a collection of analyses. An analysis is
    counted only when its get_detected_types() result is truthy, and it then
    contributes the length of its `analysis` attribute.

    @param analyses: List[AnalysisResult]
    @return: int
    """
    detections_per_analysis = [
        len(entry.analysis) for entry in analyses if entry.get_detected_types()
    ]
    return get_sum(detections_per_analysis)
def get_detected_pii_types(analyses: List[AnalysisResult]) ‑> Tuple[set[str], collections.Counter]

Returns the set of detected PII types and their frequencies for analyses performed on a collection

@param analyses: List[AnalysisResult] @return: Tuple[set[str], Counter]

Expand source code
@staticmethod
def get_detected_pii_types(
    analyses: List[AnalysisResult],
) -> Tuple[set[str], Counter]:
    """
    Returns the set of detected PII types and their frequencies for analyses
    performed on a collection

    @param analyses: List[AnalysisResult]
    @return: Tuple[set[str], Counter]
    """
    # Count in a single pass over the flattened detections; the Counter's keys
    # are exactly the distinct detected types, so no intermediate list is needed.
    type_frequencies = Counter(
        chain.from_iterable(analysis.get_detected_types() for analysis in analyses)
    )

    return set(type_frequencies), type_frequencies

Methods

def assess_pii_type(self, detected_pii_type: str) ‑> RiskAssessment

Assesses a singular detected PII type given a type name string from common.PIIType enum @param detected_pii_type: type name strings from common.PIIType enum @return: RiskAssessment

Expand source code
def assess_pii_type(self, detected_pii_type: str) -> RiskAssessment:
    """
    Looks up the risk assessment for one detected PII type via PII_MAPPER.map_pii_type

    @param detected_pii_type: type name string from common.PIIType enum
    @return: RiskAssessment
    """
    assessment = PII_MAPPER.map_pii_type(detected_pii_type)
    return assessment
def assess_pii_type_list(self, detected_pii_types: List[str]) ‑> List[RiskAssessment]

Assesses a list of detected PII types given an array of type name strings from common.PIIType enum @param detected_pii_types: array type name strings from common.PIIType enum (e.g. ["PHONE_NUMBER", "US_SOCIAL_SECURITY_NUMBER"]) @return: List[RiskAssessment]

Expand source code
def assess_pii_type_list(
    self, detected_pii_types: List[str]
) -> List[RiskAssessment]:
    """
    Assesses a list of detected PII types, one PII_MAPPER.map_pii_type call per entry.
    The result has the same order and length as the input.

    @param detected_pii_types: array of type name strings from common.PIIType
    enum (e.g. ["PHONE_NUMBER", "US_SOCIAL_SECURITY_NUMBER"])
    @return: List[RiskAssessment]
    """
    return [PII_MAPPER.map_pii_type(pii_type) for pii_type in detected_pii_types]