#!/usr/bin/env python3
"""
Extract the area of cancerous tissue (detections of at least 200 um^2) and
compute the percentage of each square that is covered by cancerous tissue.

The same pipeline is run for the SW620 and the LS1034 data sets; each run
writes one CSV file to ``../results``.
"""
import pandas as pd
import numpy as np
from scipy import stats

# Column holding the measured area in both the detection and annotation tables.
AREA_COL = "Area µm^2"
# Detections smaller than this are discarded before summing.
MIN_AREA_UM2 = 200


def _image_stem(image):
    """Return the part of an image label that precedes ``.vsi``."""
    return image.split(".vsi")[0]


def sw620_animal(identifier):
    """Return the animal label of a SW620 identifier (third ``_`` field)."""
    return identifier.split("_")[2]


def ls1034_animal(identifier):
    """Return the animal label of a LS1034 identifier (fields 2-3 joined by ``-``)."""
    return "-".join(identifier.split("_")[1:3])


def compute_squares_area(meta, measure, annotations, animal_from_identifier,
                         min_area=MIN_AREA_UM2):
    """Compute the percentage of cancer-cell area per (Image, Parent) group.

    Parameters
    ----------
    meta : pandas.DataFrame
        Metadata table; the columns ``Image_name``, ``Identifier`` and
        ``Condition`` are read.
    measure : pandas.DataFrame
        Detection measurements; the columns ``Image``, ``Parent``, ``Class``
        and ``Area µm^2`` are read.
    annotations : pandas.DataFrame
        Annotation measurements; rows whose ``ROI`` is ``"Rectangle"`` provide
        the total area (``Image`` and ``Area µm^2`` are read).
    animal_from_identifier : callable
        Maps one ``Identifier`` string to the animal label.
    min_area : float, optional
        Detections with an area below this value are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per (Image, Parent) group with the columns ``index``,
        ``Image``, ``Parent``, ``Area µm^2`` (summed), ``Condition``,
        ``Total Area``, ``Image_crop``, ``Image_name``, ``animal`` and
        ``percent area``.  The input frames are not modified.
    """
    # Attach the metadata to every detection through the image name.
    measure = measure.assign(Image_crop=measure["Image"].apply(_image_stem))
    merged = pd.merge(measure, meta, left_on="Image_crop", right_on="Image_name")

    # Keep only cancer-cell detections that are large enough.
    keep = (merged[AREA_COL] >= min_area) & (merged["Class"] == "cancer_cells")
    cancer = merged.loc[keep]

    # Area of the rectangle annotations, de-duplicated per (Image, area) pair.
    total_area = (
        annotations.loc[annotations["ROI"] == "Rectangle", ["Image", AREA_COL]]
        .drop_duplicates()
        .rename(columns={AREA_COL: "Total Area"})
    )

    # Summed cancer area and condition of each (Image, Parent) group.
    grouped = cancer.groupby(["Image", "Parent"])
    squares = grouped[AREA_COL].sum().reset_index()
    condition = grouped["Condition"].first().reset_index()
    squares = pd.merge(squares, condition, on=["Image", "Parent"])

    # NOTE(review): the total area is joined on "Image" only, so an image with
    # several distinct rectangle areas yields one row per area for each of its
    # groups -- confirm that all rectangles of an image have the same area.
    squares = pd.merge(squares, total_area, on="Image")

    # Attach the animal label through the image name.
    squares["Image_crop"] = squares["Image"].apply(_image_stem)
    animals = meta[["Image_name"]].assign(
        animal=meta["Identifier"].apply(animal_from_identifier)
    )
    squares = pd.merge(squares, animals, left_on="Image_crop", right_on="Image_name")

    squares["percent area"] = squares[AREA_COL] / squares["Total Area"] * 100
    # The extra "index" column is kept so the written CSV layout is unchanged.
    return squares.reset_index()


# The analysis only runs when the file is executed as a script.  The results
# stay at module level (not inside a function) so they remain available in an
# interactive session; the t-test results are not written anywhere.
if __name__ == "__main__":
    ############################################################################
    # SW620
    ############################################################################
    squares_area = compute_squares_area(
        pd.read_csv("../data/20220406_metadata_micrometastasis_quantification.csv"),
        pd.read_csv("../data/measurements_29_april_237pm.tsv", sep="\t"),
        pd.read_csv("../data/measurements_area_squares.tsv", sep="\t"),
        sw620_animal,
    )

    # Remove misassigned square
    misassigned = (squares_area["percent area"] > 3) & (squares_area["Condition"] == "GFP")
    squares_area = squares_area.loc[~misassigned]
    squares_area.to_csv("../results/sw620_squares_area.csv")

    pou_area = squares_area.loc[squares_area["Condition"] == "POU5F1B", "percent area"]
    gfp_area = squares_area.loc[squares_area["Condition"] == "GFP", "percent area"]
    t, pval = stats.ttest_ind(pou_area, gfp_area)

    ############################################################################
    # LS1034
    ############################################################################
    squares_area = compute_squares_area(
        pd.read_csv("../data/20220504_metadata_micrometastasis_quantification_LS1034_knockdown_slide_scanner.csv"),
        pd.read_csv("../data/20220504_detection_measurements_micrometastasis_quantification_LS1034_knockdown_slide_scanner.tsv", sep="\t"),
        pd.read_csv("../data/20220504_annotation_measurements_micrometastasis_quantification_LS1034_knockdown_slide_scanner.tsv", sep="\t"),
        ls1034_animal,
    )
    squares_area.to_csv("../results/ls1034_squares_area.csv")

    sh_area = squares_area.loc[squares_area["Condition"] == "sh3", "percent area"]
    nts_area = squares_area.loc[squares_area["Condition"] == "NTS", "percent area"]
    t, pval = stats.ttest_ind(sh_area, nts_area)