# Source code for annotation.wis_annotated_text_to_alignment
import sys
import os
import pandas as pd
def add_annotation(df_source, df_target, df_alignment):
    """
    Adds NER/POS annotations in the alignment files by copying the annotations from the spreadsheets.

    Differently from the EVS, ESS and SHARE files, all the WIS files have 1-1 correspondences and come prealigned,
    therefore these files do not have to go through the Alignment algorithm.

    The annotation columns are copied positionally (row i of the source/target goes to row i of the alignment);
    the indexes of the three dataframes are ignored. df_alignment is modified in place.

    Args:
        param1 df_source (pandas dataframe): the dataframe that holds the preprocessed annotated source questionnaire.
            Must contain the columns 'ner_tagged_text' and 'pos_tagged_text'.
        param2 df_target (pandas dataframe): the dataframe that holds the preprocessed annotated target questionnaire.
            Must contain the columns 'ner_tagged_text' and 'pos_tagged_text'.
        param3 df_alignment (pandas dataframe): the dataframe that holds the alignment questionnaire, without annotations.

    Returns:
        df_alignment (pandas dataframe) with added NER and POS annotations that were copied from the df_source and df_target.
        This is the same object that was passed in.

    Raises:
        ValueError: if df_source or df_target does not have the same number of rows as df_alignment.
        KeyError: if df_source or df_target lacks one of the annotation columns.
    """
    # Validate the 1-1 correspondence up front, so that a mismatch is reported
    # clearly and df_alignment is never left with only some of the new columns.
    expected_rows = len(df_alignment)
    for label, frame in (('source', df_source), ('target', df_target)):
        if len(frame) != expected_rows:
            raise ValueError(
                'The {0} questionnaire has {1} rows but the alignment has {2}; '
                'a 1-1 correspondence is required.'.format(label, len(frame), expected_rows)
            )

    # Look up every annotation column before touching df_alignment: a missing
    # column raises KeyError here, while df_alignment is still unmodified.
    # `.values` drops the index, which makes the copy positional.
    annotations = {
        'source_ner_tagged_text': df_source['ner_tagged_text'].values,
        'source_pos_tagged_text': df_source['pos_tagged_text'].values,
        'target_ner_tagged_text': df_target['ner_tagged_text'].values,
        'target_pos_tagged_text': df_target['pos_tagged_text'].values,
    }
    for column, values in annotations.items():
        df_alignment[column] = values

    return df_alignment
def main(folder_path, filename_source, filename_target, filename_alignment):
    """
    Reads the source, target and alignment files, adds the annotations and overwrites the alignment file.

    All three files are read as tab-separated values with every column loaded as a string.
    The annotated alignment is written back to filename_alignment, tab-separated, UTF-8 encoded
    and without the dataframe index.

    Args:
        param1 folder_path (string): directory to switch to before the files are opened.
        param2 filename_source (string): name of the annotated source questionnaire file.
        param3 filename_target (string): name of the annotated target questionnaire file.
        param4 filename_alignment (string): name of the alignment file; it is replaced by its annotated version.
    """
    # NOTE: this changes the working directory of the whole process, so the
    # file names below are resolved relative to folder_path. os.chdir returns
    # None, hence there is nothing worth binding to a variable.
    os.chdir(folder_path)

    df_source = pd.read_csv(filename_source, dtype=str, sep='\t')
    df_target = pd.read_csv(filename_target, dtype=str, sep='\t')
    df_alignment = pd.read_csv(filename_alignment, dtype=str, sep='\t')

    df = add_annotation(df_source, df_target, df_alignment)

    df.to_csv(filename_alignment, encoding='utf-8', sep='\t', index=False)
if __name__ == "__main__":
    # Command line arguments, in order: the folder to work in, then the
    # source, target and alignment file names.
    cli_arguments = [str(sys.argv[position]) for position in range(1, 5)]
    main(*cli_arguments)