Source code for retrieve_from_tables
"""
Python3 script for retrieving data from MCSQ database
Before running the script, install requirements: pandas, numpy, SQLAlchemy, psycopg2
Author: Danielly Sorato
Author contact: danielly.sorato@gmail.com
"""
from DB.base import *
from DB.survey import *
from DB.survey_item import *
from DB.module import *
from DB.introduction import *
from DB.instruction import *
from DB.response import *
from DB.request import *
from sqlalchemy import MetaData
import pandas as pd
from sqlalchemy.sql import select
[docs]def get_tagged_text_from_survey_item_table():
"""
Gets the survey_itemid and the POS tagged text from the survey_item table and creates a dictionary.
Returns:
A dictionary with survey_itemids as keys and POS tagged text as values.
"""
session = session_factory()
tagged_text_dict = dict()
result = session.execute("select survey_itemid, pos_tagged_text from survey_item;")
for i in result:
if i[0] is not None:
survey_itemid = i[0]
tagged_text_dict[survey_itemid] = i[1]
session.close()
return tagged_text_dict
[docs]def create_tagged_text_dict(id_list):
"""
Gets the survey_itemid and the POS tagged text from the survey_item table and creates a dictionary.
Args:
param1 id_list (list of strings): a language specific list of the target segment IDs in the alignment table.
Returns:
A dictionary with target survey_itemids as keys and POS tagged text as values.
"""
session = session_factory()
tagged_text_dict = dict()
result = session.execute("select survey_itemid, pos_tagged_text from survey_item;")
for i in result:
if i[0] is not None and i[0] in id_list:
survey_itemid = i[0]
tagged_text_dict[survey_itemid] = i[1]
session.close()
return tagged_text_dict
[docs]def get_ids_from_alignment_table(survey_itemid):
"""
Gets all IDs (either source or target) from the alignment table.
Args:
param1 survey_itemid (string): name of the column indicating if the desired IDs to be retrived are from source or from target.
Returns:
A list of survey_itemids.
"""
session = session_factory()
result = session.execute("select "+survey_itemid+" from alignment")
session.close()
survey_itemid_list = []
for i in result:
if i[0] is not None:
if i[0] not in survey_itemid_list:
survey_itemid_list.append(i[0])
return survey_itemid_list
[docs]def get_ids_from_alignment_table_per_language(language):
"""
Gets all target IDs from the alignment table based on the language.
Args:
param1 language (string): target language.
Returns:
A list of all target_survey_itemids in the alignment table.
"""
session = session_factory()
result = session.execute("select target_survey_itemid from alignment where target_survey_itemid ilike '%"+language+"%' and target_pos_tagged_text is null")
session.close()
survey_itemid_list = []
for i in result:
if i[0] is not None:
if i[0] not in survey_itemid_list:
survey_itemid_list.append(i[0])
return survey_itemid_list
[docs]def build_id_dicts_per_language(language):
"""
Gets all text segments and their IDs and builds a dictionary by item type.
Args:
param1 language (string): target language.
Returns:
Four different dictionaries (one for each item type). The IDs are the keys and the text segments are the values.
"""
session = session_factory()
result = session.execute("select requestid, responseid, instructionid, introductionid, text from survey_item where country_language ilike '"+language+"%'")
session.close()
request = dict()
response = dict()
instruction = dict()
introduction = dict()
for i in result:
requestid = i[0]
responseid = i[1]
instructionid = i[2]
introductionid = i[3]
text = i[4]
if requestid is not None and isinstance(requestid, int):
request[requestid] = text
elif responseid is not None and isinstance(responseid, int):
response[responseid] = text
elif instructionid is not None and isinstance(instructionid, int):
instruction[instructionid] = text
elif introductionid is not None and isinstance(introductionid, int):
introduction[introductionid] = text
return request, response, instruction, introduction
[docs]def get_introduction_id(text):
"""
Gets an introduction segment ID based on its text.
Args:
param1 text (string): the introduction segment text.
Returns:
introduction segment ID (int).
"""
session = session_factory()
if "'" in text:
text = text.replace("'", "''")
result = session.execute("select introductionid from introduction where text='"+text+"'")
session.close()
for i in result:
return i[0]
[docs]def get_instruction_id(text):
"""
Gets an instruction segment ID based on its text.
Args:
param1 text (string): the instruction segment text.
Returns:
instruction segment ID (int).
"""
session = session_factory()
if "'" in text:
text = text.replace("'", "''")
result = session.execute("select instructionid from instruction where text='"+text+"'")
session.close()
for i in result:
return i[0]
[docs]def get_request_id(text):
"""
Gets an request segment ID based on its text.
Args:
param1 text (string): the request segment text.
Returns:
request segment ID (int).
"""
session = session_factory()
if "'" in text:
text = text.replace("'", "''")
result = session.execute("select requestid from request where text='"+text+"'")
session.close()
for i in result:
return i[0]
[docs]def get_response_id(text, item_value):
"""
Gets an response segment ID based on its text.
Args:
param1 text (string): the response segment text.
Returns:
response segment ID (int).
"""
session = session_factory()
if "'" in text:
text = text.replace("'", "''")
if isinstance(item_value, str):
result = session.execute("select responseid from response where text='"+text+"' and item_value='"+item_value+"'")
else:
result = session.execute("select responseid from response where text='"+text+"' and item_value is null")
session.close()
for i in result:
return i[0]
[docs]def get_module_id(module_name):
"""
Gets an module ID based on its name.
Args:
param1 module_name (string): the name of the module.
Returns:
response module ID (int).
"""
session = session_factory()
if "'" in module_name:
module_name = module_name.replace("'", "''")
result = session.execute("select moduleid from module where module_name='"+module_name+"'")
session.close()
for i in result:
return i[0]
def get_survey_id(surveyid):
session = session_factory()
if "'" in surveyid:
surveyid = surveyid.replace("'", "''")
result = session.execute("select surveyid from survey where surveyid='"+surveyid+"'")
session.close()
for i in result:
return i[0]