import re
import string
from ess_special_answer_categories import *
from essmodules import *
from evsmodules import *
[docs]def remove_spaces_from_item_name(item_name):
"""
Removes spaces in item names such as A 1, because the MCSQ standard are item names without spaces (A1).
Args:
param1 item_name (string): item_name retrieved from the input file.
Returns:
item_name (string) withour spaces.
"""
return item_name.replace(" ", "")
[docs]def get_country_language_and_study_info(filename):
"""
Retrieves the country/language and study metadata based on the input filename,
or survey_item_ID prefix.
The filenames respect a nomenclature rule, as follows:
SSS_RRR_YYYY_CC_LLL
S = study name
R = round or wave
Y = study year
C = Country (ISO code with two digits, except for SOURCE)
L = Language
Args:
param1 filename (string): name of the input file.
Returns:
country/language (string) and study metadata (string).
"""
if 'txt' in filename:
filename_without_extension = re.sub('\.txt', '', filename)
filename_split = filename_without_extension.split('_')
else:
filename_split = filename.split('_')
study = filename_split[0]+'_'+filename_split[1]+'_'+filename_split[2]
country_language = filename_split[3]+'_'+filename_split[4]
return study, country_language
[docs]def standardize_supplementary_item_name(item_name):
"""
Standardizes the item name metadata of supplementary modules G, H and I
Args:
param1 item_name: item_name metadata, extracted from input file.
Returns:
Standardized item_name, when applicable.
"""
if 'GF' in item_name:
return item_name.replace('GF', 'G')
elif 'IF' in item_name:
return item_name.replace('IF', 'I')
elif 'HF' in item_name:
return item_name.replace('HF', 'H')
elif 'GS' in item_name:
return item_name.replace('GS', 'G')
elif 'IS' in item_name:
return item_name.replace('IS', 'I')
elif 'HS' in item_name:
return item_name.replace('HS', 'H')
else:
return item_name
[docs]def retrieve_supplementary_module(essmodules,item_name):
"""
Matches the item_name against the dictionary stored in the ESSModulesRRR objects.
Rotating/supplementary modules are defined by round because they may change from
round to round.
Args:
param1 essmodules (Python object): ESSModulesRRR object, instantiated according to the round.
param2 item_name (string): name of survey item, retrieved in previous steps.
Returns:
matching value for item name (string).
"""
for k,v in list(essmodules.modules.items()):
if re.compile(k).match(item_name):
return v
[docs]def retrieve_item_module(item_name, study):
"""
Retrieves the module of the survey_item, based on information from the ESSModulesRRR objects.
This information comes from the source questionnaires.
Args:
param1 item_name (string): name of survey item, retrieved in previous steps.
param2 study (string): study metadata, embedded in the file name.
Returns:
module of survey_item (string).
"""
if 'ESS' in study:
if re.compile(r'A').match(item_name):
return 'A - Media; social trust'
elif re.compile(r'B').match(item_name):
return 'B - Politics, including: political interest, efficacy, trust, electoral and other forms of participation, party allegiance, socio-political evaluations/orientations, multi-level governance'
elif re.compile(r'C').match(item_name):
return 'C - Subjective well-being and social exclusion; religion; perceived discrimination; national and ethnic identity'
elif re.compile(r'F').match(item_name):
return 'F - Socio-demographic profile, including: Household composition, sex, age, type of area, Education & occupation details of respondent, partner, parents, union membership, household income, marital status'
else:
if 'R01' in study:
essmodules = ESSSModulesR01()
v = retrieve_supplementary_module(essmodules,item_name)
return v
elif 'R02' in study:
essmodules = ESSSModulesR02()
v = retrieve_supplementary_module(essmodules,item_name)
return v
elif 'R03' in study:
essmodules = ESSSModulesR03()
v = retrieve_supplementary_module(essmodules,item_name)
return v
elif 'R04' in study:
essmodules = ESSSModulesR04()
v = retrieve_supplementary_module(essmodules,item_name)
return v
elif 'R05' in study:
essmodules = ESSSModulesR05()
v = retrieve_supplementary_module(essmodules,item_name)
return v
elif 'R06' in study:
essmodules = ESSSModulesR06()
v = retrieve_supplementary_module(essmodules,item_name)
return v
elif 'R07' in study:
essmodules = ESSSModulesR07()
v = retrieve_supplementary_module(essmodules,item_name)
return v
elif 'R08' in study:
essmodules = ESSSModulesR08()
v = retrieve_supplementary_module(essmodules,item_name)
return v
elif 'R09' in study:
essmodules = ESSSModulesR09()
v = retrieve_supplementary_module(essmodules,item_name)
return v
else:
evsmodules = EVSModules1990()
if item_name.lower() in evsmodules.perceptions_of_life:
return 'Perceptions of Life'
elif item_name.lower() in evsmodules.politics_and_society:
return 'Politics and Society'
elif item_name.lower() in evsmodules.environment:
return 'Environment'
elif item_name.lower() in evsmodules.family:
return 'Family'
elif item_name.lower() in evsmodules.work:
return 'Work'
elif item_name.lower() in evsmodules.religion_and_morale:
return 'Religion and Morale'
elif item_name.lower() in evsmodules.national_identity:
return 'National Identity'
elif item_name.lower() in evsmodules.socio_demographics:
return 'Socio Demographics and Interview Characteristics'
else:
return None
[docs]def clean_text(text):
"""
Cleans Request, Introduction and Instruction text segments by removing
undesired characters and standardizing some character representations.
A string input is expected, if the input is not a string instance,
the method returns '', so the entry is ignored in the data extraction loop.
Args:
param1 text (string expected): text to be cleaned.
Returns:
cleaned text (string).
"""
if isinstance(text, str):
text = re.sub(r'\s([?.!"](?:\s|$))', r'\1', text)
text = re.sub('й','й', text)
text = re.sub('Й','Й', text)
text = re.sub('å','å', text)
text = re.sub('ö','ö', text)
text = re.sub('Ö','Ö', text)
text = re.sub('ä','ä', text)
text = re.sub('ů','ů', text)
text = re.sub('ý','ý', text)
text = re.sub('č','č', text)
text = re.sub('Č','Č', text)
text = re.sub('ě','ě', text)
text = re.sub('Ě','Ě', text)
text = re.sub('Ď','Ď', text)
text = re.sub('ţ','ţ', text)
text = re.sub('Ţ','Ţ', text)
text = re.sub('Š','Š', text)
text = re.sub('š', 'š', text)
text = re.sub('ř', 'ř', text)
text = re.sub('Ř', 'Ř', text)
text = re.sub('Ä','Ä', text)
text = re.sub('Á','Á', text)
text = re.sub('Å','Å', text)
text = re.sub('ü','ü', text)
text = re.sub('ê','ê', text)
text = re.sub('Ê','Ê', text)
text = re.sub('è','è', text)
text = re.sub('í','í', text)
text = re.sub('î','î', text)
text = re.sub('é','é', text)
text = re.sub('ó','ó', text)
text = re.sub('Í','Í', text)
text = re.sub('ú','ú', text)
text = re.sub('à','à', text)
text = re.sub('Ó','Ó', text)
text = re.sub('õ','õ', text)
text = re.sub('ã','ã', text)
text = re.sub('Ã','Ã', text)
text = re.sub('ô','ô', text)
text = re.sub('û','û', text)
text = re.sub('ï','ï', text)
text = re.sub('á','á', text)
text = re.sub('–', '-', text)
text = re.sub('’',"'", text)
text = re.sub('´',"'", text)
text = re.sub("…", "...", text)
text = text.replace("... ...", "...")
text = re.sub(" :", ":", text)
text = re.sub("’", "'", text)
text = re.sub("[.]{4,}", "", text)
text = re.sub("[_]{2,}", "", text)
text = re.sub('>', "",text)
text = re.sub('<', "",text)
text = re.sub('Q[0-9]+\.', "",text)
text = re.sub('\[', "",text)
text = re.sub('\]', "",text)
text = re.sub('^[A-Z]\.\s', "",text)
text = re.sub('S\.R\.', "SR",text)
text = re.sub('S\.R', "SR",text)
text = re.sub('SR\.', "SR",text)
text = re.sub('s\.r', "SR",text)
text = re.sub('s\.r\.', "SR",text)
text = re.sub('S\.r', "SR",text)
text = "".join(filter(lambda char: char != "»", text))
text = "".join(filter(lambda char: char != "«", text))
text = text.replace(" ?", "?")
text = text.replace('\n',' ')
text = text.rstrip()
else:
text = ''
return text
[docs]def expand_interviewer_abbreviations(text, country_language):
"""
Switches abbreviations of the word interviewer for the full form.
Args:
param1 text (string): sentence being analyzed.
param2 country_language (string): country_language metadata embedded in file name.
Returns:
text (string) without abbreviations for the word interviewer, when applicable.
"""
if 'CZE' in country_language:
text = text.replace('Taz.', 'Tazatel')
elif '_ES' in country_language or 'POR' in country_language:
text = text.replace('Ent.', "Entrevistador")
elif 'ENG_' in country_language:
text = text.replace('Int.', "Interviewer")
elif 'FRE_' in country_language:
text = text.replace('Enq.', "Enquêteur")
elif 'GER_' in country_language:
text = text.replace('Befr.', "Befrager")
text = text.replace('INT.', "Interviewer")
elif 'NOR' in country_language:
text = text.replace('Int.', "Intervjuer")
return text
[docs]def instantiate_special_answer_category_object(country_language):
"""
Instantiates the SpecialAnswerCategories object that stores both the text
and category values of the special answers (don't know, refusal, not applicable
and write down) in accordance to the country_language metadata parameter.
Args:
param1 country_language (string): country_language metadata parameter, embedded in file name.
Returns:
instance of SpecialAnswerCategories object (Python object), in accordance to the country_language.
"""
if 'CAT' in country_language:
ess_special_answer_categories = SpecialAnswerCategoriesCAT()
elif 'CZE' in country_language:
ess_special_answer_categories = SpecialAnswerCategoriesCZE()
elif 'ENG_' in country_language:
ess_special_answer_categories = SpecialAnswerCategoriesENG()
elif 'FRE_' in country_language:
ess_special_answer_categories = SpecialAnswerCategoriesFRE()
elif 'GER_' in country_language:
ess_special_answer_categories = SpecialAnswerCategoriesGER()
elif 'NOR' in country_language:
ess_special_answer_categories = SpecialAnswerCategoriesNOR()
elif 'POR' in country_language:
ess_special_answer_categories = SpecialAnswerCategoriesPOR()
elif 'SPA' in country_language:
ess_special_answer_categories = SpecialAnswerCategoriesSPA()
elif 'RUS' in country_language:
if '_EE' in country_language:
ess_special_answer_categories = SpecialAnswerCategoriesRUS_EE()
elif '_IL' in country_language:
ess_special_answer_categories = SpecialAnswerCategoriesRUS_IL()
elif '_LV' in country_language:
ess_special_answer_categories = SpecialAnswerCategoriesRUS_LV()
elif '_LT' in country_language:
ess_special_answer_categories = SpecialAnswerCategoriesRUS_LT()
else:
ess_special_answer_categories = SpecialAnswerCategoriesRUS_RU_UA()
return ess_special_answer_categories
[docs]def check_if_answer_is_special_category(text, answer_value, ess_special_answer_categories):
"""
Verifies if a given answer segment is one of the special answer categories,
by testing the answer text against the attributes of SpecialAnswerCategories object.
This method serves the purpose of standardizing the special answer category values.
Args:
param1 text (string): answer segment currently being analyzed.
param2 answer_value (string): answer category value, defined in clean_answer() method.
param3 ess_special_answer_categories (Python object): instance of SpecialAnswerCategories object, in accordance to the country_language.
Returns:
answer text (string) and its category value (string). When the answer is a special answer category,
the text and category values are the ones stored in the SpecialAnswerCategories object.
"""
if text.lower() == ess_special_answer_categories.dont_know[0].lower():
return ess_special_answer_categories.dont_know[0], ess_special_answer_categories.dont_know[1]
elif text.lower() == ess_special_answer_categories.refuse[0].lower():
return ess_special_answer_categories.refuse[0], ess_special_answer_categories.refuse[1]
elif text.lower() == ess_special_answer_categories.dontapply[0].lower():
return ess_special_answer_categories.dontapply[0], ess_special_answer_categories.dontapply[1]
elif text.lower() == ess_special_answer_categories.write_down[0].lower():
return ess_special_answer_categories.write_down[0], ess_special_answer_categories.write_down[1]
return text, answer_value
[docs]def clean_answer(text, ess_special_answer_categories):
"""
Cleans the answer segment, by standardizing the text (when it is a special answer category),
and attributing an category value to it.
Args:
param1 text (string): answer segment currently being analyzed.
param2 ess_special_answer_categories (Python object): instance of SpecialAnswerCategories object, in accordance to the country_language.
Returns:
answer text (string) and its category value (string). When the answer is a special answer category,
the text and category values are the ones stored in the SpecialAnswerCategories object.
"""
answer_value = None
if isinstance(text, str) == False:
return None, None
text = text.replace('(No ho sap)','No ho sap')
text = text.replace("(Don't know)","Don't know")
if re.compile(r'^00\s\w+').match(text):
text = text.split('00', 1)
answer_text = text[1].rstrip()
answer_value = '0'
elif re.compile(r'^01\s\w+').match(text):
text = text.split('01', 1)
answer_text = text[1].rstrip()
answer_value = '1'
elif re.compile(r'^012\s\w+').match(text):
text = text.split('012', 1)
answer_text = text[1].rstrip()
answer_value = '12'
elif re.compile(r'^02\s\w+').match(text):
text = text.split('02', 1)
answer_text = text[1].rstrip()
answer_value = '2'
elif re.compile(r'^03\s\w+').match(text):
text = text.split('03', 1)
answer_text = text[1].rstrip()
answer_value = '3'
elif re.compile(r'^04\s\w+').match(text):
text = text.split('04', 1)
answer_text = text[1].rstrip()
answer_value = '4'
elif re.compile(r'^05\s\w+').match(text):
text = text.split('05', 1)
answer_text = text[1].rstrip()
answer_value = '5'
elif re.compile(r'^06\s\w+').match(text):
text = text.split('06', 1)
answer_text = text[1].rstrip()
answer_value = '6'
elif re.compile(r'^07\s\w+').match(text):
text = text.split('07', 1)
answer_text = text[1].rstrip()
answer_value = '7'
elif re.compile(r'^09\s\w+').match(text):
text = text.split('09', 1)
answer_text = text[1].rstrip()
answer_value = '9'
elif re.compile(r'^10\s\w+').match(text):
text = text.split('10', 1)
answer_text = text[1].rstrip()
answer_value = '10'
elif re.compile(r'^0\s\w+').match(text):
text = text.split('0', 1)
answer_text = text[1].rstrip()
answer_value = '0'
elif re.compile(r'^88\s\w+').match(text):
text = text.split('88', 1)
answer_text = text[1].rstrip()
answer_value = '888'
elif re.compile(r'^77\s\w+').match(text):
text = text.split('77', 1)
answer_text = text[1].rstrip()
answer_value = '777'
elif re.compile(r'^99\s\w+').match(text):
text = text.split('99', 1)
answer_text = text[1].rstrip()
answer_value = '999'
elif re.compile(r'^J\s.+').match(text):
text = text.split('J', 1)
answer_text = text[1].rstrip()
answer_value = 'J'
elif re.compile(r'^R\s.+').match(text):
text = text.split('R', 1)
answer_text = text[1].rstrip()
answer_value = 'R'
elif re.compile(r'^C\s.+').match(text):
text = text.split('C', 1)
answer_text = text[1].rstrip()
answer_value = 'C'
elif re.compile(r'^M\s.+').match(text):
text = text.split('M', 1)
answer_text = text[1].rstrip()
answer_value = 'M'
elif re.compile(r'^F\s.+').match(text):
text = text.split('F', 1)
answer_text = text[1].rstrip()
answer_value = 'F'
elif re.compile(r'^S\s.+').match(text):
text = text.split('S', 1)
answer_text = text[1].rstrip()
answer_value = 'S'
elif re.compile(r'^K\s.+').match(text):
text = text.split('K', 1)
answer_text = text[1].rstrip()
answer_value = 'K'
elif re.compile(r'^P\s.+').match(text):
text = text.split('P', 1)
answer_text = text[1].rstrip()
answer_value = 'P'
elif re.compile(r'^D\s.+').match(text):
text = text.split('D', 1)
answer_text = text[1].rstrip()
answer_value = 'D'
elif re.compile(r'^H\s.+').match(text):
text = text.split('H', 1)
answer_text = text[1].rstrip()
answer_value = 'H'
elif re.compile(r'^U\s.+').match(text):
text = text.split('U', 1)
answer_text = text[1].rstrip()
answer_value = 'U'
elif re.compile(r'^G\s.+').match(text):
text = text.split('G', 1)
answer_text = text[1].rstrip()
answer_value = 'G'
elif re.compile(r'^N\s.+').match(text):
text = text.split('N', 1)
answer_text = text[1].rstrip()
answer_value = 'N'
elif re.compile(r'^Z\s.+').match(text):
text = text.split('Z', 1)
answer_text = text[1].rstrip()
answer_value = 'Z'
elif re.compile(r'^T\s.+').match(text):
text = text.split('T', 1)
answer_text = text[1].rstrip()
answer_value = 'T'
elif re.compile(r'^Y\s.+').match(text):
text = text.split('Y', 1)
answer_text = text[1].rstrip()
answer_value = 'Y'
elif re.compile(r'^Q\s.+').match(text):
text = text.split('Q', 1)
answer_text = text[1].rstrip()
answer_value = 'Q'
elif re.compile(r'^E\s.+').match(text):
text = text.split('E', 1)
answer_text = text[1].rstrip()
answer_value = 'E'
elif re.compile(r'^L\s.+').match(text):
text = text.split('L', 1)
answer_text = text[1].rstrip()
answer_value = 'L'
elif re.compile(r'^B\s.+').match(text):
text = text.split('B', 1)
answer_text = text[1].rstrip()
answer_value = 'B'
elif re.compile(r'^55\s.+').match(text):
text = text.split('55', 1)
answer_text = text[1].rstrip()
answer_value = '55'
elif re.compile(r'^\+4\s.+').match(text):
text = text.split('+4', 1)
answer_text = text[1].rstrip()
answer_value = '+4'
elif re.compile(r'^\+3\s.+').match(text):
text = text.split('+3', 1)
answer_text = text[1].rstrip()
answer_value = '+3'
elif re.compile(r'^\+2\s.+').match(text):
text = text.split('+2', 1)
answer_text = text[1].rstrip()
answer_value = '+2'
elif re.compile(r'^\+1\s.+').match(text):
text = text.split('+1', 1)
answer_text = text[1].rstrip()
answer_value = '+1'
elif re.compile(r'^\-4\s.+').match(text):
text = text.split('-4', 1)
answer_text = text[1].rstrip()
answer_value = '-4'
elif re.compile(r'^\-3\s.+').match(text):
text = text.split('-3', 1)
answer_text = text[1].rstrip()
answer_value = '-3'
elif re.compile(r'^\-2\s.+').match(text):
text = text.split('-2', 1)
answer_text = text[1].rstrip()
answer_value = '-2'
elif re.compile(r'^\-1\s.+').match(text):
text = text.split('-1', 1)
answer_text = text[1].rstrip()
answer_value = '-1'
else:
answer_text = text.strip()
answer_value = None
answer_text = answer_text.strip()
answer_text, answer_value = check_if_answer_is_special_category(answer_text, answer_value, ess_special_answer_categories)
return answer_text, answer_value
[docs]def check_if_segment_is_instruction(sentence, country_language):
"""
Calls the appropriate instruction recognition method, according to the language.
Args:
param1 sentence (string): sentence being analyzed in outer loop of data extraction.
param2 country_language (string): country_language metadata, embedded in file name.
Returns:
bypass the return of instruction_recognition methods (boolean).
"""
if 'CZE' in country_language:
return instruction_recognition_czech(sentence,country_language)
if 'ENG' in country_language:
return instruction_recognition_english(sentence,country_language)
if '_ES' in country_language:
return instruction_recognition_catalan_spanish(sentence,country_language)
if 'FRE' in country_language:
return instruction_recognition_french(sentence,country_language)
if 'GER' in country_language:
return instruction_recognition_german(sentence,country_language)
if 'NOR' in country_language:
return instruction_recognition_norwegian(sentence,country_language)
if 'POR' in country_language:
return instruction_recognition_portuguese(sentence,country_language)
if 'RUS' in country_language:
return instruction_recognition_russian(sentence,country_language)
[docs]def instruction_recognition_russian(text,country_language):
"""
Recognizes an instruction segment for texts written in Russian,
based on regex named groups patterns.
Args:
param1 text (string): text (in Russian) currently being analyzed.
param2 country_language (string): country_language metadata embedded in file name.
Returns:
True if the segment is an instruction or False if it is not.
"""
text_aux = text
text = text.translate(str.maketrans(' ', ' ', string.punctuation))
regex= r"^(?P<interviewer>)(ИНТЕРВЬЮЕР|ИНТЕРВЬЮЕРА)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '№' not in text_aux:
return True
regex= r"^(?P<note>)ЗАМЕЧАНИЕ\s(?P<to>)ДЛЯ\s(?P<interviewer>)(ИНТЕРВЬЮЕР|ИНТЕРВЬЮЕРА|ИНТЕРЬВЬЮЕРА)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<note>)ПРИМЕЧАНИЕ\b"
matches = re.search(regex, text)
if matches:
return True
regex= r"^(?P<readout>)(ЗАЧИТАТЬ|ЗАЧИТАЙТЕ|ЗАЧИТАЙТЕ|ЗАЧИТАЙТ)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<listofinstitutions>)СПИСОК\sИНСТИТУЦИЙ\s(?P<read>)(ЗАЧИТАТЬ|ЗАЧИТАЙТЕ|ЗАЧИТАЙТЕ|ЗАЧИТАЙТ)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<alternatives>)(АЛЬТЕРНАТИВЫ)?\s?(?P<dont>)НЕ\s(?P<read>)(ЗАЧИТЫВАТЬ|ЧИТАТЬ)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ask>)СПРОСИТЕ\s(?P<again>)ЕЩЕ\sРАЗ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ask>)СПРOСИТЬ\s(?P<ifdoesnotlivewithspouse>)ЕСЛИ\sНЕ\sЖИВЕТ\sС\sСУПРУГОМ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<after>)ПОСЛЕ\sТОГО\s(?P<therespondent>)КАК\sРЕСПОНДЕНТ\s(?P<answers>)ДАСТ\sОТВЕТ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<possible>)ВОЗМОЖЕН\s(?P<oneanswer>)(ТОЛЬКО\sОДИН\sОТВЕТ|ОДИН\sВЫБОР)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<checkall>)(ОТМЕТЬТЕ|ВЫБЕРИТЕ|КОДИРУЕТЕ|Назовите)\sВСЕ\s(?P<thatapplies>)(ЧТО\sПОДХОДИТ|КОТОРЫЕ\sПОДХОДЯТ)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<name>)НАЗЫВАЙТЕ\s(?P<groups>)ГРУППЫ\s(?P<peopleororganization>)ЛЮДЕЙ ИЛИ ОРГАНИЗАЦИИ "
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<options>)(ВАРИАНТЫ|УТВЕРЖДЕНИЯ)?\s?(?P<read>)(ПРОЧИТАТЬ|ЗАЧИТАЙТЕ)\s(?P<out>)ВСЛУХ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<write>)(ВПИШИТЕ|ЗАПИШИТЕ)\s(?P<duration>)(ПРОДОЛЖИТЕЛЬНОСТЬ|СКОЛЬКО\sВРЕМЕНИ|КОЛИЧЕСТВО\sВРЕМЕНИ)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<read>)ПРОЧИТАЙТЕ\s(?P<each>)КАЖДОЕ\s(?P<utterance>)УТВЕРЖДЕНИЕ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ask>)ОПРОСИТЕ\s(?P<respondent>)РЕСПОНДЕНТА\s?(?P<use>)(ОТКРЫТЬ)?\s?(?P<card>)(КАРТОЧКУ)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<read>)ЗАЧИТЫВАЙТЕ\s(?P<by>)ПО\s(?P<each>)КАЖДОЙ\s(?P<line>)СТРОКЕ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ifyouthink>)Если\s(Вы|вы)\sсчитаете\s(?P<thatthis>)(что)?\s?(эти|Ваш|Ваша|эта|этот|свой)\s(?P<income>)(неттодоход|доходы|разница|различия|доход|зарплата|заработок)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<one>)ОДИН\s(?P<answer>)ОТВЕТ\s?(?P<in>)(В)?\s?(?P<each>)(КАЖДОЙ)?\s?(?P<line>)(СТРОКЕ)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<canmark>)МОЖНО ОТМЕТИТЬ\s(?P<more>)БОЛЬШЕ\s(?P<than>)ЧЕМ\s(?P<one>)ОДИН\s(?P<answer>)(ОТВЕТ|ВАРИАНТ)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<mark>)(ОТМЕТИТЬ|ОТМЕТЬТЕ)\s(?P<only>)ТОЛЬКО\s(?P<one>)ОДИН\s(?P<answer>)(ОТВЕТ|ВАРИАНТ)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<note>)(ПРИМЕЧАНИЕ|ПОЯСНЕНИЕ|ПРИМЕЧАНИЯ)\s(?P<interviewer>)(ИНТЕРВЬЮЕРУ|ОПРАШИВАЮЩЕМУ)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)Пожалуйста\s(?P<choose>)выберите\s(?P<nomorethantwo>)не\sболее\sдвух"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)Пожалуйста\s(?P<choose>)выберите\s(?P<optionclosesttoyourview>)мнение\sнаиболее\sблизкое\sвашим\sвзглядам"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"(?P<again>)(СНОВА|CHOBA)?\s?(?P<use>)(ИСПОЛЬЗУЙТЕ)?\s?(?P<card>)(КАРТОЧКА|КАРТОЧКУ|KAРTOЧКА)\s(?P<number>)(Nr|\d+|\w+)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<show>)ПОДАЙТЕ\s(?P<respondent>)РЕСПОНДЕНТУ\s(?P<card>)(КАРТОЧКА|КАРТОЧКУ|KAРTOЧКА)\s(?P<number>)(Nr|\d+|\w+)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<next>)ДАЛЕЕ\s(?P<ask>)ЗАДАЙТЕ\s(?P<question>)ВОПРОСЫ\s(?P<all>)ВСЕМ\s(?P<respondents>)РЕСПОНДЕНТАМ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<choose>)Выберите\s(?P<youranswer>)ответ\sпо\sследующей\sскале\s(?P<card>)карточке"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<respondent>)(РЕСПОНДЕНТ|РЕСПОНДЕНТЫ)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<for>)ДЛЯ\s(?P<respondent>)РЕСПОНДЕНТОВ\s(?P<male>)МУЖСКОГО ПОЛА"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<respondent>)Отвечают\s(?P<malefemale>)(женщины|мужчины)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<mark>)ОТМЕТЬТЕ\s(?P<one>)ОДНО\s(?P<option>)ЧИСЛО"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<mark>)ОТМЕТЬТЕ\s(?P<answers>)ОТВЕТ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<possible>)ВОЗМОЖЕ(Н|Т)\s(?P<one>)ОДИН\s(?P<answer>)(ОТВЕТ|ВЫБОР)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<insist>)(УТОЧНИТЕ|Спросите|ВЫЯСНИТЕ)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ask>)(СПАШИВАТЬ|СПРАШИВАТЬ|СПРОСИТЬ|СПРАШИВАЙТЕ|СПРОСИТЕ|СПРАШИВАИТE|ЗАДАВАТЬ)\s(?P<to>)(У)?\s?(?P<all>)(ВСЕХ|ВСЕМ)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<code>)КОДИРОВАТЬ\s(?P<all>)ВСЕ(Х)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)Пожалуйста\s(?P<selectyouranswer>)выберите\s(свой)?\s?ответ\s(?P<fromoptions>)(среди|из)\s(вариантовт|вариантов)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<ask>)(ОТМЕТЬТЕ|ВЫБЕРИТЕ)\s(?P<all>)ВСЕ\s?(?P<applicable>)(ПОДХОДЯЩИЕ)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<for>)ДЛЯ\s(?P<interviewer>)ИНТЕРВЬЮЕРА"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<interviewer>)ИНТЕРВЬЮЕРА\s(?P<note>)ОТМЕЧАЕТ\s(?P<code>)КОД"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<use>)Воспользуйтесь\s(?P<thesame>)той же\s(?P<card>)(карточкой|карточку)\s?(?P<toanswer>)(Для ответа|Для ответов)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<use>)Используйте\s(?P<please>)пожалуйста\s(?P<this>)эту\s(?P<card>)карточку\s(?P<toanswer>)(Для ответа|Для ответов)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux:
return True
regex= r"^(?P<please>)пожалуйста\s(?P<use>)Используйте\s(?P<this>)эту\s(?P<card>)карточку"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<toanswer>)(Для ответа|Для ответов)?\s?(?P<again>)(снова)?\s?(?P<use>)используйте\s(?P<please>)(пожалуйста)?\s?(?P<this>)эту\s(?P<card>)карточку"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<toanswer>)(Для ответа|Для ответов)\s(?P<again>)(снова)?\s?(?P<use>)(воспользуйтесь|пользуйтесь)\s(?P<please>)(пожалуйста)?\s?(?P<this>)(этой)?\s?(?P<thesame>)(той же)?\s?(?P<card>)(карточкой|карточку)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<toanswer>)(Для ответа|Для ответов)?\s?(?P<please>)(Пожалуйста)?\s?(?P<use>)(воспользуйтесь|пользуйтесь)\s(?P<this>)(этой|шкалой)\s(?P<on>)(на)?\s?(?P<card>)(карточки|карточкой|карты|карточке)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(Пожалуйста)?\s?(?P<select>)Выберите\s(?P<your>)(свой)?\s?(?P<one>)(один)?\s?(?P<answer>)ответ\s(?P<of>)из\s(?P<this>)этой\s(?P<card>)(карточки|карточкой|карты|карточке|карте)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(Пожалуйста)?\s?(?P<select>)Выберите\s(?P<your>)(свой)?\s?(?P<one>)(один)?\s?(?P<answer>)ответ\s(?P<this>)(этой)?\s?(?P<of>)(из)?\s?(?P<proposed>)(предложенных)?\s?(?P<on>)(на)?\s?(?P<card>)(карточки|карточкой|карты|карточке|карте)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(Пожалуйста)?\s?(?P<select>)Выберите\s(?P<answer>)ответ\s(?P<of>)из\s(?P<proposed>)предложенных\s(?P<on>)на\s(?P<this>)этой\s(?P<card>)карточке"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<select>)Выберите\s(?P<your>)свой\s(?P<answer>)ответ\s(?P<on>)на\s(?P<this>)этой\s(?P<card>)карте"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(Пожалуйста)?\s?(?P<select>)Выберите\s(?P<answer>)ответ\s(?P<of>)из\s(?P<proposedat>)(предложенных на)?\s?(?P<this>)этой\s(?P<card>)карточки"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<select>)Выберите\s(?P<please>)(пожалуйста)?\s?(?P<of>)из\s(?P<this>)этой\s(?P<card>)карточки"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<select>)Выберите\s(?P<please>)(пожалуйста)?\s?(?P<answer>)ответ\s(?P<of>)из\s(?P<card>)карточки"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(Пожалуйста)?\s?(?P<select>)(Выберите|выберите)\s(?P<your>)(свой)?\s?(?P<answer>)ответ\s(?P<of>)из\s(?P<options>)вариантов\s(?P<proposed>)(предложенных)?\s?(?P<on>)на\s(?P<tjis>)(этой)?\s?(?P<card>)(карточки|карточке)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<select>)(ОТМЕТЬТЕ|Выберите)\s(?P<all>)(ВСЕ|все)\s(?P<that>)(ПОХОДЯЩИЕ|которые)\s(?P<applies>)(ВАРИАНТЫ|подходят)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<select>)(Выберите|ОТМЕТЬЕ)\s(?P<please>)(Пожалуйста)?\s?(?P<answer>)ответ\s(?P<of>)из\s(?P<this>)этой\s(?P<card>)карточки"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<select>)(Выберите|ОТМЕТЬЕ)\s(?P<please>)(Пожалуйста)?\s?(?P<only>)(только)?\s?(?P<one>)один\s(?P<option>)(вариант)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(Пожалуйста)?\s?(?P<select>)(отметьте|выберите)\s(?P<only>)только\s(?P<one>)один\s(?P<option>)(вариант)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<select>)Отметить\s(?P<please>)(Пожалуйста)?\s?(?P<only>)(только)?\s?(?P<one>)один\s(?P<option>)вариант"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(Пожалуйста)?\s?(?P<select>)выберите\s(?P<all>)все\s(?P<options>)варианты\s(?P<answer>)ответа\s(?P<on>)на\s(?P<card>)(карточки|карточкой|карты|карточке)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(Пожалуйста)?\s?(?P<select>)Выберите\s(?P<on>)на\s(?P<card>)(карточки|карточкой|карты|карточке)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(Пожалуйста)?\s?(?P<use>)используйте\s(?P<toanswer>)(Для ответа|Для ответов)\s(?P<this>)эту\s(?P<card>)(карточки|карточкой|карточку|карточке)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(Пожалуйста)?\s?(?P<mark>)отметьте\s(?P<one>)один\s(?P<option>)квадрат"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<answer>)ОТВЕЧАЮТ\s(?P<all>)ВСЕ\s(?P<respondents>)РЕСПОНДЕНТЫ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<again>)(СНОВА)?\s?(?P<card>)(КАРТА|КАРТОЧКА)\s(?P<continue>)(ПРОДОЛЖАЕТСЯ)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<toanswer>)(Для ответа|Для ответов)?\s?(?P<please>)(Пожалуйста)?\s?(?P<now>)(Теперь)?\s?(?P<use>)используйте\s(?P<this>)эту\s(?P<card>)карточку"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<toanswer>)(Для ответа|Для ответов)?\s?(?P<please>)(пожалуйста)?\s?(?P<now>)(Теперь)?\s?(?P<use>)(Используйте|используйте|используйте)\s(?P<this>)(эту|ту)\s(?P<same>)(же)\s?(?P<card>)карточку"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(пожалуйста)?\s?(?P<now>)(Теперь)?\s?(?P<use>)(Используйте|используйте|используйте)\s(?P<toanswer>)(Для ответа|Для ответов)?\s?(?P<this>)(эту|ту)\s(?P<same>)(же)\s?(?P<card>)карточку"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<toanswer>)(Для ответа|Для ответов)\s(?P<use>)(используйте|пользуйтесь)\s(?P<please>)(пожалуйста)?\s?(?P<card>)(карточку|карточкой)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<check>)(Заполните|Отметьте)\s(?P<please>)(пожауйста|пожалуйста|пожалуйста)?\s?(?P<only>)(только)?\s?(?P<one>)(один|одну)\s(?P<box>)(ячейку|клетку|ответ)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(пожалуйста)?\s?(?P<check>)отметьте\s(?P<one>)один\s(?P<box>)квадрат"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<use>)Используйте\s(?P<please>)(пожалуйста|пожалуйста)?\s?(?P<this>)(эту)?\s?(?P<card>)карточку"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(пожалуйста|пожалуйста)?\s?(?P<use>)Используйте\s(?P<toanswer>)(Для ответа|Для ответов)\s(?P<this>)эту\s(?P<card>)карточку"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<check>)ОТМЕТЬТЕ\s(?P<all>)ВСЕ\s(?P<answers>)НАЗВАННЫЕ ОТВЕТЫ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<toanswer>)(Для ответа|Для ответов)\s(?P<please>)(пожалуйста)?\s?(?P<use>)используйте\s(?P<this>)(эту)?\s?(?P<card>)карточку"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<possible>)МОЖНО\s(?P<check>)ОТМЕТИТЬ\s(?P<more>)БОЛЬШЕ\s(?P<than>)ЧЕМ\s(?P<one>)ОДИН\s(?P<answer>)ОТВЕТ"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<mark>)(Заполните|Отметьте)\s(?P<please>)(пожалуйста)?\s?(?P<one>)одну\s(?P<box>)клетку"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<read>)(ПРОЧИТАЙТЕ|ЗАЧИТАТЬ)\s(?P<each>)КАЖДОЕ\s(?P<utterance>)УТВЕРЖДЕНИЕ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<note>)ОТМЕТЬТЕ\s(?P<only>)(ТОЛЬКО)?\s?(?P<one>)ОДИН\s(?P<option>)(ВАРИАНТ|КОД|ЧИСЛО)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<write>)ВПИШИТЕ\s(?P<only>)ТОЛЬКО\s(?P<one>)ОДИН"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<readout>)ЗАЧИТЫВАЙТЕ\s(?P<by>)ПО\s(?P<each>)КАЖДОЙ\s(?P<line>)СТРОКЕ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<readout>)(ЗАЧИТЫВАЙТЕ|ЗАЧИТАЙТЕ)\s(?P<questions>)ВОПРОСЫ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<questions>)ВОПРОСЫ\s(?P<readout>)(ЗАЧИТЫВАЙТЕ|ЗАЧИТАЙТЕ)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<readout>)(ЗАЧИТЫВАЙТЕ|ЗАЧИТАЙТЕ)\s(?P<statements>)УТВЕРЖДЕНИЯ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<use>)Используйте\s(?P<the>)ту\s(?P<same>)(же|самую)\s(?P<card>)карту"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<check>)ОТМЕЧАТЬ\s(?P<all>)ВСЕ\s(?P<that>)КОТОРЫЕ\s(?P<apply>)ПОДХОДЯТ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<show>)ПОДАЙТЕ\s(?P<respondent>)РЕСПОНДЕНТУ\s(?P<card>)(КАРТОЧКУ|карточка)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<the>)ТА\s(?P<same>)ЖЕ\s(?P<card>)(КАРТОЧКУ|карточка)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<ask>)(ЗАДАЙТЕ|ЗАДАВАТЬ|ЗАДАВАЙТЕ)\s(?P<questions>)(ВОПРОСЫ|ВОПРОС)\s?(?P<everyone>)(ВСЕМ)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<readout>)(ЗАЧИТЫВАЙТЕ|ЗАЧИТАЙТЕ)\s(?P<questions>)(ВОПРОСЫ|ВОПРОС)\s(?P<mark>)(ОТМЕЧАЙТЕ)?\s?(?P<answer>)(ОТВЕТ)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<this>)ЭТОТ\s(?P<question>)ВОПРОС\s(?P<show>)ЗАДАЙТЕ\s(?P<all>)(ВСЕМ|ВСЕ|ВСЕX)\s(?P<respondent>)(РЕСПОНДЕНТА|РЕСПОНДЕНТУ|РЕСПОНДЕНТЫ)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(Пожалуйста)?\s?(?P<select>)выберите\s(?P<one>)один\s(?P<answer>)ответ\s(?P<on>)(на|из)\s(?P<this>)этой\s(?P<card>)(карточке|карточки)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<select>)(Выберите|Выбирите)?\s?(?P<your>)(Ваш)?\s?(?P<please>)(пожалуйста)?\s?(?P<answer>)ответ\s(?P<from>)из\s(?P<this>)этой\s(?P<card>)(карточке|карточки)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<use>)Используйте\s(?P<still>)(все еще)?\s?(?P<toanswer>)(Для ответа|Для ответов)?\s?(?P<this>)(эту)?\s?(?P<card>)(карточке|карточки|карточку)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<select>)Выберите\s(?P<your>)(Ваш|свой)\s(?P<answer>)ответ\s(?P<on>)на\s(?P<this>)(этой)?\s?(?P<card>)(карточке|карточки|карточку)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<select>)Выберите\s(?P<please>)пожалуйста\s(?P<one>)один\s(?P<answer>)ответ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<still>)Все еще\s(?P<use>)используйте\s(?P<card>)(карточке|карточки|карточку)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<toanswer>)(Для ответа|Для ответов)\s(?P<use>)используйте\s(?P<please>)(Пожалуйста)?\s?(?P<card>)(карточке|карточки|карточку)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)Пожалуйста\s(?P<use>)используйте\s(?P<this>)(эту)?\s?(?P<card>)карточку"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(Пожалуйста)?\s?(?P<use>)используйте\s(?P<thesame>)(ту же|ту же самую)?\s?(?P<card>)(карточку|карту)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'оцените' not in text and 'чтобы показать' not in text and 'где бы Вы поместили себя' not in text and 'которые я назову' not in text and 'скажите' not in text and 'ответьте' not in text and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<note>)(ПРИМЕЧАНИЕ)?\s?(?P<to>)ДЛЯ\s(?P<interviewer>)ИНТЕРВЬЮЕРА"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<if>)ЕСЛИ\s(?P<not>)НЕ\s(?P<born>)(РОДИЛСЯ|РОДИЛАСЬ)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<choose>)Выберите\s(?P<youranswer>)свой ответ\s(?P<fromcard>)на карте"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'поместили себя' not in text_aux:
return True
regex= r"^(?P<ask>)ПОПРОСИТЕ\s(?P<respondent>)РЕСПОНДЕНТА"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<giveascoreofn>)дайте\sоценку\s\d+"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'поместили себя' not in text_aux:
return True
regex= r"(?P<wherenmeans>)(если|где)?\s?\d\s(баллов)?\s?означает\s(?P<anything>).*\s(?P<andnmeans>)(а|и)\s\d+\s(означает)?"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'поместили себя' not in text_aux:
return True
regex= r"(?P<wherenmeans>)(если|где)?\s?\d\sзначит\s(?P<anything>).*\s(?P<andnmeans>)(а|и)\s\d+\s(значит)?"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'поместили себя' not in text_aux:
return True
regex= r"(?P<wherenmeans>)где\s\d\s(?P<anything>).*\s(?P<andnmeans>)(а|и)\s\d+"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'поместили себя' not in text_aux:
return True
regex= r"(?P<nmeans>)\d+\s(-\s?)?\s?означает\s(?P<anything>).*\s(?P<andnmeans>)(а|и)?\s?\d+\s(-\s?)?\s?означает"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'поместили себя' not in text_aux:
return True
regex= r"(?P<anynumber>)любую\sцифру\s(?P<fromnton>)от\s\d\sдо\s\d+(?P<whichbetterreflectsyouropinion>)образом\sотражает\sВаше\sмнение"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'поместили себя' not in text_aux:
return True
regex= r"(?P<probe>)РАССЛЕДОВАТE"
matches = re.findall(regex, text)
if matches:
return True
regex= r"(?P<insertnomorethannlanguages>)УКАЗАТЬ\sНЕ\sБОЛЕЕ\s\d\sЯЗЫКОВ"
matches = re.findall(regex, text)
if matches:
return True
regex= r"(?P<acceptestimations>)РЕСПОНДЕНТ МОЖЕТ УКАЗАТЬ ПРИБЛИЗИТЕЛЬНО"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
regex= r"(?P<roundup>)(ОКРУГЛИТЕ ДО|ОКРУГЛЯЙТЕ)"
matches = re.findall(regex, text)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
regex= r"(?P<ifrespondent>)ЕСЛИ РЕСПОНДЕНТ"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
regex= r"(?P<askallrespondents>)ЗАДАЙТЕ\s(ЭТОТ)?\s?(ВОПРОС|ВОПРОСЫ)\s(ВСЕМ)?\s?РЕСПОНДЕНТАМ"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
regex= r"(?P<consideradoptions>)УСЫНОВЛЕНИЕ/УДОЧЕРЕНИЕ ТАК ЖЕ УЧИТЫВАЕТСЯ"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
regex= r"(?P<considerworkdays>)ПОЛНЫЙ И НЕПОЛНЫЙ РАБОЧИЙ ДЕНЬ СЧИТАЙТЕ ОДИНАКОВО"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
[docs]def instruction_recognition_german(text,country_language):
"""
Recognizes an instruction segment for texts written in German,
based on regex named groups patterns.
Args:
param1 text (string): text (in German) currently being analyzed.
param2 country_language (string): country_language metadata embedded in file name.
Returns:
True if the segment is an instruction or False if it is not.
"""
text_aux = text
text = text.translate(str.maketrans(' ', ' ', string.punctuation))
regex= r"^(?P<for>)(FÜR)?\s?(?P<interviewer>)(Befrager|Interviewer)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and "Interviewer ID" not in text:
return True
regex= r"^(?P<round>)Runden\b"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<informal>)INFORMELL:"
matches = re.search(regex, text_aux)
if matches:
return True
regex= r"^(?P<attention>)ACHTUNG:"
matches = re.search(regex, text_aux)
if matches:
return True
regex= r"^(?P<round>)ANM:"
matches = re.search(regex, text_aux)
if matches:
return True
regex= r"^(?P<estimation>)(Schätzung|SCHÄTZWERTE)\s?(?P<accept>)akzeptieren\b"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<checkeachapplicable>)Jeweils\sZutreffendes"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<all>)(Alles)?\s?(?P<each>)(Jeweils)?\s?(?P<circleabove>)Genannte(s)?\s(Stufe)?\s?einkreisen"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<againaccordingto>)Wieder\snach\sgrünem\sBildblatt"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<again>)(Immer noch|Weiterhin|Weiter|Und noch die|Noch einmal)?\s?(?P<card>)(karte\s(\d+|[a-z]+)|liste\s(\d+|[a-z]+))"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux:
return True
regex= r"^(?P<untilquestionx>)(bis\s\w\d+)?\s?(?P<card>)(karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<insist>)(NACHFRAGEN|NACHFASSEN)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<probeonce>)EINMAL\sNACHHAKEN"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<probe>)Zusatzfrage"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(bitte)?\s?(?P<readoutstatements>)JEDE(S)?\s(STATEMENT|AUSSAGE)\sVORLESEN"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(bitte)?\s?(?P<readout>)vorlesen"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<to>)an\s(?P<all>)alle"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<askall>)Alle\sfragen"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<codeduration>)(DAUER|ZEIT)\sEINTRAGEN"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<a>)(AN)?\s?(?P<male>)(MÄNNLICHE|Weibliche)\s(?P<respondent>)BEFRAGTE"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<questionsfor>)FRAGEVERSION\sFÜR\s(?P<malefemale>)(MÄNNER|FRAUEN)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<codeall>)ZUTREFFENDES\sBITTE\sKODIEREN"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<givetwoanswers>)Geben\sSie\sbis\szu\szwei\sAntworten"
matches = re.search(regex, text)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"(?P<ifyouthink>)Wenn\sSie\s(glauben|finden)?\s?(?P<that>)(dass)?\s?(?P<these>)(Ihr|diese(s|r)?|Ihren)\s(Einkommen|Unterschiede|Verdienst)"
matches = re.search(regex, text)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"(?P<ifyouthink>)Wenn\sSie\s(?P<yourincome>)Ihren\sLohn"
matches = re.search(regex, text)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(bitte)?\s?(?P<markall>)MARKIEREN\sSIE\sALLES"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(bitte)?\s?(?P<use>)(Verwenden|benutzen|nutzen)\sSie\s(?P<also>)(auch)?\s?(?P<to>)(dazu)?\s?(?P<forthis>)(dafür)?\s?(?P<the>)(die)?\s?(?P<this>)(diese)?\s?(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(bitte)\s(?P<answer>)beantworten\sSie\s(?P<this>)diese\s(?P<question>)Frage"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(bitte)\s(?P<answer>)sagen\sSie\s(?P<tomeusing>)es mir anhand\s(?P<this>)dieser"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
regex= r"^(?P<please>)(bitte)?\s?(?P<use>)(Verwenden|benutzen|nutzen)\sSie\s(?P<again>)dazu\swieder\s(?P<the>)(die|diese)?\s?(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<use>)(Verwenden|benutzen|nutzen)\sSie\s(?P<again>)dazu\s(?P<please>)(bitte)?\s?(?P<the>)(die)?\s?(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<tellme>)(Sagen Sie es mir)?\s?(?P<please>)bitte\s(?P<answer>)(antworten sie)?\s?(?P<again>)(wieder)?\s?(?P<using>)anhand von\s(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<use>)(Verwenden|benutzen|Benützen|nutzen)\sSie\s(?P<now>)(jetzt|nun)?\s?(?P<please>)(bitte)?\s?(?P<the>)(die|diese)?\s?(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(bitte)?\s?(?P<use>)(Verwenden|benutzen|nutzen)\s(Sie|Die)\s(?P<onceagain>)noch einmal\s(?P<this>)(diese)?\s?(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(bitte)?\s?(?P<mark>)Nennen\s(Sie|Die)\s(?P<all>)alles"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<note>)Hinweis\s(?P<for>)für\s(?P<the>)(den)?\s?(?P<interviewer>)(Interviewer|Befrager)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<tick>)kreuzen\s(?P<a>)Sie ein\s(?P<box>)Kästchen an"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<tick>)kreuzen\s(?P<a>)Sie bitte ein\s(?P<box>)Kästchen an"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<use>)(Verwenden|benutzen|nutzen)\s(?P<this>)Sie\s(diese|die)?\s?(?P<same>)(selbe)?\s?(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<use>)(Verwenden|benutzen|nutzen)\s(?P<this>)Sie dafür diese\s(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<use>)(Verwenden|benutzen|nutzen)\s(?P<again>)Sie nochmals\s(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<again>)nochmals\s(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<answer>)sagen Sie\s(?P<forme>)(es mir|es bitte)\s(?P<again>)(noch einmal|nochmals)?\s?(?P<withthis>)(anhand von dieser|anhand dieser|anhand von)\s(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<use>)(Verwenden|benutzen|nutzen)\s(?P<this>)Sie\s(?P<again>)(wieder|wieder diese)\s(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<use>)(Verwenden|benutzen|nutzen)\s(?P<please>)Sie bitte\s(?P<again>)(wieder|wieder diese)\s(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<use>)(Verwenden|benutzen|nutzen)\s(?P<toanswer>)Sie für Ihre Antwort\s(?P<again>)(wieder|die gleiche|nochmals)?\s?(?P<the>)(die|diese|der|von)?\s?(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<this>)diese\s(?P<card>)(Karte|liste)\s(?P<use>)verwenden"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<use>)verwenden\s(?P<please>)Sie bitte\s(?P<this>)(diese)?\s?(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<use>)verwenden\s(?P<this>)(die|diese)\s(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<use>)verwenden\s(?P<this>)Sie diese\s(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<mark>)ringeln\s(?P<a>)Sie eine\s(?P<answer>)Antwortzahl"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<only>)NUR\s(?P<one>)EINE\s(?P<answer>)(ANTWORT|NENNUNG|Angabe)\s?(?P<possible>)(MÖGLICH)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<multiple>)(Mehrere|MEHRFACHNENNUNGEN)\s(?P<answer>)(Antworten)?\s?(?P<possible>)möglich"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)BITTE\s(?P<choose>)VERSUCHEN SIE\s(?P<an>)EINE\s(?P<answer>)ANWORT\s(?P<fromthis>)VON DIESER\s(?P<card>)(KARTE|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<use>)(Verwenden|benutzen|nutzen)\s(?P<this>)Sie\s(diese|die)\s(?P<same>)(gleiche)?\s?(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<choose>)wählen\s(?P<sie>)Sie\s(?P<one>)(eine|Ihre)?\s?(?P<answer>)Antwort\s(?P<fromthis>)(von|von dieser|von der|auf der|auf dieser|aus)\s(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<choose>)wählen\s(?P<sie>)Sie\s(?P<one>)(eine|Ihre|von)?\s?(?P<the>)(die|der|dieser)?\s?(?P<thecard>)(Karte|liste)?\s?(\d+)?\s?(die|der|dieser)?\s?(?P<answer>)(Antwortmöglichkeiten|Antwort|Antwortmöglichkeit)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<just>)nur\s(?P<one>)eine\s(?P<answer>)Antwort\s(?P<select>)auswählen"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<choose>)(wählen)?\s?(?P<sie>)(Sie)?\s?(?P<one>)(eine|Ihre|von)?\s?(?P<answer>)Antwort\s(?P<on>)aus\s(?P<the>)(die|der|dieser)?\s?(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<answer>)Antwort\s(?P<fromthis>)aus\s(dieser|die)\s(?P<card>)(Karte|liste)\s(?P<select>)auswählen"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<use>)(Verwenden|benutzen|nutzen)\s(?P<continue>)Sie\s(weiterhin|wieder|wiederum)?\s?(?P<now>)(jetzt)?\s?(?P<this>)(diese|dieselbe)?\s?(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<please>)(Bitte)?\s?(?P<lookat>)betrachten Sie\s(?P<nowthis>)nun (diese|dieselbe)\s(?P<card>)(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"^(?P<andnow>)Und nun\s(?P<use>)benutzen\s(?P<thecard>)Sie\s(Karte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux and 'sagen Sie es mir' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"(?P<inascale>)einer\sSkala\s(?P<fromnton>)von\s\d\sbis\s\d+\s(?P<inwhich>)(wobei\s\d+\sbedeutet)?"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux and 'sagen Sie es mir' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"(?P<usescalefromnton>)Verwenden\sSie\sdazu\sdiese\sSkala\svon\s\d\sbis\s\d+"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux and 'sagen Sie es mir' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"(?P<choosen>)wählen\sSie\d+(?P<anything>).*\s(?P<choosem>)wählen\sSie\d+"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux and 'sagen Sie es mir' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"(?P<wherenmeans>)\d+\ssteht\sfür(?P<anything>).*\s(?P<andnmeans>)\d+\sfür"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux and 'sagen Sie es mir' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"(?P<wherenmeans>)\d+\sheißt\s(?P<anything>).*\s(?P<andnmeans>)\d+\sbedeutet"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux and 'sagen Sie es mir' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"(?P<wherenmeans>)wobei\s\d\s(?P<anything>).*\s(?P<andnmeans>)und\s\d+"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux and 'sagen Sie es mir' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"(?P<answerbasedon>)Antworten\sSie\sanhand\s(?P<list>)(dieser)?\s?(liste|karte)\s(\d+)?\s?(?P<nmeans>)\d\sbedeutet\s(?P<anything>).*\s(?P<andn>)und\s\d+"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux and 'sagen Sie es mir' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"(?P<nwouldmean>)\d+\swürde\sbedeuten"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'ablehnen' not in text_aux and 'sagen Sie mir' not in text_aux and 'sagen Sie es mir' not in text_aux and 'sagen Sie mir' not in text_aux:
return True
regex= r"(?P<nmeans>)\d+\sbedeutet"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
regex= r"(?P<nmeans>)\d+\sbedeutet\s(?P<anything>).*\s(?P<nstandsfor>)\d+\ssteht\sfür\s"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
regex= r"(?P<nmeans>)\d+\s(-\s?)?\s?heißt\s(?P<anything>).*\s(?P<andnmeans>)\d+\s(-\s?)?\s?bedeutet"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
regex= r"(?P<nstandsfor>)\d+\ssteht\shier\sfür\s(?P<anything>).*\s(?P<andnmeans>)und\s\d+\sfür"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
[docs]def instruction_recognition_norwegian(text,country_language):
"""
Recognizes an instruction segment for texts written in Norwegian,
based on regex named groups patterns.
Args:
param1 text (string): text (in Norwegian) currently being analyzed.
param2 country_language (string): country_language metadata embedded in file name.
Returns:
True if the segment is an instruction or False if it is not.
"""
text_aux = text
text = text.translate(str.maketrans(' ', ' ', string.punctuation))
regex= r"^(?P<programmer>)programmerer:"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<interviewer>)Intervjuer"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'Intervjuer ID' not in text:
return True
regex= r"^(?P<malefemale>)(MANNLIGE|KVINNELIGE)\s(?P<respondent>)(IO|intervjuobjekter)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<respondent>)IO\s(?P<isman>)ER\sMANN"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<interviewer>)insistere"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<toall>)(SPØR)?\s?alle"
matches = re.search(regex, text, re.IGNORECASE)
if matches and 'land' not in text:
return True
regex= r"^(?P<typein>)SKRIV\sINN"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<probe>)(OPPFØLGING|FØLG\sOPP\sEN\sGANG)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<read>)LES\s(?P<aloud>)(HØYT|OPP)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<codeall>)(REGISTRER|KOD)\s(ALT|ALLE)\s?(?P<answers>)(SVAR)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<codelowest>)(REGISTRER|KOD)\sdet\slaveste\skodetallet"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<codeall>)(REGISTRER|KOD)\s(?P<onlyone>)BARE\sET(T)?\s(?P<answers>)(SVAR|alternativ)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<codeall>)(REGISTRER|KOD)\s(ALT|ALLE)\s(?P<thatapplies>)SOM"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<codeall>)(REGISTRER|KOD)\s(?P<maximum>)MAKSIMALT"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<givetime>)GI\sIO\sTID\sTIL\s(?P<tolookcard>)Å\sSTUDERE\sSVARKORTET"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<tobeincluded>)FOR\sÅ\sREGNES\sMED"
matches = re.search(regex, text)
if matches:
return True
regex= r"(?P<tobefilled>)FYLLES\sUT"
matches = re.search(regex, text)
if matches:
return True
regex= r"(?P<donotinclude>)IKKE\sTA\sMED\s"
matches = re.search(regex, text)
if matches:
return True
regex= r"(?P<tickfor>)KRYSS\sAV\sFOR\s"
matches = re.search(regex, text)
if matches:
return True
regex= r"^(?P<includealso>)REGN\sOGSÅ\sMED"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<frozenfruit>)FROSSEN\sFRUKT\s(?P<mustbeincluded>)SKAL\sREGNES\sMED"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<forinstance>)FOR\sEKSEMPEL\s(?P<wheninterview>)HVI(S)?\sINTERVJUET"
matches = re.search(regex, text)
if matches:
return True
regex= r"(?P<referto>)VIS\sTIL\s(?P<samemonth>)SAMME\sMÅNED\s(?P<asinterview>)SOM\sINTERVJUET"
matches = re.search(regex, text)
if matches:
return True
regex= r"(?P<ifyouthink>)Hvis\sdu\ssynes\s(?P<these>)(disse)?\s?(?P<different>)(inntektene|forskjellene|inntekten)"
matches = re.search(regex, text)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'påstand' not in text_aux and 'fortell meg' not in text_aux and 'plasser deg selv' not in text_aux and 'fortelle' not in text_aux:
return True
regex= r"^(?P<choose>)Velg\s(?P<youranswers>)svaret\sditt\s(?P<from>)fra\s(?P<this>)(dette)?\s?(?P<card>)kortet"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'påstand' not in text_aux and 'si' not in text_aux and 'fortell meg' not in text_aux and 'plasser deg selv' not in text_aux and 'fortelle' not in text_aux:
return True
regex= r"^(?P<lookat>)Se\spå\s(?P<this>)(dette)?\s?(?P<card>)kortet"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'påstand' not in text_aux and 'si' not in text_aux and 'fortell meg' not in text_aux and 'plasser deg selv' not in text_aux and 'fortelle' not in text_aux:
return True
regex= r"^(?P<takea>)Ta\sen\s(?P<lookat>)titt\spå\s(?P<this>)(dette)?\s?(?P<card>)kortet"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux:
return True
regex= r"^(?P<answer>)Svar\s(?P<based>)med utgangspunkt\s(?P<on>)i\s(?P<card>)kortet"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux:
return True
regex= r"^(?P<please>)Vær snill å\s(?P<use>)bruke\s(?P<this>)(dette)?\s?(?P<card>)kortet"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<read>)Les\s(?P<out>)opp\s(?P<the>)de\s(?P<different>)ulike\s(?P<organizations>)institusjonene"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<choose>)Velg\s(?P<an>)et(t)?\s?(tall|av)?\s(?P<option>)(alternativene|svaralternativ|svaralternativene|alternativ|svar)?\s?(?P<from>)(nederst)?\s?(fra|på)\s(?P<this>)(dette)?\s?(?P<card>)kortet"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<no>)ikke\s(?P<card>)kort"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ifrespondent>)HVIS\sIO\s"
matches = re.search(regex, text)
if matches:
return True
regex= r"^(?P<allkinds>)ALL\sSLAGS\s(?P<ofcontact>)KONTAKT\s(?P<mustbeincluded>)SKAL\sREGNES\sMED"
matches = re.search(regex, text)
if matches:
return True
regex= r"^(?P<again>)(vis|Forsatt|FORTSATT)?\s?(?P<card>)kort"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'påstand' not in text_aux and 'si' not in text_aux and 'fortell meg' not in text_aux and 'plasser deg selv' not in text_aux and 'fortelle' not in text_aux:
return True
regex= r"^(?P<note>)notat\s(?P<for>)til\s(?P<interviewer>)intervjueren"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<read>)les\s(?P<out>)høyt\s(?P<the>)(opp)?\s?(?P<options>)(alternativene)?\s?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<code>)kod\s(?P<all>)alt\s(?P<that>)(som)?\s?(?P<apply>)(passer)?\s?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ask>)stilles\s(?P<to>)til\s(?P<all>)alle"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<use>)Bruk\s(?P<still>)(fortsatt)?\s?(?P<this>)(det|dette)?\s?(?P<card>)kortet\s(?P<andtellwhichopinion>)og\ssi\shvilket\ssyn\s(?P<isclosesttoyours>)du\ser\smest\senig\si"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'påstand' not in text_aux and 'fortell meg' not in text_aux and 'plasser deg selv' not in text_aux and 'fortelle' not in text_aux:
return True
regex= r"^(?P<lookat>)(Se\s?på)?\s?(?P<use>)Bruk\s(?P<still>)(fortsatt)?\s?(?P<this>)(det|dette)?\s?(?P<again>)(samme|fortsatt)?\s?(?P<card>)kortet"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'påstand' not in text_aux and 'si' not in text_aux and 'fortell meg' not in text_aux and 'plasser deg selv' not in text_aux and 'fortelle' not in text_aux:
return True
regex= r"(?P<scalefrom>)skala\sfra\s(?P<nton>)\d\s?(-|til)\s?\d+"
matches = re.findall(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'fortell meg' not in text_aux and 'plasser deg selv' not in text_aux and 'fortelle' not in text_aux:
return True
regex= r"(?P<numbersfrom>)tall\sfra\s(?P<nton>)\d\s?(-|til)\s?\d+"
matches = re.findall(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'fortell meg' not in text_aux and 'plasser deg selv' not in text_aux and 'fortelle' not in text_aux:
return True
regex= r"(?P<nstandsfor>)\d\s(står|star)\sfor\s(?P<anything>).*\s(?P<andnstandsfor>)\d+\s(står|star)\sfor"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'fortell meg' not in text_aux and 'plasser deg selv' not in text_aux and 'fortelle' not in text_aux:
return True
regex= r"(?P<nstandsfor>)Null\s(står|star)\sfor\s(?P<anything>).*\s(?P<andnstandsfor>)ti\s(står|star)\sfor"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'fortell meg' not in text_aux and 'plasser deg selv' not in text_aux and 'fortelle' not in text_aux:
return True
regex= r"(?P<wherenmeans>)hvor\s\d\sbetyr\s(?P<anything>).*\s(?P<andnmeans>)og\s\d+\sbetyr"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'fortell meg' not in text_aux and 'plasser deg selv' not in text_aux and 'fortelle' not in text_aux:
return True
regex= r"^(?P<readout>)LES\sOPP\s(?P<options>)(ALTERNATIVENE|kategoriene)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<use>)Bruk\s(?P<answer>)kategoriene\s(?P<this>)(på)?\s?(?P<card>)kortet"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)Vennligst\s(?P<mark>)sett\s(?P<one>)ett\s(?P<case>)kryss"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)Vennligst\s(?P<mark>)sett\s(?P<case>)kryss\s(?P<closest>)i ruten nærmest\s(?P<youropinion>)din mening"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<answerby>)Svar ved å\s(?P<using>)bruke\s(?P<this>)dette\s(?P<card>)kortet"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<use>)Bruk\s(?P<this>)det\s(?P<same>)samme\s(?P<card>)kortet\s(?P<to>)til\s(?P<answer>)å svare"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<select>)Velg\s(?P<one>)et(t)?\s(?P<option>)(svaralternativ|svaralternativene|alternativene|alternativ)\s(?P<from>)fra\s(?P<this>)(det|dette)\s(?P<card>)kortet"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<select>)Velg\s(?P<one>)et(t)?\s(?P<option>)(svaralternativ|svaralternativene|alternativene|alternativ)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<newcard>)Her et nytt kort"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
[docs]def instruction_recognition_french(text,country_language):
"""
Recognizes an instruction segment for texts written in French,
based on regex named groups patterns.
Args:
param1 text (string): text (in French) currently being analyzed.
param2 country_language (string): country_language metadata embedded in file name.
Returns:
True if the segment is an instruction or False if it is not.
"""
text_aux = text
text = text.translate(str.maketrans(' ', ' ', string.punctuation))
regex= r"^(?P<programmer>)Programmeur"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<interviewer>)(Enquêteur|Enqueteur)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<read>)(lisez|lire)\s?(?P<outloud>)(haute voix)?\s?(?P<each>)(chaque)?\s?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<read>)(lisez|lire)\s?(?P<the>)(les)?\s?(?P<each>)(chaque)?\s?(?P<utterances>)PROPOSITION(S)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<code>)CODER\s(?P<all>)TOUTES\s(?P<the>)(les)?\s?(?P<answers>)REPONSES"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<code>)CODER\s(?P<fromthe>)A\sPARTIR\sDE\sLA"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<code>)(CODAGE|CODER)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and text != 'CODER LE SEXE':
return True
regex= r"^(?P<code>)CITE(R|Z)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<code>)NOTE(R|Z)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ifnecessary>)AU\sBESOIN\s(?P<chooseaccording>)CHOISIR\sSELON\s(?P<thepriority>)LA\sPRIORITE"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<code>)code(z|r)\s(?P<all>)tout(?P<thatapplies>)((ce)? qui s'applique)?"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ask>)(demandez|poser)?\s?(?P<all>)(a|á)\stous"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<askifmainactivity>)POSER SI ACTIVITE PRINCIPALE"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<attention>)ATTENTION"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<show>)(montrer|montrez|LAISSER)\s(?P<again>)(encore|toujours|A NOUVEAU)?\s?(?P<the>)(la)?\s?(?P<card>)(carte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<show>)(montrer|montrez|LAISSER)?\s?(?P<again>)(encore|toujours|A NOUVEAU)?\s?(?P<the>)(la)?\s?(?P<card>)(carte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(S'il vous plaît|Veuillez|Veuillez s'il vous plaît)?(,)?\s?(?P<continue>)(Continuez)?\s?(à|a)?\s?(?P<use>)utiliser\s(?P<this>)cette\s(?P<card>)carte"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(S'il vous plaît|Veuillez|Je vous prie)?(,)?\s?(?P<use>)(d')?utilise(z|r)\s(?P<thesame>)la même\s(?P<card>)(carte|liste)"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(S'il vous plaît)?(,)?\s?(?P<iaskyouto>)(Je vous prie)?\s?(?P<use>)(d'utiliser|utilisez)\s(?P<this>)cette\s(?P<card>)(carte|liste)"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'dites-moi' not in text_aux and 'affirmations' not in text_aux:
return True
regex= r"^(?P<please>)(S'il vous plaît)?(,)?\s?(?P<iaskyouto>)(Je vous prie|Veuillez)?\s?(?P<use>)(d'utiliser|utilise(r|z))\s(?P<again>)(de nouveau|à nouveau|encore|toujours)?\s?(?P<this>)cette\s(?P<same>)(même)?\s?(?P<card>)(carte|liste)"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<use>)Utilisez\s(?P<thesame>)toujours cette même\s(?P<card>)carte"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'dites-moi' not in text_aux and 'affirmations' not in text_aux:
return True
regex= r"^(?P<please>)(S'il vous plaît|Veuillez)(,)?\s(?P<answer>)répondre\s(?P<using>)(en utilisant|A L'AIDE)"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(S'il vous plaît|Veuillez)?(,)?\s?(?P<again>)(encore)?\s?(?P<use>)utiliser\s(?P<this>)(cette)?\s?(?P<the>)(la)?\s?(?P<card>)carte"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<note>)note\s(?P<for>)(a|à)\s(?P<theinterviewer>)(l'enqueteur|l'enquetêur|l'enquêteur)"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<instruction>)(instruction|note)\s(?P<for>)pour\s(?P<theinterviewer>)(l'enqueteur|l'enquetêur|l'ENQUÊTEUR)"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"(?P<codeormark>)(code(r|z)|note(r|z))?\s?(?P<one>)UNE\s(?P<only>)SEULE\s(?P<answer>)(REPONSE|RÉPONSE)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<codeormark>)(code(r|z)|note(r|z))?\s?(?P<only>)SEULEMENT\s(?P<one>)(UNE)?\s?(?P<answer>)(REPONSE|RÉPONSE)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<one>)UNE\s(?P<answer>)(REPONSE|RÉPONSE)\s(?P<forline>)par\sligne"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ask>)Demander\s(?P<thefive>)les\scinq\s(?P<mostimportant>)plus\simportantes"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<show>)Présenter\s(?P<optioncards>)la\scarte\sdes\sopinions"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'dites-moi' not in text_aux:
return True
regex= r"(?P<nequals>)\d\s?=\s?(?P<anything>).*(?P<xequals>)\d+\s?="
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'dites-moi' not in text_aux:
return True
regex= r"(?P<choosetheoption>)vous\schoisissez\sle\s(chiffre|nombre)\s\d+"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'dites-moi' not in text_aux:
return True
regex= r"^(?P<choose>)choix\s(?P<multiple>)multiple\s?(?P<possible>)(possible)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<if>)Si\s(?P<the>)le\s(?P<respondent>)répondant"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(S'il vous plaît|Veuillez)?(,)?\s?(?P<choose>)(choisir|Choisissez)\s(?P<onlyone>)(une|une seule)?\s?(?P<your>)(votre)?\s?(?P<answer>)réponse(s)?\s"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(S'il vous plaît|Veuillez)?(,)?\s?(?P<choose>)(choisir|Choisissez)\s(?P<onlyone>)(une|une seule)?\s?(?P<answer>)(réponse(s)?)?\s?(?P<fromcard>)sur\scette\scarte"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'dites-moi' not in text_aux:
return True
regex= r"^(?P<choose>)choix\s(?P<multiple>)multiple\s(?P<possible>)possible"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<multiple>)(PLUSIEURS|Plusieures|cinq)\s(?P<answers>)(reponses|réponses)\s(?P<possible>)(possibles)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<answers>)(reponses|réponses)\s(?P<multiple>)multiples"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<respondent>)REPONDANT\s(?P<malefemale>)(MASCULIN|FEMININ)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<interviewergiverespondent>)L'ENQUETEUR\sLAISSE\sAU\sREPONDANT"
matches = re.search(regex, text_aux)
if matches:
return True
regex= r"^(?P<enounce>)ENONCEZ\s(?P<one>)UNE\s(?P<organization>)ORGANISATION\s(?P<ata>)A LA\s(?P<time>)FOIS"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<you>)VOUS\s(?P<can>)POUVEZ\s(?P<mark>)COCHER\s(?P<multiple>)PLUSIEURS\s(?P<options>)CASES"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<withthehelp>)Toujours à l'aide\s(?P<of>)de\s(?P<this>)cette\s(?P<card>)carte"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and 'dites-moi' not in text_aux and 'affirmations' not in text_aux:
return True
regex= r"^(?P<continueto>)Continuez à\s(?P<use>)utiliser\s(?P<this>)cette\s(?P<card>)carte"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)Je vous prie de\s(?P<choose>)choisir\s(?P<youranswer>)votre réponse\s(?P<from>)sur\s(?P<this>)cette\s(?P<card>)carte."
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(S'il vous plaît|Veuillez)?(,)?\s?(?P<mark>)coche(z|r)\s(?P<theoption>)la case\s(?P<that>)(qui)?\s?(?P<represent>)(correspond|correspondante)\s?(?P<better>)(le mieux)?\s?(?P<your>)((à\s)?votre)?\s?(?P<answer>)(réponse(s)?)?"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)Veuillez\s(?P<choose>)choisir\s(?P<onlyone>)une seule des\s(?P<answer>)réponses\s(?P<from>)figurant sur\s(?P<this>)cette\s(?P<card>)(carte|liste)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(Veuillez)?\s?(?P<answer>)(Répondez|répondre)\s(?P<using>)(à l'aide de|au moyen de)\s(?P<this>)cette\s(?P<card>)(carte|liste)"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(S'il vous plaît|Veuillez)?(,)?\s?(?P<mark>)Cerclez\s(?P<theoption>)le code\s(?P<correspondto>)correspondant à\s(?P<answer>)votre réponse\s"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<note>)NOTE(R|Z)\s(?P<with>)AVEC\s(?P<asmany>)LE PLUS DE\s(?P<details>)DÉTAILS\s(?P<possible>)POSSIBLES"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<insist>)RELANCE(R|Z)?"
matches = re.search(regex, text)
if matches:
return True
regex= r"^(?P<insist>)RELANCE(R|Z)\s(?P<interviewer>)(enquetêur|enqueteur|enquêteur)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<scalefrom>)échelle\sde\s(?P<nton>)\d\s?à\s?\d+\s(?P<wherenmeans>)où\s\d\ssignifie\sque"
matches = re.findall(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
regex= r"^(?P<usea>)Utilisez\sune\s(?P<scalefrom>)échelle\sde\s(?P<nton>)\d\sà\s\d+(?P<wherenmeans>).*\(\d+\).*\(\d+\)"
matches = re.findall(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
regex= r"(?P<nmeansthat>)\d\ssignifie\s(que|qu'il)?\s?(?P<anything>).*\s(?P<andnmeans>)(et)?\s?\d+\s(signifie)?\s?(que|qu'il)?"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
regex= r"(?P<nmeansthat>)\d\ssignifie\s(?P<anything>).*\s(?P<andnmeans>)\d+\ssignifie\s.*(?P<andthenotesinthemiddle>)et les notes intermédiaires permettent de nuancer votre jugement"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
regex= r"(?P<answerusingthiscard>)Répondez s'il vous plaît en utilisant cette carte(?P<anything>).*\s(?P<andnmeans>)et\s\d+\ssignifie"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
regex= r"^(?P<nmeansthat>)\d\ssignifie\sque"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
regex= r"(?P<giveascoreofn>)donnez\sun\sscore\sde\s\d+"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
[docs]def instruction_recognition_english(text,country_language):
"""
Recognizes an instruction segment for texts written in English,
based on regex named groups patterns.
Args:
param1 text (string): text (in English) currently being analyzed.
param2 country_language (string): country_language metadata embedded in file name.
Returns:
True if the segment is an instruction or False if it is not.
"""
text_aux = text
text = text.translate(str.maketrans(' ', ' ', string.punctuation))
regex= r"^(?P<programmer>)Programmer:"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<interviewer>)Interviewer"
matches = re.search(regex, text, re.IGNORECASE)
if matches and "Interviewer ID" not in text:
return True
regex= r"^(?P<computercode>)COMPUTER\sCODE"
matches = re.search(regex, text)
if matches:
return True
regex= r"^(?P<typeinduration>)TYPE\sIN\sDURATION"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<writeinnumber>)write\sIN\snumber"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<prespecifiedcodes>)(use|to\sbe\scoded\sinto)\s(pre\-specified)?\s?(codes|ISO)"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<tobeaskedascountryspecific>)(To|can)\sbe\sasked\sas\sa\scountry\-specific\squestion"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<tobeaskedascountryspecific>)Country\-specific\s(\(question\sand\))?\s?codes"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<chooseuptofive>)Please\schoose\sup\sto\sfive"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<toberecorded>)To\sbe\srecoded\sinto"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(Please)?\s?(?P<choosetheoption>)choose\sthe\soption.*(?P<closesttoyourview>)closest\sto\syour\sview"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<malefemalerespondents>)(ASK\sIF\sRESPONDENT\sIS\sFEMALE|FEMALE\sRESPONDENT(S)?|ASK\sIF\sRESPONDENT\sIS\sMALE|MALE\sRESPONDENT(S)?)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<probeifnecessary>)probe\sif\snecessary"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ifyouthink>)If\syou\sthink\s(?P<paymentcondition>)((this|your)\s(gross)?\s?pay|your\sincome\sfrom\s|these\sincomes|these\sdifferences)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<interviewer>)INTERVIEWER\s(?P<ifrespondent>)If\sthe\srespondent\s(?P<needadditionalinstructions>)needs\sadditional\sinstructions"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<note>)NOTE\s(?P<numbermustbeexclusive>)THIS\sNUMBER\sMUST\sBE\sEXCLUSIVE\s(?P<toindividualinterviewers>)TO\sINDIVIDUAL\sINTERVIEWERS"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(please)?\s?(?P<answer>)answer\s(?P<using>)using\s(?P<this>)this\s(?P<card>)card\s(?P<where>)where\s(?P<zero>)0\s(?P<means>)means"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<nmeans>)\d\smeans\s(?P<anything>).*(?P<and>)(and)?\s?(?P<xmeans>)\d+\smeans"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"(?P<nmeans>)\d\smeans\s(?P<anything>).*(?P<and>)(and)?\s?(?P<xmeans>)\d+\smeans"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"(?P<nmeans>)\d\sis\s(?P<anything>).*(?P<and>)and\s(?P<xmeans>)\d+\sis"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"(?P<nmeans>)WHERE\s\d\s?=\s?(?P<anything>).*(?P<and>)and\s(?P<xmeans>)\d+\s?=\s?"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"(?P<nmeans>)On\sa\sscale\sfrom\s\d\sto\s\d+\s(?P<pleaseindicate>)please\sindicate"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"(?P<nmeans>)WHERE\s\d\smeans\s(?P<anything>).*(?P<and>)and\s(?P<xmeans>)\d+"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"(?P<give>)give\s(?P<ascoreof>)a\sscore\sof\s(?P<number>)\d+\b"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(please)?\s?(?P<use>)use\s(?P<this>)this\s(?P<card>)card\s?(?P<again>)(again)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(please)?\s?(?P<tell>)tell\s(?P<me>)me\s(?P<on>)on\s(?P<a>)a\s(?P<score>)score"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<again>)(again)?\s?(?P<still>)(still)?\s?(?P<card>)showcard"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<card>)card\s(?P<numberorletter>)(\d+|[a-z])"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<show>)(show|still)\s(?P<income>)(income)?\s?(?P<card>)card\s?(?P<again>)(again)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<ask>)ask\s(?P<all>)all"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<probe>)probe"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<code>)code\s(?P<one>)(one|under|five)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<prompt>)prompt"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<insist>)insist"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)Please\s(?P<use>)use\s(?P<the>)the\s(?P<responses>)responses\s(?P<onthis>)on\sthis\s(?P<card>)card"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<look>)look\s(?P<atthis>)at\sthis\s(?P<card>)card"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<ingrid>)IN GRID\s(?P<collect>)collect\s(?P<details>)details\s(?P<of>)of\s(?P<respondent>)respondent"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<then>)then\s(?P<collect>)collect\s(?P<details>)details\s(?P<of>)of\s(?P<other>)other(s)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<code>)code\s(?P<one>)one\s(?P<answer>)(answer)?\s?(?P<apply>)only"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<code>)code\s(?P<ab>)AN\s(?P<answer>)(answer)?\s?(?P<foreach>)FOR\sEACH"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<codeorselect>)(code|select)\s(?P<all>)all\s(?P<that>)that\s(?P<apply>)(apply|applies)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<code>)code\s(?P<all>)all"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<eng_gbcodes>)(Northern\sIreland|England\sScotland).*\scodes"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<enterdetails>)NOW\sENTER\sDETAILS\sFOR\sTHE\sOLDEST"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<read>)read\s(?P<out>)out\s?(?P<each>)(each)?\s?(?P<statement>)(statement)?\s?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(please)?\s?(?P<still>)(still)?\s?(?P<use>)use\s(?P<this>)(this)?\s?(?P<the>)(the)?\s?(?P<same>)(same)?\s?(?P<card>)card"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(please)?\s?(?P<choose>)choose\s(?P<oneor>)(one|your|an)\s(?P<answer>)answer\s(?P<infrom>)(in|from)\s(?P<this>)this\s(?P<same>)(same)?\s?(?P<card>)card"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<prompt>)prompt\s(?P<inrelation>)in relation\s(?P<to>)to\s(?P<precodes>)precodes"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(please)?\s?(?P<select>)select\s(?P<only>)only\s(?P<one>)one"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(please)?\s?(?P<tick>)tick\s(?P<oneorthe>)(one|the)?\s?(?P<box>)box"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(please)?\s?(?P<choose>)choose\s(?P<one>)(one)?\s?(?P<your>)(your)?\s?(?P<answer>)answer\s(?P<from>)from\s(?P<card>)(card|the options)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<note>)note\s(?P<forthe>)(for the)?\s?(?P<to>)(to)?\s?(?P<interviewer>)interviewer"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<if>)If\s(?P<respondent>)respondent"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ask>)ASK\s(?P<if>)If\s(?P<partner>)PARTNER\s(?P<in>)in\s(?P<paid>)paid\s(?P<work>)work"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ask>)ASK\s(?P<if>)If\s(?P<father>)father\s(?P<employed>)employed"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ask>)ASK\s(?P<if>)If\s(?P<father>)father\s(?P<working>)working"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ask>)ASK\s(?P<if>)If\s(?P<mother>)mother\s(?P<employed>)employed"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ask>)ASK\s(?P<if>)If\s(?P<mother>)mother\s(?P<working>)working"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
return False
[docs]def instruction_recognition_czech(text,country_language):
"""
Recognizes an instruction segment for texts written in Czech,
based on regex named groups patterns.
Args:
param1 text (string): text (in Czech) currently being analyzed.
param2 country_language (string): country_language metadata embedded in file name.
Returns:
True if the segment is an instruction or False if it is not.
"""
text_aux = text
text = text.translate(str.maketrans(' ', ' ', string.punctuation))
regex= r"^(?P<code>)(KÓDUJE|KODUJE|zkontroluje)?\s?(?P<interviewer>)(tazatele|taz)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<interviewer>)(tazatele|taz)\s?(?P<code>)(KÓDUJE|KODUJE|zkontroluje)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<filter>)FILTR:"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"(?P<ask>)(PTEJTE SE)?\s?(?P<if>)(POKUD)?\s?(?P<the>)(JE)?\s?(?P<respondent>)(RESPONDENT(I)?|RESPONDENTKA|RESPONDENTŮ)"
matches = re.search(regex, text)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and not re.findall('řekněte', text_aux, re.IGNORECASE):
return True
regex= r"^(?P<ifhasno>)POKUD NEMÁ ŽÁDNÉHO"
matches = re.search(regex, text)
if matches:
return True
regex= r"^(?P<readout>)PŘEDČÍTEJTE"
matches = re.search(regex, text)
if matches:
return True
regex= r"^(?P<codeall>)KODUJTE\sVŠECHNY"
matches = re.search(regex, text)
if matches:
return True
regex= r"^(?P<chooseuptofive>)Vyznačte\spouze\spět"
matches = re.search(regex, text)
if matches:
return True
regex= r"^(?P<selectuptofive>)Prosím\svyberte\snejvýše\spět"
matches = re.search(regex, text)
if matches:
return True
regex= r"^(?P<registerestimations>)ZAZNAMENEJTE\sI\sODHADY"
matches = re.search(regex, text)
if matches:
return True
regex= r"^(?P<roundup>)ZAOKROUHLETE"
matches = re.search(regex, text)
if matches:
return True
regex= r"^(?P<mainwork>)HLAVNÍ PRÁCE"
matches = re.search(regex, text)
if matches:
return True
regex= r"^(?P<record>)ZAZNAMENEJTE"
matches = re.search(regex, text)
if matches and text != 'ZAZNAMENEJTE POHLAVÍ':
return True
regex= r"^(?P<askrespondent>)POŽÁDEJTE RESPONDENTA"
matches = re.search(regex, text)
if matches:
return True
regex= r"^(?P<respondent>)RESPONDENT(I)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<roundtofullhour>)ZAOKROUHLETE NA CELÉ"
matches = re.search(regex, text)
if matches:
return True
regex= r"(?P<includeadoptions>)ZAHRNUJTE\sADOPCE"
matches = re.search(regex, text)
if matches:
return True
regex= r"^(?P<ask>)PTEJTE\s(?P<all>)SE VŠECH\s(?P<employees>)ZAMĚSTANCŮ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<markwiththiscard>)S\spomocí\stéto\skarty\sprosím\soznačte"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and not re.findall('řekněte', text_aux, re.IGNORECASE):
return True
regex= r"^(?P<now>)(A nyní|Nyní)?\s?(?P<use>)(Použijte|použijte)\s(?P<again>)(znovu)?\s?(?P<graduate>)(stupnici)?\s?(?P<this>)(tuto|této)?\s?(?P<card>)kart(u|a|y)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and not re.findall('řekněte', text_aux, re.IGNORECASE):
return True
regex= r"^(?P<toanswer>)(K odpovědi|Při odpovědích)\s(?P<use>)(použijte|pouţijte)\s(?P<please>)(prosím)?\s?(?P<this>)tuto\s(?P<card>)kart(u|a|y)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and not re.findall('řekněte', text_aux, re.IGNORECASE):
return True
regex= r"^(?P<please>)prosím\s(?P<use>)užijte\s(?P<card>)kart(u|a)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and not re.findall('řekněte', text_aux, re.IGNORECASE):
return True
regex= r"^(?P<instruction>)Pokyn"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<prompt>)ZJIŠŤUJE"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<show>)(PŘEDLOŽTE|PŘEDLOŢTE)?\s?(?P<still>)(STÁLE JEŠTĚ)?\s?(?P<again>)(OPĚT)?\s?(?P<card>)kart(u|a)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<note>)poznámka\s(?P<for>)pro\s(?P<interviewer>)tazatele"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<respondent>)(RESPONDENKY|RESPONDENTI)\s(?P<malefemale>)(ŽENY|MUŽI)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<ineachline>)(V|NA)?\s?(KAŽDÉM ŘÁDKU)?\s?(?P<possible>)(MOŽNÁ|MOŢNÁ)\s(?P<only>)POUZE\s(?P<one>)JEDNA\s(?P<answer>)ODPOVĚĎ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<ineachline>)(V|NA)\sKAŽDÉM\sŘÁDKU\s(?P<choose>)VYBERTE\s(?P<one>)JEDNU\s(?P<answer>)ODPOVĚĎ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<possible>)(MOŽNÁ|MOŢNÁ)?\s?(?P<only>)(POUZE|JE POUZE)\s(?P<one>)(JEDNA|JEDINÁ)\s(?P<answer>)ODPOVĚĎ"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<probe>)SONDUJTE"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ask>)(ptejte|Přejte)\s(?P<all>)se všech"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<dontread>)NEČTĚTE\s(?P<only>)POUZE\s(?P<code>)ZAKÓDUJTE"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<look>)Prohlédněte\s(?P<at>)si\s(?P<this>)(tuto)?\s?(?P<card>)kart(u|a)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and not re.findall('řekněte', text_aux, re.IGNORECASE):
return True
regex= r"^(?P<read>)přečtěte\s(?P<category>)varianty\s(?P<answers>)odpovědí"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<choose>)VYBERTE\s(?P<please>)(prosím)?\s?(?P<only>)(POUZE|jen)\s(?P<one>)JEDNU\s(?P<answer>)(ODPOVĚĎ)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<read>)(ČTĚTE|ČTETE)\s(?P<out>)NAHLAS\s?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<prompt>)ZJIŠŤUJTE"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<read>)(ČTĚTE|ČTETE|PŘEČTĚTE)\s(?P<out>)NAHLAS"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<again>)(opět|STÁLE)\s(?P<card>)kart(u|a)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(prosím)?\s?(?P<again>)(opět)?\s?(?P<use>)(Použijete|použijte|pouţijte)\s(?P<this>)(tuto)?\s?(?P<card>)kart(u|a)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and not re.findall('řekněte', text_aux, re.IGNORECASE):
return True
regex= r"^(?P<use>)(Použijete|Použijte|Pouţijte)\s(?P<please>)(prosím)?\s?(?P<same>)(stejnou)?\s?(?P<this>)(tuto)?\s?\s(?P<card>)kart(u|a)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and not re.findall('řekněte', text_aux, re.IGNORECASE):
return True
regex= r"^(?P<chooseyour>)Vyberte svou\s(?P<answer>)odpověď\s(?P<accordingto>)podle této\s(?P<card>)karty"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<choose>)Vyberte(,)?\s(?P<please>)prosím(,)?\s(?P<youranswer>)svou\sodpověď\s(?P<accordingto>)(podle|z)\stéto\s(?P<card>)karty"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"(?P<choose>)Vyberte\s(?P<please>)(prosím)?\s?(?P<one>)(jen jednu|jednu)"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and not re.findall('řekněte', text_aux, re.IGNORECASE):
return True
regex= r"(?P<choose>)Pokud\ssi\smyslíte\s(?P<thatyourthis>)že\s(je|jsou)\s(vaše|váš|tyto)\s(?P<income>)(mzda|příjem|příjmy|rozdíly)"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and not re.findall('řekněte', text_aux, re.IGNORECASE):
return True
regex= r"(?P<please>)(prosím)?\s?(?P<choose>)Vyberte\s(?P<one>)(jen jednu|jednu)?\s?(?P<answer>)odpověď\s(?P<fromthis>)(z|na)\smožností"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and not re.findall('řekněte', text_aux, re.IGNORECASE):
return True
regex= r"(?P<choose>)Vyberte\s(?P<please>)(prosím)?\s?(?P<one>)(jen jednu|jednu)?\s?(?P<answer>)odpověď\s(?P<fromthis>)(z|na)\stéto\s(?P<card>)(karty|kartě)"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and not re.findall('řekněte', text_aux, re.IGNORECASE):
return True
regex= r"(?P<youranswer>)Svou\sodpověď\s(?P<please>)(prosím)?\s?(?P<choose>)Vyberte\s(?P<fromthis>)(z|na)\stéto\s(?P<card>)(karty|kartě)"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and not re.findall('řekněte', text_aux, re.IGNORECASE):
return True
regex= r"(?P<choose>)Vyberte\s(?P<please>)prosím\s?(?P<one>)(jen jednu|jednu)\s(?P<answer>)odpověď\s?(?P<fromthis>)(z této)?\s?(?P<card>)(karty|kartě)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and not re.findall('řekněte', text_aux, re.IGNORECASE):
return True
regex= r"^(?P<choose>)Vyberte\s(?P<please>)(prosím)?\s?(?P<only>)jen\s(?P<one>)jednu\s(?P<option>)(možnost)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<choose>)Vyberte\s(?P<all>)všechny"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<toanswer>)Při odpovědích\s(?P<use>)použijte\s(?P<this>)tuto\s(?P<card>)kart(u|a|y)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<wherenmeans>)\d+\sznamená\s(?P<anything>).*\s(?P<andnmeans>)(a)?\s?\d+\s(znamená)?"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and not re.findall('řekněte', text_aux, re.IGNORECASE):
return True
regex= r"(?P<wherenmeans>)(kde)?\s?\d\sje\s(?P<anything>).*\s(?P<andnmeans>)(a)?\s?\d+\s(je)?"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and not re.findall('řekněte', text_aux, re.IGNORECASE) and 'měsíčně' not in text:
return True
regex= r"(?P<wherenmeans>)(kde)?\s?\d\s(?P<anything>).*\s(?P<andnmeans>)(a)?\s?\d+\s(je)?"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux and not re.findall('řekněte', text_aux, re.IGNORECASE) and 'měsíčně' not in text:
return True
regex= r"(?P<givenscore>)dejte\sjí\s\d+\sbodů"
matches = re.findall(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and ':' not in text_aux and '...' not in text_aux:
return True
return False
[docs]def instruction_recognition_portuguese(text,country_language):
"""
Recognizes an instruction segment for texts written in Portuguese,
based on regex named groups patterns.
Args:
param1 text (string): text (in Portuguese) currently being analyzed.
param2 country_language (string): country_language metadata embedded in file name.
Returns:
True if the segment is an instruction or False if it is not.
"""
text_aux = text
text = text.translate(str.maketrans(' ', ' ', string.punctuation))
regex= r"^(?P<programador>)programador"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<interviewer>)entrevistador"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<card>)(cartão|lista)\s(?P<numberorletter>)(\d+|\w+)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<read>)ler\b"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<all>)TODOS\s(?P<except>)MENOS\sOS\s(?P<single>)SOLTEIROS"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<prompt>)APROFUNDAR"
matches = re.search(regex, text)
if matches:
return True
regex= r"^(?P<respondent>)INQUIRIDOS\s(?P<sex>)DO\sSEXO\s(?P<one>)(FEMININO|masculino)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<part>)parte\s(?P<lower>)inferior\sd(o|a)\s(?P<card>)(cartão|lista)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<mark>)ASSINALAR\s(?P<only>)APENAS\s(?P<one>)UM(A)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<then>)DEPOIS\s(?P<repeat>)REPETIR\s(?P<forthe>)PARA\sA\s(?P<institution>)INSTITUIÇÃO"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<code>)CODIFICAR\s(?P<only>)APENAS\s(?P<one>)UMA"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<code>)CODIFICAR\s(?P<only>)TOD(O|A)S"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ifyouthink>)Se\s(considera|acha)\s(?P<thatyourthis>)que\s(o\sseu|ess(e|a)(s)?)\s(?P<income>)(rendimento|diferenças)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<answer>)Responda\s(?P<please>)(por favor)?\s?(?P<using>)(utilizando|com)\s(?P<theorthis>)(o|a|um|uma|esta|este)\s(?P<same>)(mesmo|mesma)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<use>)utilize\s(?P<please>)(por favor)?\s?(?P<thesame>)(o\smesmo|a\smesma)\s(?P<card>)(cartão|lista)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(por favor)?\s?(?P<choose>)Escolha\s(?P<youranswer>)a sua resposta\s(?P<fromthe>)(a\spartir\sd(o|a))?\s?(?P<inthis>)(neste)?\s?(?P<following>)(seguinte)?\s?(?P<card>)(cartão|lista)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(por favor)?\s?(?P<use>)(utilize|use)\s(?P<youranswer>)as\srespostas\s(?P<fromthe>)dest(a|e)\s(?P<following>)(seguinte)?\s?(?P<card>)(cartão|lista)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<forthis>)Para\sisso\s(?P<youwilluse>)(utilizará|utilize)\s(?P<please>)(por favor)?\s?(?P<thefollowing>)(o|a)\sseguinte\s(?P<card>)(cartão|lista)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<show>)(mostrar|manter)\s(?P<again>)(novamente)?\s?(?P<the>)(a|o)?\s?(?P<card>)(cartão|lista)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<show>)(mostrando|utilizando)\s(?P<again>)ainda\s(?P<this>)este\s(?P<card>)(cartão|lista)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(por favor)?\s?(?P<use>)(utilize|mostrar|use)\s(?P<this>)(este|est(e|a)\smesmo|(o|a)\smesm(o|a))?\s?(?P<card>)(cartão|lista)\s?(?P<again>)(novamente)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(por favor)?\s?(?P<use>)utilize\s(?P<this>)es(te|ta|sa|se)\s(?P<card>)(cartão|lista)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<use>)(utilize\s|use\s)(?P<please>)(por\sfavor)?\s?(?P<this>)es(te|ta|sa|se)\s(?P<card>)(cartão|lista)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<use>)utilize\s(?P<thesame>)(o|a)\smesm(o|a)(?P<card>)(cartão|lista)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(por favor)?\s?(?P<continue>)continue\s(?P<show>)(mostrando|utilizando)\s(?P<this>)(este|este mesmo)?\s?(?P<card>)(cartão|lista)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<suggest>)sugerir\s(?P<categories>)categorias\s(?P<de>)de\s(?P<answer>)resposta"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(por favor)?\s?(?P<choose>)escolha\s(?P<the>)(a)?\s?(?P<youranswer>)resposta(s)?\s(?P<fromthis>)(a\spartir\s)?(deste|nest(e|a))\s(?P<card>)(cartão|lista)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<choose>)escolha\s(?P<please>)(por favor)?\s?(?P<youranswer>)a\ssua\sresposta\s(?P<fromthis>)(a\spartir\s)?deste\s(?P<card>)cartão"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<choose>)escolha\s(?P<please>)(por favor)?\s?(?P<amongoptions>)entre\sas\sopções"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<note>)(nota)?\s?(?P<for>)(para|ao)?\s?(?P<interviewer>)entrevistador\s?(?P<code>)(codifica(r)?)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(por favor)?\s?(?P<choose>)escolha\s(?P<option>)a afirmação\s(?P<closer>)que mais se aproxima da\s(?P<youropinion>)sua opinião"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<read>)ler\s(?P<one>)uma\s(?P<organization>)organização\s(?P<ateachtime>)de cada vez"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<scalefrom>)escala\sde\s(?P<nton>)\d\sa\s\d+\b"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and '10,' not in text_aux:
return True
regex= r"(?P<thenmeans>)\d\ssignifica\s(que)?\s?(?P<anything>).*\s(?P<andthenmeans>)(de|e)\s(o)?\s?\d+"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"(?P<thenmeans>)(em)?\s?que\d\ssignifica\s(?P<anything>).*\s(?P<andthenmeans>)(de|e)\s(o)?\s?\d+\s?(significa)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"(?P<inascalewherenmeans>)Responda\spor\sfavor\snuma\sescala"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"(?P<coose>)escolha\s(?P<please>)(por favor)?\s?(?P<amongoptions>)entre\sas\sopções"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"(?P<givenpoints>)dê\s\d+\spontos\b"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<notethenumber>)EXPLICITAR\sO\sNÚMERO\s(?P<ofhoursandminutes>)DE\sHORAS\sE\sMINUTOS\b"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<read>)ler\s(?P<slowly>)(pausadamente)?\s?(?P<outloud>)(em voz alta)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<choose>)Escolha\s(?P<oneofthe>)uma das\s(?P<following>)(seguintes)?\s?(?P<answers>)respostas"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux:
return True
regex= r"^(?P<choose>)Indique\s(?P<onlyone>)só\suma\s(?P<following>)hipótese"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux:
return True
regex= r"^(?P<askthe>)PEDIR AO\s(?P<respondent>)ENTREVISTADO\s(?P<adescription>)UMA DESCRIÇÃO"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<code>)(codificar|assinalar|registr(ar|e))\s(?P<all>)(todas|todos)\s?(?P<the>)(as)?\s?(?P<that>)(que\sse\s)?(?P<apply>)aplicam?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<codeormark>)(codificar|assinalar|registr(ar|e))\s(?P<only>)(só|apenas)?\s?(?P<one>)uma\s(?P<answer>)(resposta)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<use>)Use\s(?P<please>)por\sfavor(?P<thiscale>)esta\sescala\s(?P<toanswer>)para\sresponder\s"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux:
return True
regex= r"^(?P<describe>)descrever\s(?P<details>)detalhadamente"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ask>)perguntar a\s(?P<all>)todos"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<mark>)REGISTR(E|AR)\s(?P<anything>)(UMA|CINCO|NA|TOD(O|A)S)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<insist>)insistir"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
return False
[docs]def instruction_recognition_catalan_spanish(text,country_language):
"""
Recognizes an instruction segment for texts written either in Spanish or Catalan,
based on regex named groups patterns.
Args:
param1 text (string): text (in Spanish or Catalan) currently being analyzed.
param2 country_language (string): country_language metadata embedded in file name.
Returns:
True if the segment is an instruction or False if it is not.
"""
text_aux = text
text = text.translate(str.maketrans(' ', ' ', string.punctuation))
regex= r"^(?P<programador>)programador"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<interviewer>)(entrevistador|ENTREVISTADOR/A)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
if 'CAT' in country_language:
regex= r"^(?P<typeinduration>)ANOTAR\sDURADA"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ask>)Preguntar:"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<attentioninterviewer>)ATENCIÓ\sENTREVISTADOR(/A)?:"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<continue>)(continuï|continueu|continuar|seguir)?\s?(?P<show>)mostr(ar|eu|ant|ando)\s(?P<card>)targeta"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<versionfor>)VERSIÓ\sPER\sA\s(?P<respondent>)ENTREVISTATS\s(DONES|HOMES)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<malefemalerespondents>)(SOLO|SÓLO)\s(PARA|A)\sENTREVISTAD(A|O)S\s?(MUJERES|HOMBRES)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<spontaneousanswer>)RESPOSTA\s(ESPONTÁNIA|ESPONTÀNIA)\s(?P<dontread>)NO\sLLEGIR"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<multipleanswer>)RESPOSTA\sM(Ú|U)LTIPLE"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<still>)(encara)?\s?(?P<card>)targeta\s(\d+|\w+)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<continue>)seguir\s(?P<with>)amb\s(?P<the>)la\s(?P<card>)targeta"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<continue>)seguir\s(?P<showing>)MOSTRANT\s(?P<the>)(la)?\s?(?P<card>)targeta"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<use>)(Utilitzi)?\s?(?P<this>)aquesta\s(?P<card>)targeta"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<ifyouthink>)Si\s(creu|pensa)\sque\s(?P<thispayis>)(la\sseva\sremuneració|els\sseus\singressos|aquests\singressos|aquestes\sdiferències)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<important>)IMPORTANT:\s(?P<interviewer>)Entrevistador(/a)?"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<interviewer>)ENTREVISTADOR(/A)?:\s(?P<ifrespondent>)Si\sl'entrevistat\s(?P<needadditionalinstructions>)necessita\sinstruccions\saddicionals"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ask>)FER\s(?P<if>)SI\s(?P<interviewer>)L'ENTREVISTADOR\s(?P<codified>)HA CODIFICAT"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(si\sus\splau)?\s?(?P<use>)(utili(tzi|zi|tzeu|zeu)|segueixi\sutilitzant|continu(ï|eu)\sutilitzant)\s(?P<this>)(aquesta)?\s?(?P<the>)(la)?\s?(?P<same>)(mateixa)?\s?(?P<card>)targeta"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(si us plau)?\s?(?P<answer>)respongui\s(?P<using>)utilitzant\s(?P<this>)aquesta\s(?P<same>)(mateixa)?\s?(?P<card>)targeta"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<suggest>)suggerir\s(?P<categories>)categories\s(?P<de>)de\s(?P<answer>)resposta"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(si us plau)?(\,)?\s?(?P<choose>)encercli\s(?P<one>)una\s(?P<option>)opció"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(si us plau)?\s?(?P<choose>)encercli\s(?P<option>)l'opció\s(?P<closer>)més propera"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"(?P<scalefrom>)escala\s(del|de)\s(?P<nton>)\d\s(al|a)\s\d+\s"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'expressi' not in text:
return True
regex= r"(?P<scalefrom>)triï\sun\snúmero\sde\s(?P<nton>)\d\sa\s\d+\s(?P<where>)on\s(el)?\s?(?P<nmeans>)\d\sés"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'expressi' not in text:
return True
regex= r"(?P<thenmeans>)el\s\d\svol\sdir\s(?P<anything>).*\s(?P<andthenmeans>)i\s(el)?\s?\d+\s(vol\sdir)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'expressi' not in text:
return True
regex= r"(?P<thenmeans>)\d\ssignifica\s(que)?\s?(?P<anything>).*\s(?P<andthenmeans>)i\s(el)?\s?\d+\ssignifica\s(que)?\s?"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'expressi' not in text:
return True
regex= r"^(?P<please>)(si us plau)?(,)?\s?(?P<choose>)(esculli|elegeixi|Triï)\s(?P<the>)(la)?\s?(?P<your>)(seva)?\s?(?P<answer>)resposta\s(?P<fromthis>)(d'aquesta|en\saquesta)\s(?P<card>)targeta"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"(?P<choose>)(esculli|elegeixi|Triï)\s(?P<theoption>)l'opció\s(?P<fromthis>)d'aquesta\s(?P<card>)targeta"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(si us plau)?(,)?\s?(?P<choose>)(esculli|elegeixi|Triï)\s(?P<the>)(la)?\s?(?P<your>)(seva)?\s?(?P<answer>)resposta\s(?P<amongoptions>)(entre|d'entre)\sles\sopcions"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(si us plau)?(,)?\s?(?P<choose>)(esculli|elegeixi|Triï)\s(?P<your>)(la seva)?\s?(?P<one>)(una|la)?\s?(?P<ofthe>)(de les)?\s?(?P<answer>)(resposta|respostes)\s(?P<thatappearin>)(que apareixen en)?\s?(?P<fromthis>)(aquesta|d'aquesta)\s(?P<card>)targeta"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(si us plau)?(,)?\s?(?P<choose>)(esculli|elegeixi|Triï)\s(?P<only>)només\s(?P<one>)una\s(?P<fromthis>)d'aquestes"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(si us plau)?\s?(?P<use>)Utilitzi\s(?P<thiscard>)aquesta\s(tarjeta|targeta)\s(?P<toguide>)com\sa\sguia\sper\sa\s(?P<youranswer>)la\sseva\sresposta"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(si us plau)?\s?(?P<note>)(anotar|codificar)\s(?P<only>)només\s(?P<one>)una\s(?P<answer>)resposta"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<please>)(si us plau)?(,)\s?(?P<choose>)miri de triar\s(?P<the>)la\s(?P<answer>)resposta\s(?P<fromthis>)d'aquesta\s(?P<card>)targeta"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux:
return True
regex= r"^(?P<dont>)(no)?\s?(?P<read>)llegi(r|u)\s?(?P<out>)(alta)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<noteallnecessarypeople>)Anoti\stotes\sles\spersones\sque\scalgui"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<noteallnecessarypeople>)MARCAR\sTOTES\sLES\sRESPOSTES\sQUE\sDONI"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<forall>)(OMPLIR\sPER\s)?A\sTOTES\sLES\sPERSONES"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<read>)llegi(r|u)\s(?P<each>)cada\s(?P<utterance>)afirmació"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<readorcode>)(llegir|codificar)\s(?P<each>)cada\s(?P<organization>)organització"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<note>)nota\s(?P<for>)(per|per a)\s(?P<interviewer>)(l'entrevistador|entrevistador)\s?(?P<code>)(codificar)?"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"(?P<markorcode>)(marqueu|marcar|codificar)\s(?P<all>)tot(s|es)\s(?P<thepeople>)(les persones)?\s?(?P<theanswers>)(les respostes)?\s?(?P<that>)(el|els)?\s?que\s(?P<apply>)(corresponguin|calgui)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<note>)anoteu\s(?P<with>)amb\s(?P<all>)tots\s(?P<details>)detalls"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ask>)(preguntar)?\s?a\s(?P<all>)(tothom|tots)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<if>)si\s(?P<therespondent>)l'entrevistat"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<insist>)insisti(u|r)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
elif 'SPA' in country_language:
regex= r"(?P<malefemalerespondents>)(SOLO|SÓLO|VERSIÓN\sPARA)\s(PARA|A)\sENTREVISTAD(A|O)S\s?(MUJERES|HOMBRES)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<malefemalerespondents>)ENTREVISTAD(A|O)S\s?(MUJERES|HOMBRES)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<card>)tarjeta\s(?P<numberorletter>)(\d+|\w+)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<continue>)(seguir|siga)\s(con la|mostrando|con)\s(?P<card>)tarjeta"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<writewith>)ESCRIBIR\sCON\s(?P<details>)DETALLE(s)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<mark>)(CIRCUNDAR|marcar|ANOTAR)\s(?P<all>)(SOLO|TODOS|UNA|BAJO|cinco)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<show>)preguntar:"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<show>)mostrar\s(?P<card>)tarjeta"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<attentioninterviewer>)ATENCIÓN\sENTREVISTADOR(/A)?:"
matches = re.search(regex, text_aux, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<answer>)RESPUESTA\s(?P<multiple>)(MÚLTIPLE|MULTIPLE)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<choose>)ELEGIR\s(?P<only>)SOLO\s(?P<one>)UNA"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<answer>)RESPUESTA\s(?P<spontaneous>)(ESPONTÁNEA|ESPONTANEA)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<askall>)A\sTODAS\sLAS\sPERSONAS"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<fillforall>)RELLENAR\sPARA\sTODAS\sLAS\sPERSONAS\sENTREVISTADAS"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<if>)si\s(?P<the>)el\s(?P<respondent>)(encuestado|entrevistado)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<choose>)Elija\s(?P<an>)una\s(?P<answer>)respuesta\s(?P<fromthe>)de las que aparecen en esta\s(?P<card>)tarjeta"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux:
return True
regex= r"^(?P<please>)(por favor)?\s?(?P<answer>)responda\s(?P<using>)utilizando\s(?P<this>)esta\s(?P<card>)tarjeta"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(por favor)?\s?(?P<continueusing>)(siga utilizando|siga usando|use otra vez|use)\s(?P<same>)(la misma)?\s?(?P<this>)(esta)?\s?(?P<card>)tarjeta"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ifyouthink>)Si\s(cree|piensa)\sque\s(?P<thispayis>)(su\sremuneración|sus\singresos|las\sdiferencias)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<now>)Ahora\s(?P<idlikeyouto>)querría\sque\s(?P<lookatthis>)examinase\sesta\s(?P<card>)tarjeta"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux and 'afirmaciones' not in text_aux:
return True
regex= r"^(?P<use>)(utilice|use)\s(?P<again>)(otra vez)?\s?(?P<please>)(por favor)?\s?(?P<theanswers>)las\srespuestas\s(?P<ofthis>)de\sesta\s(?P<card>)tarjeta"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux and 'afirmaciones' not in text_aux:
return True
regex= r"^(?P<use>)(utilice|use)\s(?P<again>)(otra vez)?\s?(?P<this>)esta\s(?P<card>)tarjeta(?P<please>)(por favor)?\s?(?P<toindicate>)para\sindicar(lo)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'afirmaciones' not in text_aux:
return True
regex= r"^(?P<use>)(utilice|use)\s(?P<toanswer>)para\scontestar"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux and 'afirmaciones' not in text_aux:
return True
regex= r"^(?P<please>)(por favor)?\s?(?P<use>)(utilice|use)\s(?P<again>)(otra vez)?\s?(?P<this>)(esta)?\s?(?P<thesame>)(la misma)?\s?(?P<card>)tarjeta"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux and 'afirmaciones' not in text_aux:
return True
regex= r"^(?P<use>)(utilice|use)\s(?P<again>)(otra vez)?\s?(?P<please>)(por favor)?\s?(?P<this>)(esta)?\s?(?P<thesame>)(la misma)?\s?(?P<card>)tarjeta"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux and 'afirmaciones' not in text_aux:
return True
regex= r"^(?P<please>)(por favor)?\s?(?P<choose>)elija\s(?P<oneofthe>)una de las\s(?P<answers>)respuestas\s(?P<that>)que\s(?P<appearinthis>)aparecen en esta\s(?P<card>)tarjeta"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux and 'afirmaciones' not in text_aux:
return True
regex= r"^(?P<suggest>)sugerir\s(?P<categories>)categorías\s(?P<de>)de\s(?P<answer>)respuesta"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<look>)examinase(?P<this>)esta\s(?P<card>)tarjeta"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux and 'afirmaciones' not in text_aux:
return True
regex= r"(?P<answers>)respuestas\s(?P<thatshow>)que\saparecen\s(?P<inthis>)en\sesta\s(?P<card>)tarjeta"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux and 'afirmaciones' not in text_aux:
return True
regex= r"^(?P<please>)(por favor)?\s?(?P<choose>)(escoja|elija)\s(?P<youranswer>)(su|una)\srespuesta"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux and 'afirmaciones' not in text_aux:
return True
regex= r"^(?P<choose>)(escoja|elija)\s(?P<please>)(por favor)?\s?(?P<upto>)hasta"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux and 'afirmaciones' not in text_aux:
return True
regex= r"^(?P<please>)(por favor)?\s?(?P<mark>)marque\s(?P<one>)una\s(?P<option>)casilla"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"(?P<scalefrom>)escala\sde\s(?P<nton>)\d\sa\s\d+\s(?P<where>)en\sla\sque\s(?P<nmeans>)\d\ssignifica"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux and 'afirmaciones' not in text_aux:
return True
regex= r"(?P<scalefrom>)número\sde\s(?P<nton>)\d\sa\s\d+\s(?P<where>)donde\s(?P<nmeans>)\d\ses"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux and 'afirmaciones' not in text_aux:
return True
regex= r"(?P<thenmeans>)\d\s(significa\sque|quiere\sdecir)\s(?P<anything>).*\s(?P<andthenmeans>)(y)?\s?(el)?\s?\d+\s(significa\sque|quiere\sdecir|és|es)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux and 'afirmaciones' not in text_aux:
return True
regex= r"(?P<thenmeans>)\d\ssignifica\s(?P<anything>).*\s(?P<andthenmeans>)y\s(el)?\s?\d+\s(significa)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux and 'afirmaciones' not in text_aux:
return True
regex= r"^(?P<note>)nota\s(?P<for>)(per|para)\s(?P<interviewer>)el entrevistador\s?(?P<code>)(codificar)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(por favor)?\s?(?P<mark>)marque\s(?P<option>)la casilla\s(?P<closer>)que mejor represente\s(?P<youropinion>)su opinión"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<readorcode>)(leer|codificar)\s(?P<each>)cada\s(?P<organization>)organización"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<note>)ANOT(AR|E)\s(?P<with>)CON\s(?P<all>)TODO\s(?P<details>)DETALLE"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<note>)(ANOT(AR|E))?\s?(?P<onlyone>)(UNA\sSOLA|SÓLO\sUNA|SOLO\sUNA)\s(?P<answer>)RESPUESTA"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<dont>)(no)?\s?(?P<read>)leer\s?(?P<out>)(alto)?"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<read>)leer\s(?P<eachone>)una a una\s(?P<and>)(y)?\s?(?P<note>)anotar"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<read>)(leer|LEA)\s(?P<eachofthe>)CADA\sUNA\sDE\sLAS\s(?P<statements>)AFIRMACIONES"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<ask>)(preguntar)?\s?a\s(?P<all>)todos"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<insist>)insistir"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
regex= r"^(?P<please>)(por favor)?\s?(?P<select>)elija\s(?P<onlyone>)sólo\suna"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux and 'afirmaciones' not in text_aux:
return True
regex= r"^(?P<please>)(por favor)?\s?(?P<select>)elija\s(?P<theoption>)la\sopción"
matches = re.search(regex, text, re.IGNORECASE)
if matches and '?' not in text_aux and '...' not in text_aux and ':' not in text_aux and 'para indicar' not in text_aux and 'afirmaciones' not in text_aux:
return True
regex= r"^(?P<markorcode>)(Marcar|codificar)\s(?P<all>)tod(a|o)s\s(l(a|o)s)?\s?(RESPUESTAS)?\s?(?P<that>)que\s(se)?\s?(?P<apply>)(corresponda(n)?|MENCIONE(n)?)"
matches = re.search(regex, text, re.IGNORECASE)
if matches:
return True
return False