CWB
|
#include "parse_actions.h"
#include <stdlib.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stdarg.h>
#include <unistd.h>
#include "../cl/globals.h"
#include "../cl/special-chars.h"
#include "../cl/attributes.h"
#include "../cl/macros.h"
#include "cqp.h"
#include "options.h"
#include "ranges.h"
#include "symtab.h"
#include "treemacros.h"
#include "tree.h"
#include "eval.h"
#include "corpmanag.h"
#include "regex2dfa.h"
#include "builtins.h"
#include "groups.h"
#include "targets.h"
#include "attlist.h"
#include "concordance.h"
#include "output.h"
#include "print-modes.h"
#include "variables.h"
Functions | |
char * | mval_string_conversion (char *s) |
void | addHistoryLine (void) |
Add a line of CQP input to the history file. More... | |
void | resetQueryBuffer (void) |
Empties the query buffer and sets to 0 the pointer. More... | |
void | RaiseError (void) |
void | prepare_parse (void) |
CorpusList * | in_CorpusCommand (char *id, CorpusList *cl) |
void | after_CorpusCommand (CorpusList *cl) |
Set the current corpus and do the output if it was a query. More... | |
CorpusList * | in_UnnamedCorpusCommand (CorpusList *cl) |
This function is called after an UnnamedCorpusCommand rule is parsed. More... | |
CorpusList * | ActivateCorpus (CorpusList *cl) |
CorpusList * | after_CorpusSetExpr (CorpusList *cl) |
void | prepare_Query () |
This function sets things up to run a query. More... | |
CorpusList * | after_Query (CorpusList *cl) |
void | do_cat (CorpusList *cl, struct Redir *r, int first, int last) |
void | do_save (CorpusList *cl, struct Redir *r) |
void | do_attribute_show (char *name, int status) |
CorpusList * | do_translate (CorpusList *source, char *target_name) |
CorpusList * | do_setop (RangeSetOp op, CorpusList *c1, CorpusList *c2) |
void | prepare_do_subset (CorpusList *cl, FieldType field) |
CorpusList * | do_subset (FieldType field, Constrainttree boolt) |
void | do_set_target (CorpusList *cl, FieldType goal, FieldType source) |
void | do_set_complex_target (CorpusList *cl, FieldType field_to_set, SearchStrategy strategy, Constrainttree boolt, enum ctxtdir direction, int number, char *id, FieldType field, int inclusive) |
void | do_sleep (int duration) |
Puts the program to sleep. More... | |
void | do_exec (char *fname) |
Execute the commands contained within a specified text file. More... | |
void | do_delete_lines_num (CorpusList *cl, int start, int end) |
void | do_delete_lines (CorpusList *cl, FieldType f, int mode) |
void | do_reduce (CorpusList *cl, int number, int percent) |
void | do_cut (CorpusList *cl, int first, int last) |
void | do_info (CorpusList *cl) |
void | do_group (CorpusList *cl, FieldType target, int target_offset, char *t_att, FieldType source, int source_offset, char *s_att, int cut, int expand, int is_grouped, struct Redir *redir) |
void | do_group2 (CorpusList *cl, FieldType target, int target_offset, char *t_att, int cut, int expand, struct Redir *r) |
Like do_group, but with no source. More... | |
CorpusList * | do_StandardQuery (int cut_value, int keep_flag) |
CorpusList * | do_MUQuery (Evaltree evalt, int keep_flag, int cut_value) |
void | do_SearchPattern (Evaltree expr, Constrainttree constraint) |
Evaltree | reg_disj (Evaltree left, Evaltree right) |
Evaltree | reg_seq (Evaltree left, Evaltree right) |
int | do_AnchorPoint (FieldType field, int is_closing) |
int | do_XMLTag (char *s_name, int is_closing, int op, char *regex, int flags) |
int | do_NamedWfPattern (int is_target, char *label, int pat_idx) |
int | do_WordformPattern (Constrainttree boolt, int lookahead) |
Constrainttree | OptimizeStringConstraint (Constrainttree left, enum b_ops op, Constrainttree right) |
Constrainttree | do_StringConstraint (char *s, int flags) |
Constrainttree | Varref2IDList (Attribute *attr, enum b_ops op, char *varName) |
Constrainttree | do_SimpleVariableReference (char *varName) |
void | prepare_AlignmentConstraints (char *id) |
Constrainttree | bool_or (Constrainttree left, Constrainttree right) |
Constrainttree | bool_implies (Constrainttree left, Constrainttree right) |
Constrainttree | bool_and (Constrainttree left, Constrainttree right) |
Constrainttree | bool_not (Constrainttree left) |
Constrainttree | do_RelExpr (Constrainttree left, enum b_ops op, Constrainttree right) |
Constrainttree | do_RelExExpr (Constrainttree left) |
Constrainttree | do_LabelReference (char *label_name, int auto_delete) |
Constrainttree | do_IDReference (char *id_name, int auto_delete) |
Constrainttree | do_flagged_re_variable (char *varname, int flags) |
Implements expansion of a variable within the RE() operator. More... | |
Constrainttree | do_flagged_string (char *s, int flags) |
Constrainttree | do_mval_string (char *s, int op, int flags) |
Constrainttree | FunctionCall (char *f_name, ActualParamList *apl) |
void | do_Description (Context *context, int nr, char *name) |
Evaltree | do_MeetStatement (Evaltree left, Evaltree right, Context *context) |
Evaltree | do_UnionStatement (Evaltree left, Evaltree right) |
void | do_StructuralContext (Context *context, char *name) |
CorpusList * | do_TABQuery (Evaltree patterns) |
Evaltree | make_first_tabular_pattern (int pattern_index, Evaltree next) |
Evaltree | add_tabular_pattern (Evaltree patterns, Context *context, int pattern_index) |
void | do_OptDistance (Context *context, int l_bound, int u_bound) |
void | printSingleVariableValue (Variable v, int max_items) |
Prints the setting of a single Variable as an indented list. More... | |
void | do_PrintAllVariables () |
void | do_PrintVariableValue (char *varName) |
void | do_printVariableSize (char *varName) |
void | do_SetVariableValue (char *varName, char operator, char *varValues) |
void | do_AddSubVariables (char *var1Name, int add, char *var2Name) |
void | prepare_input (void) |
Get ready to parse a command. More... | |
void | expand_dataspace (CorpusList *cl) |
Expand the dataspace of a subcorpus. More... | |
void | push_regchr (char c) |
Add a character (in the sense of a byte) to the regex_string buffer. More... | |
void | debug_output (void) |
Prints out all the existing EvalEnvironments in the global array. More... | |
void | do_start_timer (void) |
Starts the timer running. More... | |
void | do_timing (char *msg) |
Shows the period since the timer started running. More... | |
void | do_size (CorpusList *cl, FieldType field) |
void | do_dump (CorpusList *cl, int first, int last, struct Redir *rd) |
Dump query result (or part of it) as TAB-delimited table of corpus positions. More... | |
int | do_undump (char *corpname, int extension_fields, int sort_ranges, struct InputRedir *rd) |
Read a TAB-delimited table of corpus positions and create a named query result from it. More... | |
Variables | |
int | generate_code |
TODO would be very useful to have a desc for this. More... | |
int | within_gc |
TODO would be very useful to have a desc for this ; seems to be about whether or not we are within a global constraint. More... | |
CYCtype | last_cyc |
Type of the last corpus-yielding command. More... | |
CorpusList * | query_corpus = NULL |
The corpus (or subcorpus) which is "active" in the sense that the query will be executed within it. More... | |
CorpusList * | old_query_corpus = NULL |
Used for preserving former values of query_corpus. More... | |
int | catch_unknown_ids = 0 |
Context | expansion |
This is used by the parser in response to CQP's "expand" operator, which incorporates context around the query hit into the match itself. More... | |
char | regex_string [CL_MAX_LINE_LENGTH] |
Buffer for storing regex strings. More... | |
int | regex_string_pos |
index into the regex string buffer, storing a current position. More... | |
int | sslen |
length of search string: is written to by evaltree2searchstr() but then seems never to be read. More... | |
struct timeval | timer_start_time |
Global variable for timing functions; not exported. More... | |
CorpusList* ActivateCorpus | ( | CorpusList * | cl | ) |
References Activation, cqpmessage(), CurEnv, Error, generate_code, inhibit_activation, last_cyc, Message, next_environment(), query_corpus, and evalenv::query_corpus.
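Evaltree add_tabular_pattern | ( | Evaltree | patterns, |
Context * | context, | ||
int | pattern_index | ||
) |
References cl_malloc(), generate_code, e_tree::max_dist, e_tree::min_dist, e_tree::next, node, e_tree::patindex, ctxtsp::size, ctxtsp::size2, e_tree::tab_el, tabular, and e_tree::type.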
void addHistoryLine | ( | void | ) |
Add a line of CQP input to the history file.
Supports parser rule: line -> command
The line that is added comes from QueryBuffer; the file it is written to is that named in cqp_history_file.
References cqp_history_file, cqpmessage(), Error, open_file(), QueryBuffer, reading_cqprc, silent, and write_history_file.
void after_CorpusCommand | ( | CorpusList * | cl | ) |
Set the current corpus and do the output if it was a query.
References Activation, auto_save, autoshow, catalog_corpus(), False, GlobalPrintMode, last_cyc, LastExpression, NoExpression, Query, QueryBuffer, QueryBufferOverflow, save_subcorpus(), cl::saved, set_current_corpus(), SetOperation, silent, cl::size, SUB, subquery, and cl::type.
CorpusList* after_CorpusSetExpr | ( | CorpusList * | cl | ) |
References cqpmessage(), CurEnv, Error, generate_code, last_cyc, next_environment(), evalenv::query_corpus, and SetOperation.
CorpusList* after_Query | ( | CorpusList * | cl | ) |
References cl_free, cl_strdup(), generate_code, last_cyc, cl::name, Query, cl::query_corpus, cl::query_text, QueryBuffer, QueryBufferOverflow, QueryBufferP, and within_gc.
Constrainttree bool_and | ( | Constrainttree | left, |
Constrainttree | right | ||
) |
References b_and, bnode, cnode, c_tree::constnode, free_booltree(), generate_code, c_tree::left, left, NEW_BNODE, c_tree::node, c_tree::op_id, c_tree::right, right, c_tree::type, and c_tree::val.
Constrainttree bool_implies | ( | Constrainttree | left, |
Constrainttree | right | ||
) |
References b_implies, bnode, bool_not(), cnode, c_tree::constnode, free_booltree(), generate_code, c_tree::left, left, NEW_BNODE, c_tree::node, c_tree::op_id, c_tree::right, right, try_optimization(), c_tree::type, and c_tree::val.
Constrainttree bool_not | ( | Constrainttree | left | ) |
References b_not, bnode, cnode, c_tree::constnode, free_booltree(), generate_code, id_list, c_tree::idlist, c_tree::left, left, c_tree::negated, NEW_BNODE, c_tree::node, c_tree::op_id, c_tree::right, c_tree::type, and c_tree::val.
Referenced by bool_implies().
Constrainttree bool_or | ( | Constrainttree | left, |
Constrainttree | right | ||
) |
References b_or, bnode, cnode, c_tree::constnode, free_booltree(), generate_code, c_tree::left, left, NEW_BNODE, c_tree::node, c_tree::op_id, c_tree::right, right, try_optimization(), c_tree::type, and c_tree::val.
void debug_output | ( | void | ) |
Prints out all the existing EvalEnvironments in the global array.
References eep, and show_environment().
Referenced by do_MUQuery(), do_StandardQuery(), and do_TABQuery().
void do_AddSubVariables | ( | char * | var1Name, |
int | add, | ||
char * | var2Name | ||
) |
References cl_free, cqpmessage(), Error, FindVariable(), GetVariableStrings(), VariableAddItem(), and VariableSubtractItem().
int do_AnchorPoint | ( | FieldType | field, |
int | is_closing | ||
) |
void do_attribute_show | ( | char * | name, |
int | status | ||
) |
References _context_description_block::alignedCorpora, ATT_STRUC, _context_description_block::attributes, CD, cl::corpus, cqpmessage(), current_corpus, DEFAULT_ATT_NAME, Error, find_attribute, FindInAL(), generate_code, _attlist::list, _attrbuf::next, _context_description_block::print_cpos, show_targets, _attrbuf::status, and _context_description_block::strucAttributes.
void do_cat | ( | CorpusList * | cl, |
struct Redir * | r, | ||
int | first, | ||
int | last | ||
) |
References catalog_corpus(), cqpmessage(), GlobalPrintMode, Message, and cl::name.
void do_cut | ( | CorpusList * | cl, |
int | first, | ||
int | last | ||
) |
References cqpmessage(), _Range::end, Error, generate_code, last, cl::range, RangeSetop(), RReduce, cl::size, _Range::start, SUB, touch_corpus(), cl::type, and Warning.
void do_delete_lines | ( | CorpusList * | cl, |
FieldType | f, | ||
int | mode | ||
) |
References cqpmessage(), create_bitfield(), delete_intervals(), destroy_bitfield(), Error, generate_code, KeywordField, cl::keywords, MatchEndField, MatchField, NoField, set_bit(), cl::size, SUB, TargetField, cl::targets, cl::type, and Warning.
void do_delete_lines_num | ( | CorpusList * | cl, |
int | start, | ||
int | end | ||
) |
References cqpmessage(), create_bitfield(), delete_intervals(), destroy_bitfield(), Error, generate_code, nr_bits_set(), SELECTED_LINES, set_bit(), cl::size, SUB, and cl::type.
void do_Description | ( | Context * | context, |
int | nr, | ||
char * | name | ||
) |
References ATT_STRUC, ctxtsp::attrib, cqpmessage(), Environment, Error, find_attribute, generate_code, ctxtsp::size, structure, ctxtsp::type, and word.
void do_dump | ( | CorpusList * | cl, |
int | first, | ||
int | last, | ||
struct Redir * | rd | ||
) |
Dump query result (or part of it) as TAB-delimited table of corpus positions.
cl | The result (as a subcorpus, naturally) |
first | Where in the result to begin dumping (index of cl->range) |
last | Where in the result to end dumping (index of cl->range) |
rd | Pointer to a Redir structure which contains information about where to dump to. |
References TCorpus::charset, cl_broken_pipe, close_stream(), cl::corpus, cqpmessage(), _Range::end, Error, cl::keywords, open_stream(), cl::range, cl::size, cl::sortidx, _Range::start, Redir::stream, and cl::targets.
void do_exec | ( | char * | fname | ) |
Execute the commands contained within a specified text file.
References cqp_parse_file(), cqpmessage(), Error, generate_code, Message, and open_file().
Constrainttree do_flagged_re_variable | ( | char * | varname, |
int | flags | ||
) |
Implements expansion of a variable within the RE() operator.
References TCorpus::charset, cl_free, cl_malloc(), cl_string_validate_encoding(), cl::corpus, cqpmessage(), do_flagged_string(), Error, FindVariable(), generate_code, GetVariableStrings(), IGNORE_REGEX, TCorpus::name, and Warning.
Constrainttree do_flagged_string | ( | char * | s, |
int | flags | ||
) |
References c_tree::canon, TCorpus::charset, cl_new_regex(), cl_string_latex2iso(), cl::corpus, cqpmessage(), c_tree::ctype, Error, _TabulationItem::flags, generate_code, IGNORE_REGEX, c_tree::leaf, NEW_BNODE, NORMAL, c_tree::pat_type, REGEXP, c_tree::rx, string_leaf, and c_tree::type.
Referenced by do_flagged_re_variable(), do_mval_string(), and do_StringConstraint().
void do_group | ( | CorpusList * | cl, |
FieldType | target, | ||
int | target_offset, | ||
char * | t_att, | ||
FieldType | source, | ||
int | source_offset, | ||
char * | s_att, | ||
int | cut, | ||
int | expand, | ||
int | is_grouped, | ||
struct Redir * | redir | ||
) |
References compute_grouping(), do_start_timer(), do_timing(), free_group(), and print_group().
void do_group2 | ( | CorpusList * | cl, |
FieldType | target, | ||
int | target_offset, | ||
char * | t_att, | ||
int | cut, | ||
int | expand, | ||
struct Redir * | r | ||
) |
Like do_group, but with no source.
References compute_grouping(), do_start_timer(), do_timing(), free_group(), NoField, and print_group().
Constrainttree do_IDReference | ( | char * | id_name, |
int | auto_delete | ||
) |
References ATT_POS, ATT_STRUC, c_tree::attr, cl_free, cl::corpus, cqpmessage(), CurEnv, c_tree::delete, Error, find_attribute, _label_entry::flags, generate_code, LAB_SPECIAL, LAB_USED, c_tree::label, labellookup(), evalenv::labels, NEW_BNODE, pa_ref, c_tree::pa_ref, sa_ref, c_tree::sa_ref, c_tree::type, Warning, and within_gc.
void do_info | ( | CorpusList * | cl | ) |
References corpus_info().
Constrainttree do_LabelReference | ( | char * | label_name, |
int | auto_delete | ||
) |
References ATT_POS, ATT_STRUC, c_tree::attr, cl_free, cl_struc_values(), cl::corpus, cqpmessage(), CurEnv, c_tree::delete, Error, find_attribute, _label_entry::flags, generate_code, LAB_SPECIAL, LAB_USED, c_tree::label, labellookup(), evalenv::labels, NEW_BNODE, pa_ref, c_tree::pa_ref, sa_ref, c_tree::sa_ref, c_tree::type, and Warning.
Evaltree do_MeetStatement | ( | Evaltree | left, |
Evaltree | right, | ||
Context * | context | ||
) |
References ctxtsp::attrib, cl_malloc(), e_tree::cooc, cooc_meet, generate_code, e_tree::left, left, e_tree::lw, meet_union, e_tree::op_id, e_tree::right, right, e_tree::rw, ctxtsp::size, ctxtsp::size2, e_tree::struc, and e_tree::type.
CorpusList* do_MUQuery | ( | Evaltree | evalt, |
int | keep_flag, | ||
int | cut_value | ||
) |
Constrainttree do_mval_string | ( | char * | s, |
int | op, | ||
int | flags | ||
) |
References cl_malloc(), cqpmessage(), do_flagged_string(), Error, generate_code, IGNORE_REGEX, mval_string_conversion(), OP_CONTAINS, OP_MATCHES, and OP_NOT_MASK.
int do_NamedWfPattern | ( | int | is_target, |
char * | label, | ||
int | pat_idx | ||
) |
void do_OptDistance | ( | Context * | context, |
int | l_bound, | ||
int | u_bound | ||
) |
References ctxtsp::attrib, cqpmessage(), repeat_inf, ctxtsp::size, ctxtsp::size2, ctxtsp::type, Warning, and word.
void do_PrintAllVariables | ( | ) |
References printSingleVariableValue(), variables_iterator_new(), and variables_iterator_next().
void do_printVariableSize | ( | char * | varName | ) |
void do_PrintVariableValue | ( | char * | varName | ) |
References cqpmessage(), Error, FindVariable(), and printSingleVariableValue().
void do_reduce | ( | CorpusList * | cl, |
int | number, | ||
int | percent | ||
) |
References cl_runif(), cqpmessage(), create_bitfield(), delete_intervals(), destroy_bitfield(), Error, generate_code, set_bit(), cl::size, SUB, cl::type, UNSELECTED_LINES, and Warning.
Constrainttree do_RelExExpr | ( | Constrainttree | left | ) |
References bnode, cmp_ex, generate_code, c_tree::left, left, NEW_BNODE, c_tree::node, c_tree::op_id, c_tree::right, try_optimization(), and c_tree::type.
Constrainttree do_RelExpr | ( | Constrainttree | left, |
enum b_ops | op, | ||
Constrainttree | right | ||
) |
References c_tree::attr, bnode, cqpmessage(), c_tree::delete, Error, free_booltree(), generate_code, id_list, c_tree::idlist, c_tree::label, c_tree::left, left, NEW_BNODE, c_tree::node, c_tree::op_id, OptimizeStringConstraint(), pa_ref, c_tree::pa_ref, c_tree::right, right, string_leaf, try_optimization(), c_tree::type, var_ref, c_tree::varName, c_tree::varref, and Varref2IDList().
void do_save | ( | CorpusList * | cl, |
struct Redir * | r | ||
) |
References cqpmessage(), DEFAULT_LOCAL_PATH_ENV_VAR, LOCAL_CORP_PATH, Message, Redir::name, cl::name, save_subcorpus(), and Warning.
void do_SearchPattern | ( | Evaltree | expr, |
Constrainttree | constraint | ||
) |
void do_set_complex_target | ( | CorpusList * | cl, |
FieldType | field_to_set, | ||
SearchStrategy | strategy, | ||
Constrainttree | boolt, | ||
enum ctxtdir | direction, | ||
int | number, | ||
char * | id, | ||
FieldType | field, | ||
int | inclusive | ||
) |
References evaluate_target(), free_booltree(), generate_code, and old_query_corpus.
void do_set_target | ( | CorpusList * | cl, |
FieldType | goal, | ||
FieldType | source | ||
) |
References NoField, and set_target().
CorpusList* do_setop | ( | RangeSetOp | op, |
CorpusList * | c1, | ||
CorpusList * | c2 | ||
) |
References cl::corpus, cqpmessage(), make_temp_corpus(), Message, cl::mother_name, cl::name, RangeSetop(), and Warning.
void do_SetVariableValue | ( | char * | varName, |
char | operator, | ||
char * | varValues | ||
) |
References cl_string_latex2iso(), cqpmessage(), Error, FindVariable(), NewVariable(), SetVariableValue(), and Warning.
Constrainttree do_SimpleVariableReference | ( | char * | varName | ) |
References ATT_POS, cmp_eq, cl::corpus, cqpmessage(), def_unbr_attr, DEFAULT_ATT_NAME, Error, find_attribute, generate_code, cl::name, set_string_option_value(), and Varref2IDList().
void do_size | ( | CorpusList * | cl, |
FieldType | field | ||
) |
References KeywordField, cl::keywords, NoField, cl::size, TargetField, and cl::targets.
void do_sleep | ( | int | duration | ) |
Puts the program to sleep.
A wrapper round the standard sleep() function (or Sleep() in Windows).
duration | How many seconds to sleep for. |
CorpusList* do_StandardQuery | ( | int | cut_value, |
int | keep_flag | ||
) |
References cl_free, cqp_run_query(), cqpmessage(), create_bitfield(), current_corpus, debug_output(), delete_intervals(), destroy_bitfield(), do_start_timer(), Environment, Error, generate_code, longest_match, matching_strategy, Message, parseonly, evalenv::query_corpus, RangeSetop(), RLeftMaximalMatches, RMaximalMatches, RMinimalMatches, searchstr, set_bit(), shortest_match, cl::size, standard_match, SUB, traditional, cl::type, UNSELECTED_LINES, and Warning.
void do_start_timer | ( | void | ) |
Starts the timer running.
References timer_start_time, and timing.
Referenced by do_group(), do_group2(), do_MUQuery(), do_StandardQuery(), and do_TABQuery().
Constrainttree do_StringConstraint | ( | char * | s, |
int | flags | ||
) |
References ATT_POS, c_tree::attr, cmp_eq, cnode, cl::corpus, cqpmessage(), def_unbr_attr, DEFAULT_ATT_NAME, c_tree::delete, do_flagged_string(), Error, find_attribute, generate_code, c_tree::label, left, cl::name, NEW_BNODE, OptimizeStringConstraint(), pa_ref, c_tree::pa_ref, right, set_string_option_value(), and c_tree::type.
void do_StructuralContext | ( | Context * | context, |
char * | name | ||
) |
References ATT_STRUC, ctxtsp::attrib, cl::corpus, cqpmessage(), Error, find_attribute, generate_code, TCorpus::id, ctxtsp::size, ctxtsp::size2, structure, ctxtsp::type, and word.
CorpusList* do_subset | ( | FieldType | field, |
Constrainttree | boolt | ||
) |
References evaluate_subset(), free_booltree(), generate_code, progress_bar, progress_bar_clear_line(), and query_corpus.
CorpusList* do_TABQuery | ( | Evaltree | patterns | ) |
void do_timing | ( | char * | msg | ) |
Shows the period since the timer started running.
msg | A message to print along with the reading from the timer. |
References cqpmessage(), Info, timer_start_time, and timing.
Referenced by do_group(), do_group2(), and in_UnnamedCorpusCommand().
CorpusList* do_translate | ( | CorpusList * | source, |
char * | target_name | ||
) |
References ATT_ALIGN, CDA_OK, cderrno, cl_alg2cpos(), cl_calloc(), cl_cpos2alg(), cl_free, cl::corpus, cqpmessage(), _Range::end, Error, find_attribute, findcorpus(), generate_code, cl::keywords, make_temp_corpus(), cl::mother_name, cl::range, RangeSetop(), RangeSort(), TCorpus::registry_name, RReduce, s1, s2, cl::size, _Range::start, SYSTEM, cl::targets, and Warning.
int do_undump | ( | char * | corpname, |
int | extension_fields, | ||
int | sort_ranges, | ||
struct InputRedir * | rd | ||
) |
Read a TAB-delimited table of corpus positions and create a named query result from it.
Acceptable values for extension_fields and the corresponding row formats: 0 = match matchend; 1 = match matchend target; 2 = match matchend target keyword.
References assign_temp_to_sub(), cl_free, cl_malloc(), CL_MAX_LINE_LENGTH, close_input_stream(), cqpmessage(), current_corpus, drop_temp_corpora(), Error, findcorpus(), is_qualified(), line, make_temp_corpus(), cl::mother_name, cl::mother_size, new, open_input_stream(), RangeSort(), split_subcorpus_name(), InputRedir::stream, SYSTEM, cl::type, valid_subcorpus_name(), and Warning.
Evaltree do_UnionStatement | ( | Evaltree | left, |
Evaltree | right | ||
) |
References cl_malloc(), e_tree::cooc, cooc_union, generate_code, e_tree::left, left, e_tree::lw, meet_union, e_tree::op_id, e_tree::right, right, e_tree::rw, and e_tree::type.
int do_WordformPattern | ( | Constrainttree | boolt, |
int | lookahead | ||
) |
References cnode, c_tree::constnode, cqpmessage(), CurEnv, Error, False, generate_code, MatchAll, evalenv::MaxPatIndex, MAXPATTERNS, Pattern, evalenv::patternlist, c_tree::type, and c_tree::val.
int do_XMLTag | ( | char * | s_name, |
int | is_closing, | ||
int | op, | ||
char * | regex, | ||
int | flags | ||
) |
References ATT_STRUC, TCorpus::charset, cl_free, cl_malloc(), cl_new_attribute, cl_new_regex(), cl_strdup(), cl_string_latex2iso(), cl_struc_values(), cl::corpus, cqpmessage(), CurEnv, Error, findlabel(), _label_entry::flags, _TabulationItem::flags, generate_code, IGNORE_REGEX, LAB_DEFINED, LAB_RDAT, LAB_USED, labellookup(), evalenv::labels, evalenv::MaxPatIndex, MAXPATTERNS, Message, mval_string_conversion(), cl::name, OP_CONTAINS, OP_EQUAL, OP_MATCHES, OP_NOT, OP_NOT_MASK, evalenv::patternlist, strict_regions, and Tag.
void expand_dataspace | ( | CorpusList * | cl | ) |
Expand the dataspace of a subcorpus.
This is done, e.g., by the CQP-syntax "expand" command, to include context into the matches found by a query.
Each corpus interval stored in the CorpusList is extended by an amount, and in a direction, dependent on the information in the global variable "expansion", a Context object (information which has been put there by the parser).
cl | The subcorpus to expand. |
References calculate_leftboundary(), calculate_rightboundary(), cqpmessage(), ctxtsp::direction, _Range::end, False, left, leftright, cl::needs_update, cl::range, RangeSetop(), right, RUniq, cl::saved, cl::size, ctxtsp::size, _Range::start, SYSTEM, True, cl::type, and Warning.
Referenced by in_UnnamedCorpusCommand().
Constrainttree FunctionCall | ( | char * | f_name, |
ActualParamList * | apl | ||
) |
CorpusList* in_CorpusCommand | ( | char * | id, |
CorpusList * | cl | ||
) |
References Assignment, cqpmessage(), current_corpus, duplicate_corpus(), is_qualified(), last_cyc, SYSTEM, True, cl::type, and Warning.
CorpusList* in_UnnamedCorpusCommand | ( | CorpusList * | cl | ) |
This function is called after an UnnamedCorpusCommand rule is parsed.
Seems to be a tidying-up function.
cl | The result of the corpus-yielding command (first component of this syntax rule). |
References Activation, assign_temp_to_sub(), cqpmessage(), do_timing(), drop_temp_corpora(), expand_dataspace(), free_environments(), generate_code, last_cyc, make_temp_corpus(), Message, Query, SetOperation, ctxtsp::size, SYSTEM, TEMP, cl::type, and Warning.
Evaltree make_first_tabular_pattern | ( | int | pattern_index, |
Evaltree | next | ||
) |
References cl_malloc(), generate_code, e_tree::max_dist, e_tree::min_dist, e_tree::next, node, e_tree::patindex, e_tree::tab_el, tabular, and e_tree::type.
char * mval_string_conversion | ( | char * | s | ) |
References cl_malloc(), cqpmessage(), Error, and generate_code.
Referenced by do_mval_string(), and do_XMLTag().
Constrainttree OptimizeStringConstraint | ( | Constrainttree | left, |
enum b_ops | op, | ||
Constrainttree | right | ||
) |
References _Attribute::any, c_tree::attr, bnode, c_tree::canon, catch_unknown_ids, CDA_OK, cderrno, cdperror_string, CID, cl_free, cl_idlist2freq(), cl_malloc(), cl_max_cpos(), cl_str2id(), cmp_eq, cmp_neq, cnode, collect_matching_ids, c_tree::constnode, cqpmessage(), c_tree::ctype, c_tree::delete, Error, generate_code, get_id_range, id_list, c_tree::idlist, c_tree::items, c_tree::label, c_tree::leaf, c_tree::left, left, MAKE_IDLIST_BOUND, c_tree::negated, NEW_BNODE, c_tree::node, NORMAL, c_tree::nr_items, c_tree::op_id, c_tree::pa_ref, c_tree::pat_type, REGEXP, c_tree::right, right, c_tree::type, and c_tree::val.
Referenced by do_RelExpr(), and do_StringConstraint().
void prepare_AlignmentConstraints | ( | char * | id | ) |
void prepare_do_subset | ( | CorpusList * | cl, |
FieldType | field | ||
) |
void prepare_input | ( | void | ) |
Get ready to parse a command.
This function is called before the processing of each parsed line that is recognised as a command.
Mostly it involves setting the global variables to their starting-state values.
References free_environments(), generate_code, last_cyc, LastExpression, NoExpression, regex_string_pos, and searchstr.
void prepare_parse | ( | void | ) |
References cqpmessage(), generate_code, old_query_corpus, and Warning.
void prepare_Query | ( | ) |
This function sets things up to run a query.
It is called as an "action" before any detected Query in the parser.
[AH 2010/8/2: I have added the code checking input character encoding. Anything that is not part of a query should be plain ASCII - if not, then the lexer/parser should pick it up as bad. Filenames, etc. are obvious exceptions - but we can't check the encoding of those, because there's no guarantee it will be the same as that of the corpus, which is the only thing whose encoding we know. So it's up to the user to type filenames in an encoding their OS will accept! Canonicalisation is done within the CL_Regex, not here.]
References access_corpus(), TCorpus::charset, cl_string_validate_encoding(), cl::corpus, cqpmessage(), CurEnv, current_corpus, eep, Environment, Error, generate_code, make_temp_corpus(), cl::mother_name, cl::name, next_environment(), query_corpus, evalenv::query_corpus, QueryBuffer, RangeSetop(), RNonOverlapping, searchstr, cl::size, Warning, and within_gc.
void printSingleVariableValue | ( | Variable | v, |
int | max_items | ||
) |
Prints the setting of a single Variable as an indented list.
References end_indented_list(), _variable_item::free, _variable_buf::items, _variable_buf::my_name, _variable_buf::nr_items, print_indented_list_item(), start_indented_list(), and _variable_item::sval.
Referenced by do_PrintAllVariables(), and do_PrintVariableValue().
void push_regchr | ( | char | c | ) |
Add a character (in the sense of a byte) to the regex_string buffer.
Doesn't seem to currently be in use.
References CL_MAX_LINE_LENGTH, cqpmessage(), regex_string, regex_string_pos, and Warning.
void RaiseError | ( | void | ) |
References generate_code, and resetQueryBuffer().
Evaltree reg_disj | ( | Evaltree | left, |
Evaltree | right | ||
) |
References generate_code, NEW_EVALNODE, re_disj, and repeat_none.
Evaltree reg_seq | ( | Evaltree | left, |
Evaltree | right | ||
) |
References generate_code, NEW_EVALNODE, re_od_concat, and repeat_none.
void resetQueryBuffer | ( | void | ) |
Empties the query buffer and sets to 0 the pointer.
Supports parser rule: line -> command
References QueryBuffer, QueryBufferOverflow, and QueryBufferP.
Referenced by RaiseError().
Constrainttree Varref2IDList | ( | Attribute * | attr, |
enum b_ops | op, | ||
char * | varName | ||
) |
References c_tree::attr, cmp_eq, cnode, c_tree::constnode, cl::corpus, cqpmessage(), c_tree::delete, Error, FindVariable(), generate_code, GetVariableItems(), id_list, c_tree::idlist, c_tree::items, c_tree::label, c_tree::negated, NEW_BNODE, node, c_tree::nr_items, c_tree::type, and c_tree::val.
Referenced by do_RelExpr(), and do_SimpleVariableReference().
int catch_unknown_ids = 0 |
Referenced by OptimizeStringConstraint().
Context expansion |
This is used by the parser in response to CQP's "expand" operator, which incorporates context around the query hit into the match itself.
Functions involved in carrying this out utilise info stored here by the parser.
Referenced by findcorpus().
int generate_code |
TODO would be very useful to have a desc for this.
A boolean; seems to be some kind of error-indicator (set to true if a query worked, false if it didn't, things like that).
When it is false, many actions simply have no effect, because they are set to only actually do anything "if (generate_code)".
Some functions will set it to 0 when an action works to block later actions.
In some cases, setting this to 0 is linked with "YYABORT" in comments.
Referenced by ActivateCorpus(), add_tabular_pattern(), after_CorpusSetExpr(), after_Query(), bool_and(), bool_implies(), bool_not(), bool_or(), do_AnchorPoint(), do_attribute_show(), do_cut(), do_delete_lines(), do_delete_lines_num(), do_Description(), do_exec(), do_flagged_re_variable(), do_flagged_string(), do_IDReference(), do_LabelReference(), do_MeetStatement(), do_MUQuery(), do_mval_string(), do_NamedWfPattern(), do_reduce(), do_RelExExpr(), do_RelExpr(), do_SearchPattern(), do_set_complex_target(), do_SimpleVariableReference(), do_StandardQuery(), do_StringConstraint(), do_StructuralContext(), do_subset(), do_TABQuery(), do_translate(), do_UnionStatement(), do_WordformPattern(), do_XMLTag(), FunctionCall(), in_UnnamedCorpusCommand(), make_first_tabular_pattern(), mval_string_conversion(), OptimizeStringConstraint(), prepare_AlignmentConstraints(), prepare_do_subset(), prepare_input(), prepare_parse(), prepare_Query(), RaiseError(), reg_disj(), reg_seq(), and Varref2IDList().
CYCtype last_cyc |
Type of the last corpus-yielding command.
Referenced by ActivateCorpus(), after_CorpusCommand(), after_CorpusSetExpr(), after_Query(), in_CorpusCommand(), in_UnnamedCorpusCommand(), and prepare_input().
CorpusList* old_query_corpus = NULL |
Used for preserving former values of query_corpus.
Referenced by do_set_complex_target(), and prepare_parse().
CorpusList* query_corpus = NULL |
The corpus (or subcorpus) which is "active" in the sense that the query will be executed within it.
Referenced by ActivateCorpus(), do_subset(), prepare_Query(), and simulate().
char regex_string[CL_MAX_LINE_LENGTH] |
Buffer for storing regex strings.
As it says on the tin.
TODO Doesn't seem currently to be in use anywhere, except in one func which itself is not used.
Referenced by push_regchr().
int regex_string_pos |
index into the regex string buffer, storing a current position.
See also: regex_string
Referenced by prepare_input(), and push_regchr().
int sslen |
length of search string: is written to by evaltree2searchstr() but then seems never to be read.
TODO .
Referenced by do_SearchPattern().
struct timeval timer_start_time |
Global variable for timing functions; not exported.
Referenced by do_start_timer(), and do_timing().
int within_gc |
TODO would be very useful to have a desc for this ; seems to be about whether or not we are within a global constraint.
Referenced by after_Query(), do_IDReference(), and prepare_Query().