CWB
|
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <limits.h>
#include <math.h>
#include <sys/types.h>
#include "../cl/globals.h"
#include "../cl/macros.h"
#include "../cl/corpus.h"
#include "../cl/attributes.h"
#include "../cl/cdaccess.h"
#include "../cl/special-chars.h"
#include "cqp.h"
#include "ranges.h"
#include "options.h"
#include "tree.h"
#include "symtab.h"
#include "corpmanag.h"
#include "regex2dfa.h"
#include "eval.h"
#include "builtins.h"
#include "output.h"
#include "matchlist.h"
Macros | |
#define | no_match -1 |
#define | RED_THRESHOLD 0.01 |
Functions | |
int | nr_positions (CorpusList *cp) |
Counts the number of token positions encompassed by all members of the ->range array of the CorpusList argument. More... | |
float | red_factor (CorpusList *cp, int *nr_pos) |
void | set_corpus_matchlists (CorpusList *cp, Matchlist *matchlist, int nr_lists, int keep_old_ranges) |
Set the appropriate values to the corpus id (given by its pointer to the symbol table). More... | |
int | get_corpus_positions (Attribute *attribute, char *wordform, Matchlist *matchlist) |
Gets a list of corpus positions where the given p-attribute has the specified form. More... | |
int | get_matched_corpus_positions (Attribute *attribute, char *regstr, int canonicalize, Matchlist *matchlist, int *restrictor_list, int restrictor_size) |
Get corpus positions matching a regular expression on a given attribute. More... | |
Boolean | eval_constraint (AVS avs, int corppos, RefTab labelrefs, RefTab target_labelrefs) |
int | get_label_referenced_position (LabelEntry label, RefTab rt, int corppos) |
Boolean | get_leaf_value (Constrainttree ctptr, RefTab rt, int corppos, DynCallResult *dcr, int deliver_strings) |
static int | intcompare (const void *i, const void *j) |
Comparison function used when calling qsort(). More... | |
Boolean | eval_bool (Constrainttree ctptr, RefTab rt, int corppos) |
int | mark_offrange_cells (Matchlist *matchlist, CorpusList *corpus) |
Boolean | calculate_initial_matchlist_1 (Constrainttree ctptr, Matchlist *matchlist, CorpusList *corpus) |
Gets the initial list of matches for a query. More... | |
Boolean | calculate_initial_matchlist (Constrainttree ctptr, Matchlist *matchlist, CorpusList *corpus) |
Wrapper around calculate_initial_matchlist_1, qv. More... | |
Boolean | matchfirstpattern (AVS pattern, Matchlist *matchlist, CorpusList *corpus) |
void | simulate (Matchlist *matchlist, int *cut, int start_state, int start_offset, int *state_vector, int *target_vector, RefTab *reftab_vector, RefTab *reftab_target_vector, int start_transition) |
int | check_alignment_constraints (Matchlist *ml) |
void | simulate_dfa (int envidx, int cut, int keep_old_ranges) |
simulate the dfa More... | |
void | cqp_run_query (int cut, int keep_old_ranges) |
This function wraps round simulate_dfa (the only other thing it does is enforce the hard_cut limit). More... | |
int | eval_mu_tree (Evaltree et, Matchlist *ml) |
void | cqp_run_mu_query (int keep_old_ranges, int cut_value) |
void | cqp_run_tab_query (int implode) |
int | meet_mu (Matchlist *list1, Matchlist *list2, int lw, int rw, Attribute *struc) |
int | next_environment (void) |
Sets up a new environment in the global array. More... | |
int | free_environment (int thisenv) |
Frees an evaluation environment. More... | |
void | show_environment (int thisenv) |
Prints the contents of an EvalEnvironment object to STDOUT. More... | |
void | free_environments (void) |
Frees all eval environments in the global array, and sets the eep pointer to -1. More... | |
#define no_match -1 |
Referenced by check_alignment_constraints().
#define RED_THRESHOLD 0.01 |
Referenced by matchfirstpattern().
Boolean calculate_initial_matchlist | ( | Constrainttree | ctptr, |
Matchlist * | matchlist, | ||
CorpusList * | corpus | ||
) |
Wrapper around calculate_initial_matchlist_1, qv.
References calculate_initial_matchlist_1(), Complement, _Matchlist::is_inverted, mark_offrange_cells(), Reduce, and Setop().
Referenced by cqp_run_tab_query(), eval_mu_tree(), and matchfirstpattern().
Boolean calculate_initial_matchlist_1 | ( | Constrainttree | ctptr, |
Matchlist * | matchlist, | ||
CorpusList * | corpus | ||
) |
Gets the initial list of matches for a query.
NB. This function is called recursively.
References c_tree::attr, b_and, b_implies, b_not, b_or, bnode, c_tree::canon, CID, cmp_eq, cmp_ex, cmp_get, cmp_gt, cmp_let, cmp_lt, cmp_neq, cnode, collect_matches, Complement, c_tree::constnode, cqpmessage(), c_tree::ctype, _Matchlist::end, Error, eval_bool(), eval_debug, EvaluationIsRunning, False, free_matchlist(), func, get_corpus_positions(), get_matched_corpus_positions(), get_positions, id_list, c_tree::idlist, init_matchlist(), Intersection, _Matchlist::is_inverted, c_tree::items, c_tree::label, c_tree::leaf, c_tree::left, left, mark_offrange_cells(), _Matchlist::matches_whole_corpus, _label_entry::name, c_tree::negated, c_tree::node, NORMAL, c_tree::nr_items, c_tree::op_id, pa_ref, c_tree::pa_ref, c_tree::pat_type, cl::range, Reduce, REGEXP, c_tree::right, right, sa_ref, Setop(), cl::size, _Matchlist::start, string_leaf, _Matchlist::tabsize, True, c_tree::type, Union, and c_tree::val.
Referenced by calculate_initial_matchlist().
int check_alignment_constraints | ( | Matchlist * | ml | ) |
References CDA_OK, cderrno, cl_alg2cpos(), cl_cpos2alg(), cl_malloc(), cqp, cqpmessage(), delete_reftab(), evalenv::dfa, eep, _Matchlist::end, Environment, evalenv, EvaluationIsRunning, free_matchlist(), Info, init_matchlist(), install_signal_handler(), evalenv::labels, dfa::Max_States, evalenv::negated, new_reftab(), no_match, simulate(), _Matchlist::start, _Matchlist::tabsize, and which_app.
Referenced by simulate_dfa().
void cqp_run_mu_query | ( | int | keep_old_ranges, |
int | cut_value | ||
) |
References cl_malloc(), cqpmessage(), _Matchlist::end, Environment, Error, eval_mu_tree(), evalenv::evaltree, free_matchlist(), init_matchlist(), mark_offrange_cells(), evalenv::query_corpus, Reduce, set_corpus_matchlists(), Setop(), _Matchlist::start, and _Matchlist::tabsize.
Referenced by do_MUQuery().
void cqp_run_query | ( | int | cut, |
int | keep_old_ranges | ||
) |
This function wraps round simulate_dfa (the only other thing it does is enforce the hard_cut limit).
References eep, hard_cut, and simulate_dfa().
Referenced by do_StandardQuery().
void cqp_run_tab_query | ( | int | implode | ) |
References calculate_initial_matchlist(), cl_malloc(), _Matchlist::end, Environment, evalenv::evaltree, free_matchlist(), hard_boundary, init_matchlist(), mark_offrange_cells(), e_tree::max_dist, e_tree::min_dist, e_tree::next, e_tree::patindex, evalenv::patternlist, evalenv::query_corpus, Reduce, repeat_inf, set_corpus_matchlists(), Setop(), _Matchlist::start, e_tree::tab_el, _Matchlist::tabsize, tabular, and e_tree::type.
Referenced by do_TABQuery().
Boolean eval_bool | ( | Constrainttree | ctptr, |
RefTab | rt, | ||
int | corppos | ||
) |
References ATTAT_FLOAT, ATTAT_INT, ATTAT_NONE, ATTAT_PAREF, ATTAT_POS, ATTAT_STRING, ATTAT_VAR, c_tree::attr, b_and, b_implies, b_not, b_or, bnode, CDA_OK, cderrno, _DCR::charres, cl_regex_match(), cmp_eq, cmp_ex, cmp_get, cmp_gt, cmp_let, cmp_lt, cmp_neq, cnode, c_tree::constnode, cqpmessage(), c_tree::delete, Error, eval_bool(), eval_debug, EvaluationIsRunning, False, float_leaf, _DCR::floatres, func, get_id_at_position, get_label_referenced_position(), get_leaf_value(), get_string_of_id, get_struc_attribute, id_list, c_tree::idlist, int_leaf, intcompare(), _DCR::intres, c_tree::is_closing, c_tree::items, c_tree::label, c_tree::leaf, c_tree::left, _label_entry::name, c_tree::negated, c_tree::node, NORMAL, c_tree::nr_items, c_tree::op_id, pa_ref, _DCR::parefres, c_tree::pat_type, _label_entry::ref, REGEXP, c_tree::right, c_tree::rx, sa_ref, sbound, c_tree::sbound, set_reftab(), STREQ, string_leaf, c_tree::strucattr, True, c_tree::type, _DCR::type, c_tree::val, and _DCR::value.
Referenced by calculate_initial_matchlist_1(), eval_bool(), eval_constraint(), evaluate_subset(), evaluate_target(), and simulate().
References Anchor, _avs::anchor, _avs::attr, cl_cpos2struc(), cl_regex_match(), cl_struc2cpos(), cl_struc2str(), _avs::con, _avs::constraint, corpus, dup_reftab(), _Range::end, eval_bool(), False, _avs::field, get_reftab(), _avs::is_closing, KeywordField, cl::keywords, _avs::label, MatchAll, _avs::matchall, MatchEndField, MatchField, _avs::negated, Pattern, evalenv::query_corpus, cl::range, _label_entry::ref, _avs::right_boundary, evalenv::rp, _avs::rx, set_reftab(), _Range::start, strict_regions, Tag, _avs::tag, TargetField, cl::targets, True, and _avs::type.
Referenced by simulate().
References calculate_initial_matchlist(), e_tree::cooc, cooc_meet, cooc_union, CurEnv, EvaluationIsRunning, free_matchlist(), init_matchlist(), leaf, e_tree::leaf, e_tree::left, e_tree::lw, meet_mu(), meet_union, e_tree::op_id, e_tree::patindex, evalenv::patternlist, evalenv::query_corpus, e_tree::right, e_tree::rw, Setop(), e_tree::struc, e_tree::type, and Union.
Referenced by cqp_run_mu_query().
int free_environment | ( | int | thisenv | ) |
Frees an evaluation environment.
The environment must be one currently occupied within the global array.
thisenv | The eval environment to free. |
References Anchor, ctxtsp::attrib, cl_delete_regex(), cl_free, delete_symbol_table(), ctxtsp::direction, eep, Environment, False, free_booltree(), free_dfa(), free_evaltree(), evalenv::gconstraint, evalenv::has_target_indicator, evalenv::labels, leftright, MatchAll, evalenv::MaxPatIndex, NoField, Pattern, evalenv::patternlist, evalenv::query_corpus, evalenv::search_context, ctxtsp::size, Tag, dfa::TransTable, cl::type, ctxtsp::type, and word.
Referenced by free_environments().
void free_environments | ( | void | ) |
Frees all eval environments in the global array, and sets the eep pointer to -1.
References eep, and free_environment().
Referenced by in_UnnamedCorpusCommand(), and prepare_input().
Gets a list of corpus positions where the given p-attribute has the specified form.
Positions are placed into the "start" array of the matchlist.
attribute | The p-attribute to search. |
wordform | The form to search for. |
matchlist | Where to put the results. |
References CDA_OK, cl_errno, cl_idlist2cpos, cl_str2id(), initial_matchlist_debug, _Matchlist::matches_whole_corpus, silent, _Matchlist::start, and _Matchlist::tabsize.
Referenced by calculate_initial_matchlist_1().
int get_label_referenced_position | ( | LabelEntry | label, |
RefTab | rt, | ||
int | corppos | ||
) |
References eval_debug, get_reftab(), _label_entry::name, and _label_entry::ref.
Referenced by eval_bool(), and get_leaf_value().
Boolean get_leaf_value | ( | Constrainttree | ctptr, |
RefTab | rt, | ||
int | corppos, | ||
DynCallResult * | dcr, | ||
int | deliver_strings | ||
) |
References c_tree::args, ATTAT_FLOAT, ATTAT_INT, ATTAT_NONE, ATTAT_PAREF, ATTAT_POS, ATTAT_STRING, c_tree::attr, call_dynamic_attribute, call_predefined_function(), CDA_EPOSORNG, CDA_OK, cderrno, _DCR::charres, CID, cl_malloc(), cqpmessage(), c_tree::ctype, c_tree::delete, c_tree::dynattr, Error, eval_debug, EvaluationIsRunning, False, float_leaf, _DCR::floatres, func, c_tree::func, get_id_at_position, get_label_referenced_position(), get_string_at_position, get_struc_attribute, int_leaf, _DCR::intres, c_tree::label, c_tree::leaf, _label_entry::name, _ActualParamList::next, c_tree::nr_args, pa_ref, c_tree::pa_ref, _ActualParamList::param, _DCR::parefres, c_tree::pat_type, c_tree::predef, _label_entry::ref, sa_ref, c_tree::sa_ref, set_reftab(), string_leaf, structure_value_at_position(), True, c_tree::type, _DCR::type, and _DCR::value.
Referenced by eval_bool().
int get_matched_corpus_positions | ( | Attribute * | attribute, |
char * | regstr, | ||
int | canonicalize, | ||
Matchlist * | matchlist, | ||
int * | restrictor_list, | ||
int | restrictor_size | ||
) |
Get corpus positions matching a regular expression on a given attribute.
get_matched_corpus_positions searches a corpus (which is to be loaded) for matches of the regular expression 'regstr' on a given p-attribute, and returns the table of matching start indices (start_table) and the table size (tabsize).
attribute | The attribute to search on. May be NULL, in which case DEFAULT_ATT_NAME is used. |
regstr | String containing the regular expression. |
canonicalize | Flags to be passed to the CL regex engine. |
matchlist | Location where the list of matches will be placed. |
restrictor_list | Passed to cl_idlist2cpos_oldstyle |
restrictor_size | Passed to cl_idlist2cpos_oldstyle |
References ATT_POS, cl_free, cl_idlist2cpos_oldstyle(), cl_malloc(), cl_max_cpos(), cl_max_id(), cl_new_attribute_oldstyle(), cl_regex2id(), cl::corpus, DEFAULT_ATT_NAME, eval_debug, initial_matchlist_debug, _Matchlist::is_inverted, _Matchlist::matches_whole_corpus, evalenv::query_corpus, cl::range, silent, cl::size, _Matchlist::start, STREQ, and _Matchlist::tabsize.
Referenced by calculate_initial_matchlist_1(), and matchfirstpattern().
|
static |
Comparison function used when calling qsort().
Referenced by eval_bool().
int mark_offrange_cells | ( | Matchlist * | matchlist, |
CorpusList * | corpus | ||
) |
References _Matchlist::end, _Range::end, cl::mother_size, cl::range, cl::size, _Matchlist::start, and _Range::start.
Referenced by calculate_initial_matchlist(), calculate_initial_matchlist_1(), cqp_run_mu_query(), and cqp_run_tab_query().
Boolean matchfirstpattern | ( | AVS | pattern, |
Matchlist * | matchlist, | ||
CorpusList * | corpus | ||
) |
References Anchor, _avs::anchor, _avs::attr, calculate_initial_matchlist(), cl_free, cl_malloc(), cl_max_struc(), cl_regex_match(), cl_struc2cpos(), cl_struc2str(), clear_all_bits(), _avs::con, _avs::constraint, cqpmessage(), create_bitfield(), destroy_bitfield(), _Matchlist::end, _Range::end, Error, EvaluationIsRunning, False, _avs::field, get_bit(), get_matched_corpus_positions(), _avs::is_closing, KeywordField, cl::keywords, MatchAll, MatchEndField, _Matchlist::matches_whole_corpus, MatchField, _avs::negated, Pattern, query_optimize, cl::range, red_factor(), RED_THRESHOLD, Reduce, _avs::rx, set_all_bits(), set_bit(), Setop(), silent, cl::size, _Matchlist::start, _Range::start, _Matchlist::tabsize, Tag, _avs::tag, TargetField, cl::targets, True, and _avs::type.
Referenced by simulate_dfa().
References CDA_OK, cderrno, cl_free, cl_malloc(), cl_realloc(), _Matchlist::end, get_struc_attribute, _Matchlist::matches_whole_corpus, MIN, _Matchlist::start, and _Matchlist::tabsize.
Referenced by eval_mu_tree().
int next_environment | ( | void | ) |
Sets up a new environment in the global array.
The next slot upwards is used (and eep is incremented).
References ctxtsp::attrib, CurEnv, ctxtsp::direction, eep, Environment, evalenv::evaltree, evalenv::gconstraint, evalenv::has_target_indicator, init_dfa(), evalenv::labels, leftright, evalenv::match_label, evalenv::matchend_label, MAXENVIRONMENT, evalenv::MaxPatIndex, evalenv::negated, new_symbol_table(), evalenv::query_corpus, evalenv::search_context, ctxtsp::size, evalenv::target_label, ctxtsp::type, and word.
Referenced by ActivateCorpus(), after_CorpusSetExpr(), prepare_AlignmentConstraints(), and prepare_Query().
int nr_positions | ( | CorpusList * | cp | ) |
Counts the number of token positions encompassed by all members of the ->range array of the CorpusList argument.
That is, in other words, it tells you the size of this corpus.
References _Range::end, cl::range, cl::size, and _Range::start.
Referenced by compose_kwic_line(), red_factor(), and remember_this_position().
float red_factor | ( | CorpusList * | cp, |
int * | nr_pos | ||
) |
References access_corpus(), ATT_POS, cl::corpus, DEFAULT_ATT_NAME, find_attribute, cl::mother_size, nr_positions(), and cl::size.
Referenced by matchfirstpattern().
void set_corpus_matchlists | ( | CorpusList * | cp, |
Matchlist * | matchlist, | ||
int | nr_lists, | ||
int | keep_old_ranges | ||
) |
Set the appropriate values to the corpus id (given by its pointer to the symbol table).
References cl_free, cl_malloc(), _Matchlist::end, _Range::end, cl::keywords, cl::range, RangeSetop(), RReduce, cl::size, cl::sortidx, _Matchlist::start, _Range::start, _Matchlist::tabsize, _Matchlist::target_positions, and cl::targets.
Referenced by cqp_run_mu_query(), cqp_run_tab_query(), and simulate_dfa().
void show_environment | ( | int | thisenv | ) |
Prints the contents of an EvalEnvironment object to STDOUT.
Which bits of information are printed depends on which of a group of debugging-variables are set to true.
The EvalEnvironment to print is specified as an index into the global array (Environment).
thisenv | Index into Environment indicating which EvalEnvironment should be displayed. |
References eep, Environment, print_booltree(), print_evaltree(), show_compdfa, show_complete_dfa(), show_evaltree, show_gconstraints, show_patlist, and show_patternlist().
Referenced by debug_output().
void simulate | ( | Matchlist * | matchlist, |
int * | cut, | ||
int | start_state, | ||
int | start_offset, | ||
int * | state_vector, | ||
int * | target_vector, | ||
RefTab * | reftab_vector, | ||
RefTab * | reftab_target_vector, | ||
int | start_transition | ||
) |
References evalenv::aligned, Anchor, _avs::anchor, _avs::attr, calculate_rightboundary(), CheckForInterrupts(), _avs::con, cqpmessage(), debug_simulation, evalenv::dfa, dup_reftab(), dfa::E_State, _Matchlist::end, _Range::end, Error, eval_bool(), eval_constraint(), EvaluationIsRunning, dfa::Final, free_matchlist(), evalenv::gconstraint, get_reftab(), get_struc_attribute, evalenv::has_target_indicator, _avs::is_closing, _avs::is_target, LAB_DEFINED, LAB_RDAT, LAB_USED, _avs::label, evalenv::labels, longest_match, _avs::lookahead, evalenv::match_label, MatchAll, _avs::matchall, evalenv::matchend_label, matching_strategy, dfa::Max_Input, dfa::Max_States, MIN, Pattern, evalenv::patternlist, print_label_values(), progress_bar, progress_bar_percentage(), query_corpus, evalenv::query_corpus, cl::range, _label_entry::ref, reset_reftab(), _avs::right_boundary, evalenv::rp, evalenv::search_context, set_reftab(), cl::size, _Matchlist::start, _Range::start, strict_regions, symbol_table_iterator(), symbol_table_new_iterator(), symtab_debug, _Matchlist::tabsize, Tag, _avs::tag, evalenv::target_label, _Matchlist::target_positions, dfa::TransTable, and _avs::type.
Referenced by check_alignment_constraints(), and simulate_dfa().
void simulate_dfa | ( | int | envidx, |
int | cut, | ||
int | keep_old_ranges | ||
) |
simulate the dfa
References evalenv::aligned, check_alignment_constraints(), cl_malloc(), cqp, cqpmessage(), delete_reftab(), evalenv::dfa, dfa::E_State, eep, _Matchlist::end, Environment, Error, EvaluationIsRunning, dfa::Final, free_matchlist(), evalenv::has_target_indicator, Info, init_matchlist(), initial_matchlist_debug, install_signal_handler(), evalenv::labels, matchfirstpattern(), dfa::Max_Input, dfa::Max_States, new_reftab(), evalenv::patternlist, print_symbol_table(), progress_bar, progress_bar_clear_line(), progress_bar_message(), evalenv::query_corpus, Reduce, reset_reftab(), set_corpus_matchlists(), Setop(), show_matchlist(), show_matchlist_firstelements(), simulate(), cl::size, _Matchlist::start, _Matchlist::tabsize, _Matchlist::target_positions, dfa::TransTable, True, Union, Warning, and which_app.
Referenced by cqp_run_query().