CWB
Functions | Variables
parse_actions.c File Reference
#include "parse_actions.h"
#include <stdlib.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stdarg.h>
#include <unistd.h>
#include "../cl/globals.h"
#include "../cl/special-chars.h"
#include "../cl/attributes.h"
#include "../cl/macros.h"
#include "cqp.h"
#include "options.h"
#include "ranges.h"
#include "symtab.h"
#include "treemacros.h"
#include "tree.h"
#include "eval.h"
#include "corpmanag.h"
#include "regex2dfa.h"
#include "builtins.h"
#include "groups.h"
#include "targets.h"
#include "attlist.h"
#include "concordance.h"
#include "output.h"
#include "print-modes.h"
#include "variables.h"

Functions

char * mval_string_conversion (char *s)
 
void addHistoryLine (void)
 Add a line of CQP input to the history file. More...
 
void resetQueryBuffer (void)
 Empties the query buffer and resets its pointer to 0. More...
 
void RaiseError (void)
 
void prepare_parse (void)
 
CorpusList * in_CorpusCommand (char *id, CorpusList *cl)
 
void after_CorpusCommand (CorpusList *cl)
 Set the current corpus and do the output if it was a query. More...
 
CorpusList * in_UnnamedCorpusCommand (CorpusList *cl)
 This function is called after an UnnamedCorpusCommand rule is parsed. More...
 
CorpusList * ActivateCorpus (CorpusList *cl)
 
CorpusList * after_CorpusSetExpr (CorpusList *cl)
 
void prepare_Query ()
 This function sets things up to run a query. More...
 
CorpusList * after_Query (CorpusList *cl)
 
void do_cat (CorpusList *cl, struct Redir *r, int first, int last)
 
void do_save (CorpusList *cl, struct Redir *r)
 
void do_attribute_show (char *name, int status)
 
CorpusList * do_translate (CorpusList *source, char *target_name)
 
CorpusList * do_setop (RangeSetOp op, CorpusList *c1, CorpusList *c2)
 
void prepare_do_subset (CorpusList *cl, FieldType field)
 
CorpusList * do_subset (FieldType field, Constrainttree boolt)
 
void do_set_target (CorpusList *cl, FieldType goal, FieldType source)
 
void do_set_complex_target (CorpusList *cl, FieldType field_to_set, SearchStrategy strategy, Constrainttree boolt, enum ctxtdir direction, int number, char *id, FieldType field, int inclusive)
 
void do_sleep (int duration)
 Puts the program to sleep. More...
 
void do_exec (char *fname)
 Execute the commands contained within a specified text file. More...
 
void do_delete_lines_num (CorpusList *cl, int start, int end)
 
void do_delete_lines (CorpusList *cl, FieldType f, int mode)
 
void do_reduce (CorpusList *cl, int number, int percent)
 
void do_cut (CorpusList *cl, int first, int last)
 
void do_info (CorpusList *cl)
 
void do_group (CorpusList *cl, FieldType target, int target_offset, char *t_att, FieldType source, int source_offset, char *s_att, int cut, int expand, int is_grouped, struct Redir *redir)
 
void do_group2 (CorpusList *cl, FieldType target, int target_offset, char *t_att, int cut, int expand, struct Redir *r)
 Like do_group, but with no source. More...
 
CorpusList * do_StandardQuery (int cut_value, int keep_flag)
 
CorpusList * do_MUQuery (Evaltree evalt, int keep_flag, int cut_value)
 
void do_SearchPattern (Evaltree expr, Constrainttree constraint)
 
Evaltree reg_disj (Evaltree left, Evaltree right)
 
Evaltree reg_seq (Evaltree left, Evaltree right)
 
int do_AnchorPoint (FieldType field, int is_closing)
 
int do_XMLTag (char *s_name, int is_closing, int op, char *regex, int flags)
 
int do_NamedWfPattern (int is_target, char *label, int pat_idx)
 
int do_WordformPattern (Constrainttree boolt, int lookahead)
 
Constrainttree OptimizeStringConstraint (Constrainttree left, enum b_ops op, Constrainttree right)
 
Constrainttree do_StringConstraint (char *s, int flags)
 
Constrainttree Varref2IDList (Attribute *attr, enum b_ops op, char *varName)
 
Constrainttree do_SimpleVariableReference (char *varName)
 
void prepare_AlignmentConstraints (char *id)
 
Constrainttree bool_or (Constrainttree left, Constrainttree right)
 
Constrainttree bool_implies (Constrainttree left, Constrainttree right)
 
Constrainttree bool_and (Constrainttree left, Constrainttree right)
 
Constrainttree bool_not (Constrainttree left)
 
Constrainttree do_RelExpr (Constrainttree left, enum b_ops op, Constrainttree right)
 
Constrainttree do_RelExExpr (Constrainttree left)
 
Constrainttree do_LabelReference (char *label_name, int auto_delete)
 
Constrainttree do_IDReference (char *id_name, int auto_delete)
 
Constrainttree do_flagged_re_variable (char *varname, int flags)
 Implements expansion of a variable within the RE() operator. More...
 
Constrainttree do_flagged_string (char *s, int flags)
 
Constrainttree do_mval_string (char *s, int op, int flags)
 
Constrainttree FunctionCall (char *f_name, ActualParamList *apl)
 
void do_Description (Context *context, int nr, char *name)
 
Evaltree do_MeetStatement (Evaltree left, Evaltree right, Context *context)
 
Evaltree do_UnionStatement (Evaltree left, Evaltree right)
 
void do_StructuralContext (Context *context, char *name)
 
CorpusList * do_TABQuery (Evaltree patterns)
 
Evaltree make_first_tabular_pattern (int pattern_index, Evaltree next)
 
Evaltree add_tabular_pattern (Evaltree patterns, Context *context, int pattern_index)
 
void do_OptDistance (Context *context, int l_bound, int u_bound)
 
void printSingleVariableValue (Variable v, int max_items)
 Prints the setting of a single Variable as an indented list. More...
 
void do_PrintAllVariables ()
 
void do_PrintVariableValue (char *varName)
 
void do_printVariableSize (char *varName)
 
void do_SetVariableValue (char *varName, char operator, char *varValues)
 
void do_AddSubVariables (char *var1Name, int add, char *var2Name)
 
void prepare_input (void)
 Get ready to parse a command. More...
 
void expand_dataspace (CorpusList *cl)
 Expand the dataspace of a subcorpus. More...
 
void push_regchr (char c)
 Add a character (in the sense of a byte) to the regex_string buffer. More...
 
void debug_output (void)
 Prints out all the existing EvalEnvironments in the global array. More...
 
void do_start_timer (void)
 Starts the timer running. More...
 
void do_timing (char *msg)
 Shows the period since the timer started running. More...
 
void do_size (CorpusList *cl, FieldType field)
 
void do_dump (CorpusList *cl, int first, int last, struct Redir *rd)
 Dump query result (or part of it) as TAB-delimited table of corpus positions. More...
 
int do_undump (char *corpname, int extension_fields, int sort_ranges, struct InputRedir *rd)
 Read a TAB-delimited table of corpus positions and create a named query result from it. More...
 

Variables

int generate_code
 TODO would be very useful to have a desc for this. More...
 
int within_gc
 TODO: it would be very useful to have a description for this; it seems to be about whether or not we are within a global constraint. More...
 
CYCtype last_cyc
 type of last corpus yielding command More...
 
CorpusList * query_corpus = NULL
 The corpus (or subcorpus) which is "active" in the sense that the query will be executed within it. More...
 
CorpusList * old_query_corpus = NULL
 Used for preserving former values of query_corpus (so that it can be reset to a former value). More...
 
int catch_unknown_ids = 0
 
Context expansion
 This is used by the parser in response to CQP's "expand" operator, which incorporates context around the query hit into the match itself. More...
 
char regex_string [CL_MAX_LINE_LENGTH]
 Buffer for storing regex strings. More...
 
int regex_string_pos
 index into the regex string buffer, storing a current position. More...
 
int sslen
 length of search string: is written to by evaltree2searchstr() but then seems never to be read. More...
 
struct timeval timer_start_time
 Global variable for timing functions; not exported. More...
 

Function Documentation

CorpusList* ActivateCorpus ( CorpusList *  cl)
Evaltree add_tabular_pattern ( Evaltree  patterns,
Context *  context,
int  pattern_index 
)
void addHistoryLine ( void  )

Add a line of CQP input to the history file.

Supports parser rule: line -> command

The line that is added comes from QueryBuffer; the file it is written to is that named in cqp_history_file.

See also
QueryBuffer
cqp_history_file

References cqp_history_file, cqpmessage(), Error, open_file(), QueryBuffer, reading_cqprc, silent, and write_history_file.

void after_CorpusCommand ( CorpusList *  cl)
CorpusList* after_CorpusSetExpr ( CorpusList *  cl)
CorpusList* after_Query ( CorpusList *  cl)
Constrainttree bool_and ( Constrainttree  left,
Constrainttree  right 
)
Constrainttree bool_implies ( Constrainttree  left,
Constrainttree  right 
)
Constrainttree bool_not ( Constrainttree  left)
Constrainttree bool_or ( Constrainttree  left,
Constrainttree  right 
)
void debug_output ( void  )

Prints out all the existing EvalEnvironments in the global array.

See also
Environment

References eep, and show_environment().

Referenced by do_MUQuery(), do_StandardQuery(), and do_TABQuery().

void do_AddSubVariables ( char *  var1Name,
int  add,
char *  var2Name 
)
int do_AnchorPoint ( FieldType  field,
int  is_closing 
)
void do_attribute_show ( char *  name,
int  status 
)
void do_cat ( CorpusList *  cl,
struct Redir *  r,
int  first,
int  last 
)
void do_cut ( CorpusList *  cl,
int  first,
int  last 
)
void do_delete_lines ( CorpusList *  cl,
FieldType  f,
int  mode 
)
void do_delete_lines_num ( CorpusList *  cl,
int  start,
int  end 
)
void do_Description ( Context *  context,
int  nr,
char *  name 
)
void do_dump ( CorpusList *  cl,
int  first,
int  last,
struct Redir *  rd
)

Dump query result (or part of it) as TAB-delimited table of corpus positions.

Parameters
cl: The result (as a subcorpus, naturally)
first: Where in the result to begin dumping (index of cl->range)
last: Where in the result to end dumping (index of cl->range)
rd: Pointer to a Redir structure which contains information about where to dump to.

References TCorpus::charset, cl_broken_pipe, close_stream(), cl::corpus, cqpmessage(), _Range::end, Error, cl::keywords, open_stream(), cl::range, cl::size, cl::sortidx, _Range::start, Redir::stream, and cl::targets.

void do_exec ( char *  fname)

Execute the commands contained within a specified text file.

References cqp_parse_file(), cqpmessage(), Error, generate_code, Message, and open_file().

Constrainttree do_flagged_re_variable ( char *  varname,
int  flags 
)
Constrainttree do_flagged_string ( char *  s,
int  flags 
)
void do_group ( CorpusList *  cl,
FieldType  target,
int  target_offset,
char *  t_att,
FieldType  source,
int  source_offset,
char *  s_att,
int  cut,
int  expand,
int  is_grouped,
struct Redir *  redir
)
void do_group2 ( CorpusList *  cl,
FieldType  target,
int  target_offset,
char *  t_att,
int  cut,
int  expand,
struct Redir *  r
)

Like do_group, but with no source.

References compute_grouping(), do_start_timer(), do_timing(), free_group(), NoField, and print_group().

Constrainttree do_IDReference ( char *  id_name,
int  auto_delete 
)
void do_info ( CorpusList *  cl)

References corpus_info().

Constrainttree do_LabelReference ( char *  label_name,
int  auto_delete 
)
Evaltree do_MeetStatement ( Evaltree  left,
Evaltree  right,
Context *  context
)
CorpusList* do_MUQuery ( Evaltree  evalt,
int  keep_flag,
int  cut_value 
)
Constrainttree do_mval_string ( char *  s,
int  op,
int  flags 
)
int do_NamedWfPattern ( int  is_target,
char *  label,
int  pat_idx 
)
void do_OptDistance ( Context *  context,
int  l_bound,
int  u_bound 
)
void do_PrintAllVariables ( )
void do_printVariableSize ( char *  varName)
void do_PrintVariableValue ( char *  varName)
void do_reduce ( CorpusList *  cl,
int  number,
int  percent 
)
Constrainttree do_RelExExpr ( Constrainttree  left)
Constrainttree do_RelExpr ( Constrainttree  left,
enum b_ops  op,
Constrainttree  right 
)
void do_save ( CorpusList *  cl,
struct Redir *  r
)
void do_SearchPattern ( Evaltree  expr,
Constrainttree  constraint 
)
void do_set_complex_target ( CorpusList *  cl,
FieldType  field_to_set,
SearchStrategy  strategy,
Constrainttree  boolt,
enum ctxtdir  direction,
int  number,
char *  id,
FieldType  field,
int  inclusive 
)
void do_set_target ( CorpusList *  cl,
FieldType  goal,
FieldType  source 
)

References NoField, and set_target().

CorpusList* do_setop ( RangeSetOp  op,
CorpusList *  c1,
CorpusList *  c2
)
void do_SetVariableValue ( char *  varName,
char  operator,
char *  varValues 
)
Constrainttree do_SimpleVariableReference ( char *  varName)
void do_size ( CorpusList *  cl,
FieldType  field 
)
void do_sleep ( int  duration)

Puts the program to sleep.

A wrapper round the standard sleep() function (or Sleep() in Windows).

Parameters
duration: How many seconds to sleep for.
CorpusList* do_StandardQuery ( int  cut_value,
int  keep_flag 
)
void do_start_timer ( void  )

Starts the timer running.

References timer_start_time, and timing.

Referenced by do_group(), do_group2(), do_MUQuery(), do_StandardQuery(), and do_TABQuery().

Constrainttree do_StringConstraint ( char *  s,
int  flags 
)
void do_StructuralContext ( Context *  context,
char *  name 
)
CorpusList* do_subset ( FieldType  field,
Constrainttree  boolt 
)
CorpusList* do_TABQuery ( Evaltree  patterns)
void do_timing ( char *  msg)

Shows the period since the timer started running.

Parameters
msg: A message to print along with the reading from the timer.

References cqpmessage(), Info, timer_start_time, and timing.

Referenced by do_group(), do_group2(), and in_UnnamedCorpusCommand().

CorpusList* do_translate ( CorpusList *  source,
char *  target_name 
)
int do_undump ( char *  corpname,
int  extension_fields,
int  sort_ranges,
struct InputRedir *  rd
)

Read a TAB-delimited table of corpus positions and create a named query result from it.

Acceptable values for extension_fields and the corresponding row formats:
0 = match matchend
1 = match matchend target
2 = match matchend target keyword

References assign_temp_to_sub(), cl_free, cl_malloc(), CL_MAX_LINE_LENGTH, close_input_stream(), cqpmessage(), current_corpus, drop_temp_corpora(), Error, findcorpus(), is_qualified(), line, make_temp_corpus(), cl::mother_name, cl::mother_size, new, open_input_stream(), RangeSort(), split_subcorpus_name(), InputRedir::stream, SYSTEM, cl::type, valid_subcorpus_name(), and Warning.

Evaltree do_UnionStatement ( Evaltree  left,
Evaltree  right 
)
int do_WordformPattern ( Constrainttree  boolt,
int  lookahead 
)
int do_XMLTag ( char *  s_name,
int  is_closing,
int  op,
char *  regex,
int  flags 
)
void expand_dataspace ( CorpusList *  cl)

Expand the dataspace of a subcorpus.

This is done, e.g., by the CQP-syntax "expand" command, to include context into the matches found by a query.

Each corpus interval stored in the CorpusList is extended by an amount, and in a direction, dependent on the information in the global variable "expansion", a Context object (information which has been put there by the parser).

See also
expansion
Parameters
cl: The subcorpus to expand.

References calculate_leftboundary(), calculate_rightboundary(), cqpmessage(), ctxtsp::direction, _Range::end, False, left, leftright, cl::needs_update, cl::range, RangeSetop(), right, RUniq, cl::saved, cl::size, ctxtsp::size, _Range::start, SYSTEM, True, cl::type, and Warning.

Referenced by in_UnnamedCorpusCommand().

Constrainttree FunctionCall ( char *  f_name,
ActualParamList *  apl
)
CorpusList* in_CorpusCommand ( char *  id,
CorpusList *  cl
)
CorpusList* in_UnnamedCorpusCommand ( CorpusList *  cl)

This function is called after an UnnamedCorpusCommand rule is parsed.

Seems to be a tidying-up function.

Parameters
cl: The result of the corpus-yielding command (first component of this syntax rule).
Returns
Modified value of cl. May be NULL.

References Activation, assign_temp_to_sub(), cqpmessage(), do_timing(), drop_temp_corpora(), expand_dataspace(), free_environments(), generate_code, last_cyc, make_temp_corpus(), Message, Query, SetOperation, ctxtsp::size, SYSTEM, TEMP, cl::type, and Warning.

Evaltree make_first_tabular_pattern ( int  pattern_index,
Evaltree  next 
)
char * mval_string_conversion ( char *  s)

References cl_malloc(), cqpmessage(), Error, and generate_code.

Referenced by do_mval_string(), and do_XMLTag().

Constrainttree OptimizeStringConstraint ( Constrainttree  left,
enum b_ops  op,
Constrainttree  right 
)
void prepare_AlignmentConstraints ( char *  id)
void prepare_do_subset ( CorpusList cl,
FieldType  field 
)
void prepare_input ( void  )

Get ready to parse a command.

This function is called before the processing of each parsed line that is recognised as a command.

Mostly it involves setting the global variables to their starting-state values.

References free_environments(), generate_code, last_cyc, LastExpression, NoExpression, regex_string_pos, and searchstr.

void prepare_parse ( void  )
void prepare_Query ( )

This function sets things up to run a query.

It is called as an "action" before any detected Query in the parser.

[AH 2010/8/2: I have added the code checking input character encoding. Anything that is not part of a query should be plain ASCII - if not, then the lexer/parser should pick it up as bad. Filenames, etc. are obvious exceptions - but we can't check the encoding of those, because there's no guarantee it will be the same as that of the corpus, which is the only thing whose encoding we know. So it's up to the user to type filenames in an encoding their OS will accept! Canonicalisation is done within the CL_Regex, not here.]

References access_corpus(), TCorpus::charset, cl_string_validate_encoding(), cl::corpus, cqpmessage(), CurEnv, current_corpus, eep, Environment, Error, generate_code, make_temp_corpus(), cl::mother_name, cl::name, next_environment(), query_corpus, evalenv::query_corpus, QueryBuffer, RangeSetop(), RNonOverlapping, searchstr, cl::size, Warning, and within_gc.

void printSingleVariableValue ( Variable  v,
int  max_items 
)
void push_regchr ( char  c)

Add a character (in the sense of a byte) to the regex_string buffer.

Doesn't seem to currently be in use.

See also
regex_string

References CL_MAX_LINE_LENGTH, cqpmessage(), regex_string, regex_string_pos, and Warning.

void RaiseError ( void  )
Evaltree reg_disj ( Evaltree  left,
Evaltree  right 
)
Evaltree reg_seq ( Evaltree  left,
Evaltree  right 
)
void resetQueryBuffer ( void  )

Empties the query buffer and resets its pointer to 0.

Supports parser rule: line -> command

See also
QueryBuffer
QueryBufferP

References QueryBuffer, QueryBufferOverflow, and QueryBufferP.

Referenced by RaiseError().

Constrainttree Varref2IDList ( Attribute *  attr,
enum b_ops  op,
char *  varName 
)

Variable Documentation

int catch_unknown_ids = 0
Context expansion

This is used by the parser in response to CQP's "expand" operator, which incorporates context around the query hit into the match itself.

Functions involved in carrying this out utilise info stored here by the parser.

Referenced by findcorpus().

int generate_code
CYCtype last_cyc
CorpusList* old_query_corpus = NULL

Used for preserving former values of query_corpus (so that it can be reset to a former value).

See also
query_corpus

Referenced by do_set_complex_target(), and prepare_parse().

CorpusList* query_corpus = NULL

The corpus (or subcorpus) which is "active" in the sense that the query will be executed within it.

Referenced by ActivateCorpus(), do_subset(), prepare_Query(), and simulate().

char regex_string[CL_MAX_LINE_LENGTH]

Buffer for storing regex strings.

As it says on the tin.

TODO Doesn't seem currently to be in use anywhere, except in one func which itself is not used.

Referenced by push_regchr().

int regex_string_pos

index into the regex string buffer, storing a current position.

See also
regex_string

Referenced by prepare_input(), and push_regchr().

int sslen

length of search string: is written to by evaltree2searchstr() but then seems never to be read.

TODO .

Referenced by do_SearchPattern().

struct timeval timer_start_time

Global variable for timing functions; not exported.

See also
do_start_timer
do_timing

Referenced by do_start_timer(), and do_timing().

int within_gc

TODO: it would be very useful to have a description for this; it seems to be about whether or not we are within a global constraint.

Referenced by after_Query(), do_IDReference(), and prepare_Query().