CWB
|
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <stdarg.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/time.h>
#include "../cl/globals.h"
#include "../cl/corpus.h"
#include "../cl/attributes.h"
#include "../cl/cdaccess.h"
#include "concordance.h"
#include "cqp.h"
#include "options.h"
#include "output.h"
#include "corpmanag.h"
#include "print-modes.h"
#include "print_align.h"
#include "ascii-print.h"
#include "sgml-print.h"
#include "html-print.h"
#include "latex-print.h"
#include <sys/types.h>
#include <pwd.h>
Functions | |
void | print_corpus_info_header (CorpusList *cl, FILE *stream, PrintMode mode, int force) |
FILE * | open_temporary_file (char *tmp_name_buffer) |
Creates, and opens for text-mode write, a temporary file. More... | |
FILE * | open_file (char *name, char *mode) |
This function is a wrapper round fopen() which provides checks for different shorthands for a "home" directory, such as ~ or $HOME. More... | |
FILE * | open_pager (char *cmd, CorpusCharset charset) |
Create a pipe to a new instance of a specified program to be used as an output pager. More... | |
int | open_stream (struct Redir *rd, CorpusCharset charset) |
Callback handler for SIGPIPE now moved to <cl_broken_pipe> More... | |
int | close_stream (struct Redir *rd) |
Closes the (output) stream within a Redir structure. More... | |
int | open_input_stream (struct InputRedir *rd) |
int | close_input_stream (struct InputRedir *rd) |
void | print_output (CorpusList *cl, FILE *fd, int interactive, ContextDescriptor *cd, int first, int last, PrintMode mode) |
void | catalog_corpus (CorpusList *cl, struct Redir *rd, int first, int last, PrintMode mode) |
Prints a corpus, typically (some of) the matches of a query. More... | |
void | cqpmessage (MessageType type, char *format,...) |
Print a message to output (for instance a debug message). More... | |
void | corpus_info (CorpusList *cl) |
Outputs a blob of information on the mother-corpus of the specified cl. More... | |
void | free_tabulation_list (void) |
free global list of tabulation items (before building new one) More... | |
TabulationItem | new_tabulation_item (void) |
allocate and initialize new tabulation item More... | |
void | append_tabulation_item (TabulationItem item) |
append tabulation item to end of current list More... | |
int | pt_get_anchor_cpos (CorpusList *cl, int n, FieldType anchor, int offset) |
Gets the cpos of one of the "anchors" of a particular query result. More... | |
int | pt_validate_anchor (CorpusList *cl, FieldType anchor) |
int | print_tabulation (CorpusList *cl, int first, int last, struct Redir *rd) |
tabulate specified query result, using settings from global list of tabulation items; return value indicates whether tabulation was successful (otherwise, generates error message) More... | |
Variables | |
TabulationItem | TabulationList = NULL |
Global list of tabulation items for use with the "tabulate" operator. More... | |
void append_tabulation_item | ( | TabulationItem | item | ) |
append tabulation item to end of current list
References _TabulationItem::next, and TabulationList.
void catalog_corpus | ( | CorpusList * | cl, |
struct Redir * | rd, | ||
int | first, | ||
int | last, | ||
PrintMode | mode | ||
) |
Prints a corpus, typically (some of) the matches of a query.
(Not sure why it's called "catalog"; is this a pun on the cat keyword? – AH 2012-07-17) (I suspect that it's a misinterpretation of what "cat" stands for. – SE 2016-07-20)
The query is represented by a subcorpus (cl); only results #first..last will be printed; use (0,-1) for entire corpus.
cl | The corpus/subcorpus/query to output. |
rd | Block of output redirection info; if NULL, default settings will be used. |
first | Offset of first match to print. |
last | Offset of last match to print. |
mode | Print mode to use. |
References access_corpus(), CD, TCorpus::charset, close_stream(), cl::corpus, cqpmessage(), _Range::end, Error, False, GlobalPrintMode, GlobalPrintOptions, Redir::is_paging, Redir::mode, Redir::name, open_stream(), print_corpus_info_header(), _print_option_rec_::print_header, print_output(), PrintASCII, PrintBINARY, PrintHTML, printNrMatches, cl::range, rangeoutput, cl::size, _Range::start, Redir::stream, True, and verify_context_descriptor().
Referenced by after_CorpusCommand(), and do_cat().
int close_input_stream | ( | struct InputRedir * | rd | ) |
References cl_close_stream(), and InputRedir::stream.
Referenced by do_undump().
int close_stream | ( | struct Redir * | rd | ) |
Closes the (output) stream within a Redir structure.
If output was being sent to a pipe, SIGPIPE is set back to the SIG_IGN handler.
rd | The Redir stream to close. |
References cl_close_stream(), Redir::is_paging, and Redir::stream.
Referenced by catalog_corpus(), corpus_info(), do_dump(), print_group(), print_tabulation(), PrintContextDescriptor(), and SortSubcorpus().
void corpus_info | ( | CorpusList * | cl | ) |
Outputs a blob of information on the mother-corpus of the specified cl.
References ascii, buf, TCorpus::charset, cl_charset_name(), cl_corpus_property(), cl_first_corpus_property(), CL_MAX_LINE_LENGTH, cl_next_corpus_property(), close_stream(), cl::corpus, corpus_info(), cqpmessage(), findcorpus(), Info, TCorpus::info_file, cl::mother_name, cl::mother_size, cl::name, open_file(), open_stream(), TCorpusProperty::property, Redir::stream, SYSTEM, cl::type, unknown_charset, TCorpusProperty::value, and Warning.
Referenced by corpus_info(), and do_info().
void cqpmessage | ( | MessageType | type, |
char * | format, | ||
... | |||
) |
Print a message to output (for instance a debug message).
type | Specifies what type of message (messages of some types are not always printed) |
format | Format string; it and the variadic arguments (...) are passed to vfprintf(). |
References Error, Info, Message, silent, verbose_parser, and Warning.
Referenced by ActivateCorpus(), add_host_to_list(), add_hosts_in_subnet_to_list(), add_user_to_list(), addHistoryLine(), after_CorpusSetExpr(), attach_subcorpus(), calculate_initial_matchlist_1(), call_predefined_function(), catalog_corpus(), check_alignment_constraints(), check_labels(), compute_grouping(), ComputeGroupExternally(), ComputeGroupInternally(), ComputePrintStructures(), copy_intervals(), corpus_info(), cqp_parse_string(), cqp_run_mu_query(), define_macro(), do_AddSubVariables(), do_AnchorPoint(), do_attribute_show(), do_cat(), do_cut(), do_delete_lines(), do_delete_lines_num(), do_Description(), do_dump(), do_exec(), do_flagged_re_variable(), do_flagged_string(), do_IDReference(), do_LabelReference(), do_MUQuery(), do_mval_string(), do_NamedWfPattern(), do_OptDistance(), do_printVariableSize(), do_PrintVariableValue(), do_reduce(), do_RelExpr(), do_save(), do_SearchPattern(), do_setop(), do_SetVariableValue(), do_SimpleVariableReference(), do_StandardQuery(), do_StringConstraint(), do_StructuralContext(), do_timing(), do_translate(), do_undump(), do_WordformPattern(), do_XMLTag(), duplicate_corpus(), ensure_corpus_size(), eval_bool(), evaluate_subset(), evaluate_target(), execute_side_effects(), expand_dataspace(), expand_macro(), field_type_to_name(), findcorpus(), FunctionCall(), get_leaf_value(), in_CorpusCommand(), in_UnnamedCorpusCommand(), init_macros(), initialize_cqp(), install_signal_handler(), load_corpusnames(), load_macro_file(), MacroHashAdd(), MacroHashDelete(), MacroHashLookup(), matchfirstpattern(), mval_string_conversion(), open_input_stream(), open_stream(), OptimizeStringConstraint(), prepare_AlignmentConstraints(), prepare_do_subset(), prepare_parse(), prepare_Query(), print_concordance_line(), print_group(), print_macro_definition(), print_output(), print_tabulation(), pt_validate_anchor(), push_regchr(), RangeSort(), save_subcorpus(), save_unsaved_subcorpora(), set_reftab(), set_target(), SetVariableValue(), simulate(), 
simulate_dfa(), SortExternally(), SortSubcorpus(), SortSubcorpusRandomize(), string_to_strategy(), Varref2IDList(), verify_context_descriptor(), VerifyVariable(), and yy_input_char().
void free_tabulation_list | ( | void | ) |
free global list of tabulation items (before building new one)
References _TabulationItem::attribute_name, cl_free, _TabulationItem::next, and TabulationList.
Referenced by print_tabulation().
TabulationItem new_tabulation_item | ( | void | ) |
allocate and initialize new tabulation item
References _TabulationItem::anchor1, _TabulationItem::anchor2, ATT_NONE, _TabulationItem::attribute, _TabulationItem::attribute_name, _TabulationItem::attribute_type, cl_malloc(), _TabulationItem::flags, _TabulationItem::next, NoField, _TabulationItem::offset1, and _TabulationItem::offset2.
FILE* open_file | ( | char * | name, |
char * | mode | ||
) |
This function is a wrapper round fopen() which provides checks for different shorthands for a "home" directory, such as ~ or $HOME.
Its arguments and return values are the same as fopen().
TODO: The function is retained for backward compatibility. Its use should be replaced by cl_open_stream() with automagic, but care has to be taken to change the corresponding fclose() calls to cl_close_stream().
References CL_MAX_FILENAME_LENGTH.
Referenced by addHistoryLine(), attach_subcorpus(), check_stamp(), corpus_info(), do_exec(), parse_options(), and save_subcorpus().
int open_input_stream | ( | struct InputRedir * | rd | ) |
References cl_close_stream(), cl_errno, cl_error_string(), cl_free, cl_malloc(), cl_open_stream(), CL_STREAM_MAGIC, CL_STREAM_MAGIC_NOPIPE, CL_STREAM_PIPE, CL_STREAM_READ, CL_STREAM_STDIO, cqpmessage(), Error, insecure, InputRedir::name, and InputRedir::stream.
Referenced by do_undump().
FILE* open_pager | ( | char * | cmd, |
CorpusCharset | charset | ||
) |
Create a pipe to a new instance of a specified program to be used as an output pager.
If cmd is different from the program specified in the global variable "tested_pager", run a test first.
This would normally be something like "more" or "less".
cmd | Program command to start pager process. |
charset | Charset to which to set the pager-charset-environment variable |
References ascii, cl_free, cl_open_stream(), cl_strdup(), CL_STREAM_PIPE, CL_STREAM_WRITE, less_charset_variable, tested_pager, and utf8.
Referenced by open_stream().
int open_stream | ( | struct Redir * | rd, |
CorpusCharset | charset | ||
) |
Callback handler for SIGPIPE now moved to <cl_broken_pipe>
int broken_pipe = 0;
static void bp_signal_handler(int signum) {
#ifndef MINGW
  broken_pipe = 1;
  if (signal(SIGPIPE, bp_signal_handler) == SIG_ERR)
    perror("Can't reinstall signal handler for broken pipe");
#endif
}
Open the (output) stream within a Redir(ection) structure.
If output is sent to a pipe, a signal handler for SIGPIPE is automatically installed and configured to set the global variable broken_pipe to True. Output functions should check this variable and abort if it is set. The signal handler is uninstalled when close_stream() is called, which may lead to undesired behaviour if multiple streams are open at the same time.
rd | Redir structure to be opened. |
charset | The charset to be used. Only has an effect if the stream to be opened is to an output pager. |
References cl_close_stream(), cl_errno, cl_error_string(), cl_open_stream(), cl_strdup(), CL_STREAM_APPEND, CL_STREAM_MAGIC, CL_STREAM_MAGIC_NOPIPE, CL_STREAM_STDIO, CL_STREAM_WRITE, CQP_FALLBACK_PAGER, cqpmessage(), Error, False, insecure, Redir::is_paging, Redir::mode, mode, Redir::name, open_pager(), pager, paging, set_integer_option_value(), set_string_option_value(), Redir::stream, True, and Warning.
Referenced by catalog_corpus(), corpus_info(), do_dump(), print_group(), print_tabulation(), PrintContextDescriptor(), and SortSubcorpus().
FILE* open_temporary_file | ( | char * | tmp_name_buffer | ) |
Creates, and opens for text-mode write, a temporary file.
Temporary files have the prefix "$PID.cqpt." (where $PID = the process ID of this copy of CQP) and are placed in the directory defined as TEMPDIR_PATH.
tmp_name_buffer | A pre-allocated buffer which will be overwritten with the name of the temporary file. This should be at least TEMP_FILENAME_BUFSIZE bytes in size. If opening is unsuccessful, this will be set to "". |
References cl_free, TEMP_FILENAME_BUFSIZE, and TEMPDIR_PATH.
Referenced by ComputeGroupExternally(), and SortExternally().
void print_corpus_info_header | ( | CorpusList * | cl, |
FILE * | stream, | ||
PrintMode | mode, | ||
int | force | ||
) |
void print_output | ( | CorpusList * | cl, |
FILE * | fd, | ||
int | interactive, | ||
ContextDescriptor * | cd, | ||
int | first, | ||
int | last, | ||
PrintMode | mode | ||
) |
References ascii_print_output(), cqpmessage(), Error, html_print_output(), latex_print_output(), PrintASCII, PrintHTML, PrintLATEX, PrintSGML, and sgml_print_output().
Referenced by catalog_corpus().
int print_tabulation | ( | CorpusList * | cl, |
int | first, | ||
int | last, | ||
struct Redir * | rd | ||
) |
tabulate specified query result, using settings from global list of tabulation items; return value indicates whether tabulation was successful (otherwise, generates error message)
References _TabulationItem::anchor1, _TabulationItem::anchor2, ATT_NONE, ATT_POS, ATT_STRUC, _TabulationItem::attribute, _TabulationItem::attribute_name, _TabulationItem::attribute_type, CDA_CPOSUNDEF, TCorpus::charset, cl_broken_pipe, cl_cpos2str(), cl_cpos2struc2str(), cl_free, cl_new_attribute, cl_strdup(), cl_string_canonical(), cl_struc_values(), close_stream(), cl::corpus, cqpmessage(), Error, _TabulationItem::flags, free_tabulation_list(), last, cl::name, _TabulationItem::next, _TabulationItem::offset1, _TabulationItem::offset2, open_stream(), pt_get_anchor_cpos(), pt_validate_anchor(), cl::size, Redir::stream, and TabulationList.
int pt_get_anchor_cpos | ( | CorpusList * | cl, |
int | n, | ||
FieldType | anchor, | ||
int | offset | ||
) |
Gets the cpos of one of the "anchors" of a particular query result.
Used for tabulation.
cl | The query being tabulated. |
n | The number of the match we are requesting an anchor for (where first match is 0). |
anchor | Which of the anchors of the query match we are requesting. |
offset | Integer offset from the anchor that we are requesting. |
References CDA_CPOSUNDEF, _Range::end, KeywordField, cl::keywords, MatchEndField, MatchField, cl::mother_size, NoField, cl::range, cl::sortidx, _Range::start, TargetField, and cl::targets.
Referenced by print_tabulation().
int pt_validate_anchor | ( | CorpusList * | cl, |
FieldType | anchor | ||
) |
References cqpmessage(), Error, KeywordField, cl::keywords, MatchEndField, MatchField, cl::name, NoField, cl::range, TargetField, and cl::targets.
Referenced by print_tabulation().
TabulationItem TabulationList = NULL |
Global list of tabulation items for use with the "tabulate" operator.
Referenced by append_tabulation_item(), free_tabulation_list(), and print_tabulation().