CWB
Functions | Variables
output.c File Reference
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <stdarg.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/time.h>
#include "../cl/globals.h"
#include "../cl/corpus.h"
#include "../cl/attributes.h"
#include "../cl/cdaccess.h"
#include "concordance.h"
#include "cqp.h"
#include "options.h"
#include "output.h"
#include "corpmanag.h"
#include "print-modes.h"
#include "print_align.h"
#include "ascii-print.h"
#include "sgml-print.h"
#include "html-print.h"
#include "latex-print.h"
#include <sys/types.h>
#include <pwd.h>

Functions

void print_corpus_info_header (CorpusList *cl, FILE *stream, PrintMode mode, int force)
 
FILE * open_temporary_file (char *tmp_name_buffer)
 Creates, and opens for text-mode write, a temporary file. More...
 
FILE * open_file (char *name, char *mode)
 This function is a wrapper round fopen() which provides checks for different shorthands for a "home" directory, such as ~ or $HOME. More...
 
FILE * open_pager (char *cmd, CorpusCharset charset)
 Create a pipe to a new instance of a specified program to be used as an output pager. More...
 
int open_stream (struct Redir *rd, CorpusCharset charset)
 Open the (output) stream within a Redir(ection) structure. (The callback handler for SIGPIPE has been moved to <cl_broken_pipe>.) More...
 
int close_stream (struct Redir *rd)
 Closes the (output) stream within a Redir structure. More...
 
int open_input_stream (struct InputRedir *rd)
 
int close_input_stream (struct InputRedir *rd)
 
void print_output (CorpusList *cl, FILE *fd, int interactive, ContextDescriptor *cd, int first, int last, PrintMode mode)
 
void catalog_corpus (CorpusList *cl, struct Redir *rd, int first, int last, PrintMode mode)
 Prints a corpus, typically (some of) the matches of a query. More...
 
void cqpmessage (MessageType type, char *format,...)
 Print a message to output (for instance a debug message). More...
 
void corpus_info (CorpusList *cl)
 Outputs a blob of information on the mother-corpus of the specified cl. More...
 
void free_tabulation_list (void)
 free global list of tabulation items (before building new one) More...
 
TabulationItem new_tabulation_item (void)
 allocate and initialize new tabulation item More...
 
void append_tabulation_item (TabulationItem item)
 append tabulation item to end of current list More...
 
int pt_get_anchor_cpos (CorpusList *cl, int n, FieldType anchor, int offset)
 Gets the cpos of one of the "anchors" of a particular query result. More...
 
int pt_validate_anchor (CorpusList *cl, FieldType anchor)
 
int print_tabulation (CorpusList *cl, int first, int last, struct Redir *rd)
 tabulate specified query result, using settings from global list of tabulation items; return value indicates whether tabulation was successful (otherwise, generates error message) More...
 

Variables

TabulationItem TabulationList = NULL
 Global list of tabulation items for use with the "tabulate" operator. More...
 

Function Documentation

void append_tabulation_item ( TabulationItem  item)

append tabulation item to end of current list

References _TabulationItem::next, and TabulationList.

void catalog_corpus ( CorpusList * cl,
struct Redir * rd,
int  first,
int  last,
PrintMode  mode 
)

Prints a corpus, typically (some of) the matches of a query.

(Not sure why it's called "catalog"; is this a pun on the cat keyword? – AH 2012-07-17) (I suspect that it's a misinterpretation of what "cat" stands for. – SE 2016-07-20)

The query is represented by a subcorpus (cl); only results #first..#last will be printed; use (0,-1) for the entire corpus.

Parameters
clThe corpus/subcorpus/query to output.
rdBlock of output redirection info; if NULL, default settings will be used.
firstOffset of first match to print.
lastOffset of last match to print.
modePrint mode to use.

References access_corpus(), CD, TCorpus::charset, close_stream(), cl::corpus, cqpmessage(), _Range::end, Error, False, GlobalPrintMode, GlobalPrintOptions, Redir::is_paging, Redir::mode, Redir::name, open_stream(), print_corpus_info_header(), _print_option_rec_::print_header, print_output(), PrintASCII, PrintBINARY, PrintHTML, printNrMatches, cl::range, rangeoutput, cl::size, _Range::start, Redir::stream, True, and verify_context_descriptor().

Referenced by after_CorpusCommand(), and do_cat().

int close_input_stream ( struct InputRedir * rd)

References cl_close_stream(), and InputRedir::stream.

Referenced by do_undump().

int close_stream ( struct Redir * rd)

Closes the (output) stream within a Redir structure.

If output was being sent to a pipe, SIGPIPE is set back to the SIG_IGN handler.

Parameters
rdThe Redir stream to close.
Returns
True for all OK, false if closing did not work. If rd does not actually have an open stream, nothing is done, and that counts as a success.

References cl_close_stream(), Redir::is_paging, and Redir::stream.

Referenced by catalog_corpus(), corpus_info(), do_dump(), print_group(), print_tabulation(), PrintContextDescriptor(), and SortSubcorpus().

void corpus_info ( CorpusList * cl)

Outputs a blob of information on the mother-corpus of the specified cl.

void cqpmessage ( MessageType  type,
char *  format,
  ... 
)

Print a message to output (for instance a debug message).

See also
MessageType
Parameters
typeSpecifies what type of message (messages of some types are not always printed)
formatFormat string; it and the variadic arguments (...) are passed to vfprintf().

References Error, Info, Message, silent, verbose_parser, and Warning.

Referenced by ActivateCorpus(), add_host_to_list(), add_hosts_in_subnet_to_list(), add_user_to_list(), addHistoryLine(), after_CorpusSetExpr(), attach_subcorpus(), calculate_initial_matchlist_1(), call_predefined_function(), catalog_corpus(), check_alignment_constraints(), check_labels(), compute_grouping(), ComputeGroupExternally(), ComputeGroupInternally(), ComputePrintStructures(), copy_intervals(), corpus_info(), cqp_parse_string(), cqp_run_mu_query(), define_macro(), do_AddSubVariables(), do_AnchorPoint(), do_attribute_show(), do_cat(), do_cut(), do_delete_lines(), do_delete_lines_num(), do_Description(), do_dump(), do_exec(), do_flagged_re_variable(), do_flagged_string(), do_IDReference(), do_LabelReference(), do_MUQuery(), do_mval_string(), do_NamedWfPattern(), do_OptDistance(), do_printVariableSize(), do_PrintVariableValue(), do_reduce(), do_RelExpr(), do_save(), do_SearchPattern(), do_setop(), do_SetVariableValue(), do_SimpleVariableReference(), do_StandardQuery(), do_StringConstraint(), do_StructuralContext(), do_timing(), do_translate(), do_undump(), do_WordformPattern(), do_XMLTag(), duplicate_corpus(), ensure_corpus_size(), eval_bool(), evaluate_subset(), evaluate_target(), execute_side_effects(), expand_dataspace(), expand_macro(), field_type_to_name(), findcorpus(), FunctionCall(), get_leaf_value(), in_CorpusCommand(), in_UnnamedCorpusCommand(), init_macros(), initialize_cqp(), install_signal_handler(), load_corpusnames(), load_macro_file(), MacroHashAdd(), MacroHashDelete(), MacroHashLookup(), matchfirstpattern(), mval_string_conversion(), open_input_stream(), open_stream(), OptimizeStringConstraint(), prepare_AlignmentConstraints(), prepare_do_subset(), prepare_parse(), prepare_Query(), print_concordance_line(), print_group(), print_macro_definition(), print_output(), print_tabulation(), pt_validate_anchor(), push_regchr(), RangeSort(), save_subcorpus(), save_unsaved_subcorpora(), set_reftab(), set_target(), SetVariableValue(), simulate(), 
simulate_dfa(), SortExternally(), SortSubcorpus(), SortSubcorpusRandomize(), string_to_strategy(), Varref2IDList(), verify_context_descriptor(), VerifyVariable(), and yy_input_char().

void free_tabulation_list ( void  )

free global list of tabulation items (before building new one)

References _TabulationItem::attribute_name, cl_free, _TabulationItem::next, and TabulationList.

Referenced by print_tabulation().

TabulationItem new_tabulation_item ( void  )

allocate and initialize new tabulation item

FILE* open_file ( char *  name,
char *  mode 
)

This function is a wrapper round fopen() which provides checks for different shorthands for a "home" directory, such as ~ or $HOME.

Its arguments and return values are the same as fopen().

TODO: The function is retained for backward compatibility. Its use should be replaced by cl_open_stream() with automagic, but care has to be taken to change the corresponding fclose() calls to cl_close_stream().

References CL_MAX_FILENAME_LENGTH.

Referenced by addHistoryLine(), attach_subcorpus(), check_stamp(), corpus_info(), do_exec(), parse_options(), and save_subcorpus().

int open_input_stream ( struct InputRedir * rd)

FILE* open_pager ( char *  cmd,
CorpusCharset  charset 
)

Create a pipe to a new instance of a specified program to be used as an output pager.

If cmd is different from the program specified in the global variable "tested_pager", run a test first.

This would normally be something like "more" or "less".

See also
tested_pager
less_charset_variable
Parameters
cmdProgram command to start pager process.
charsetCharset to which to set the pager-charset-environment variable
Returns
Writable stream for the pipe to the pager, or NULL if a test of the pager program failed; must be closed with cl_close_stream()

References ascii, cl_free, cl_open_stream(), cl_strdup(), CL_STREAM_PIPE, CL_STREAM_WRITE, less_charset_variable, tested_pager, and utf8.

Referenced by open_stream().

int open_stream ( struct Redir * rd,
CorpusCharset  charset 
)

Callback handler for SIGPIPE now moved to <cl_broken_pipe>

int broken_pipe = 0;

static void bp_signal_handler(int signum)
{
#ifndef MINGW
  broken_pipe = 1;
  if (signal(SIGPIPE, bp_signal_handler) == SIG_ERR)
    perror("Can't reinstall signal handler for broken pipe");
#endif
}

Open the (output) stream within a Redir(ection) structure.

If output is sent to a pipe, a signal handler for SIGPIPE is automatically installed and configured to set the global variable broken_pipe to True. Output functions should check this variable and abort if it is set. The signal handler is uninstalled when close_stream() is called, which may lead to undesired behaviour if multiple streams are open at the same time.

Parameters
rdRedir structure to be opened.
charsetThe charset to be used. Only has an effect if the stream to be opened is to an output pager.
Returns
True for success, false for failure.

References cl_close_stream(), cl_errno, cl_error_string(), cl_open_stream(), cl_strdup(), CL_STREAM_APPEND, CL_STREAM_MAGIC, CL_STREAM_MAGIC_NOPIPE, CL_STREAM_STDIO, CL_STREAM_WRITE, CQP_FALLBACK_PAGER, cqpmessage(), Error, False, insecure, Redir::is_paging, Redir::mode, mode, Redir::name, open_pager(), pager, paging, set_integer_option_value(), set_string_option_value(), Redir::stream, True, and Warning.

Referenced by catalog_corpus(), corpus_info(), do_dump(), print_group(), print_tabulation(), PrintContextDescriptor(), and SortSubcorpus().

FILE* open_temporary_file ( char *  tmp_name_buffer)

Creates, and opens for text-mode write, a temporary file.

Temporary files have the prefix "$PID.cqpt." (where $PID = the process ID of this copy of CQP) and are placed in the directory defined as TEMPDIR_PATH.

See also
TEMPDIR_PATH
TEMP_FILENAME_BUFSIZE
Parameters
tmp_name_bufferA pre-allocated buffer which will be overwritten with the name of the temporary file. This should be at least TEMP_FILENAME_BUFSIZE bytes in size. If opening is unsuccessful, this will be set to "".
Returns
A stream (FILE *) to the opened temporary file, or NULL if unsuccessful.

References cl_free, TEMP_FILENAME_BUFSIZE, and TEMPDIR_PATH.

Referenced by ComputeGroupExternally(), and SortExternally().

void print_corpus_info_header ( CorpusList * cl,
FILE *  stream,
PrintMode  mode,
int  force 
)

void print_output ( CorpusList * cl,
FILE *  fd,
int  interactive,
ContextDescriptor * cd,
int  first,
int  last,
PrintMode  mode 
)

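int print_tabulation ( CorpusList * cl,
int  first,
int  last,
struct Redir * rd 
)

tabulate specified query result, using settings from global list of tabulation items; return value indicates whether tabulation was successful (otherwise, generates error message)
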
int pt_get_anchor_cpos ( CorpusList * cl,
int  n,
FieldType  anchor,
int  offset 
)

Gets the cpos of one of the "anchors" of a particular query result.

Used for tabulation.

Parameters
clThe query being tabulated.
nThe number of the match we are requesting an anchor for (where first match is 0).
anchorWhich of the anchors of the query match we are requesting.
offsetInteger offset from the anchor that we are requesting.
Returns
The cpos of the requested position, which may fall outside the bounds of the corpus if an offset has been specified; or CDA_CPOSUNDEF if the anchor has not been set.

References CDA_CPOSUNDEF, _Range::end, KeywordField, cl::keywords, MatchEndField, MatchField, cl::mother_size, NoField, cl::range, cl::sortidx, _Range::start, TargetField, and cl::targets.

Referenced by print_tabulation().

int pt_validate_anchor ( CorpusList * cl,
FieldType  anchor 
)

Variable Documentation

TabulationItem TabulationList = NULL

Global list of tabulation items for use with the "tabulate" operator.

Referenced by append_tabulation_item(), free_tabulation_list(), and print_tabulation().