CWB
Data Structures | Macros | Functions | Variables
concordance.c File Reference
#include <stdio.h>
#include <glib.h>
#include "../cl/globals.h"
#include "../cl/macros.h"
#include "../cl/corpus.h"
#include "../cl/attributes.h"
#include "../cl/cdaccess.h"
#include "concordance.h"
#include "attlist.h"
#include "options.h"

Data Structures

struct  SAttRegion
 Represents a single s-attribute region and its annotation. More...
 

Macros

#define MAXKWICLINELEN   65535
 maximum length of a KWIC line (bytes). More...
 
#define MAX_S_ATTRS   1024 /* max. number of s-attributes; same as MAX_ATTRS in <utils/decode.c> and MAXRANGES in <utils/encode.c> */
 

Functions

static char * srev (char *s)
 Reverses the argument string (destructively, that is, in situ). More...
 
void get_print_attribute_values (ContextDescriptor *cd, int position, ClAutoString s, int *sp, int max_sp, int add_position_number, PrintDescriptionRecord *pdr)
 Prints s-attribute values into a ClAutoString, for use in a printed concordance line. More...
 
void sort_s_att_regions (void)
 
void get_position_values (ContextDescriptor *cd, int position, ClAutoString s, int *sp, int max_sp, int add_position_number, ConcLineLayout orientation, PrintDescriptionRecord *pdr, int nr_mappings, Mapping *mappings)
 Get values at the given corpus position. More...
 
void remember_this_position (int position, int this_token_start, int this_token_end, int *position_list, int nr_positions, int *returned_positions)
 Adds the asserted number of positions (nr_positions) specified by this_token_start and this_token_end to an array of integers in returned_positions. More...
 
char * get_field_separators (int position, ConcLineField *fields, int nr_fields, int at_end, PrintDescriptionRecord *pdr)
 This oddly-named function prints a series of separators for "fields" to an internal buffer. More...
 
void setup_kwic_line_memory (void)
 Initialises the two empty auto-growing strings used for concordance line construction. More...
 
void cleanup_kwic_line_memory (void)
 Frees the memory used for building a KWIC line for display. More...
 
char * compose_kwic_line (Corpus *corpus, int match_start, int match_end, ContextDescriptor *cd, int *length, int *s_mb, int *s_me, char *left_marker, char *right_marker, int *position_list, int nr_positions, int *returned_positions, ConcLineField *fields, int nr_fields, ConcLineLayout orientation, PrintDescriptionRecord *pdr, int nr_mappings, Mapping *mappings)
 Builds a string for a concordance output line. More...
 

Variables

SAttRegion s_att_regions [MAX_S_ATTRS]
 
int sar_sort_index [MAX_S_ATTRS]
 
int N_sar = 0
 
static ClAutoString scratch = NULL
 Scratch string used by get_field_separators only. More...
 
static ClAutoString line = NULL
 Used to build the concordance line (main line buffer). More...
 
static ClAutoString token = NULL
 Used to build the concordance line (token buffer). More...
 

Macro Definition Documentation

#define MAX_S_ATTRS   1024 /* max. number of s-attributes; same as MAX_ATTRS in <utils/decode.c> and MAXRANGES in <utils/encode.c> */
#define MAXKWICLINELEN   65535

maximum length of a KWIC line (bytes).

Referenced by compose_kwic_line().

Function Documentation

void cleanup_kwic_line_memory ( void  )

Frees the memory used for building a KWIC line for display.

Best used when CQP shuts down.

References cl_autostring_delete().

Referenced by main().

char* compose_kwic_line ( Corpus corpus,
int  match_start,
int  match_end,
ContextDescriptor cd,
int *  length,
int *  s_mb,
int *  s_me,
char *  left_marker,
char *  right_marker,
int *  position_list,
int  nr_positions,
int *  returned_positions,
ConcLineField fields,
int  nr_fields,
ConcLineLayout  orientation,
PrintDescriptionRecord pdr,
int  nr_mappings,
Mapping mappings 
)

Builds a string for a concordance output line.

'position_list' is a list of (corpus) positions. The string start and end positions for these corpus positions are written into returned_positions, which must be exactly twice as large as the position list. The number of positions must be given in nr_positions.

Parameters
match_start: A corpus position.
match_end: A corpus position.
fields: Array of ConcLineField objects (each of which specifies one of the 4 anchors).
nr_fields: Number of items in the "fields" array.

Rough notes: I THINK returned_positions is just a blob of memory that the function is allowed to use.

Returns
String containing the output line.

References AddNameToAL(), _print_descr_rec_::AfterField, _print_descr_rec_::AfterToken, ALIGN_CONTEXT, ATT_POS, _attrbuf::attribute, _context_description_block::attributes, _print_descr_rec_::BeforeField, _print_descr_rec_::BeforeToken, CHAR_CONTEXT, TCorpus::charset, cl_alg2cpos(), cl_autostring_concat(), cl_autostring_ptr(), cl_autostring_truncate(), cl_charset_strlen(), cl_cpos2alg(), cl_cpos2struc(), cl_max_cpos(), cl_max_struc(), cl_strdup(), cl_string_utf8_continuation_byte(), cl_struc2cpos(), ConcLineHorizontal, ClAutoString::data, DEFAULT_ATT_NAME, FindInAL(), get_field_separators(), get_position_values(), get_print_attribute_values(), _context_description_block::left_structure, _context_description_block::left_type, _context_description_block::left_width, ClAutoString::len, _attlist::list, _attlist::list_valid, MAX, MAXKWICLINELEN, MIN, NewAttributeList(), _attrbuf::next, nr_positions(), _context_description_block::print_cpos, remember_this_position(), _context_description_block::right_structure, _context_description_block::right_type, _context_description_block::right_width, setup_kwic_line_memory(), srev(), _attrbuf::status, STRUC_CONTEXT, text_size, _print_descr_rec_::TokenSeparator, utf8, VerifyList(), word, and WORD_CONTEXT.

Referenced by html_print_output(), latex_print_output(), print_concordance_line(), printAlignedStrings(), and sgml_print_output().

char* get_field_separators ( int  position,
ConcLineField fields,
int  nr_fields,
int  at_end,
PrintDescriptionRecord pdr 
)

This oddly-named function prints a series of separators for "fields" to an internal buffer.

"Field" in this context means one of the 4 anchor points (begin, end, target, keyword).

Parameters
position: The corpus position (cpos) whose field-separators we want.
fields: Pointer to array of ConcLineField objects (each of which specifies one of the 4 anchors).
nr_fields: Number of items in the "fields" array.
at_end: Boolean: if true, we get the end-separators for the fields at this cpos; if false, we get the beginning-separators for the fields at this cpos.
pdr: The PDR for the current concordance printout.
Returns
A pointer to a module-internal static string buffer containing the requested string. Do not free it or alter it. The buffer's content will change when this function is called again. The function will return NULL if the requested string would have been zero-length.

References cl_autostring_concat(), cl_autostring_new(), cl_autostring_truncate(), ClAutoString::data, ClAutoString::len, _print_descr_rec_::printField, and _ConcLineField::type.

Referenced by compose_kwic_line().

void get_position_values ( ContextDescriptor cd,
int  position,
ClAutoString  s,
int *  sp,
int  max_sp,
int  add_position_number,
ConcLineLayout  orientation,
PrintDescriptionRecord pdr,
int  nr_mappings,
Mapping mappings 
)
void get_print_attribute_values ( ContextDescriptor cd,
int  position,
ClAutoString  s,
int *  sp,
int  max_sp,
int  add_position_number,
PrintDescriptionRecord pdr 
)

Prints s-attribute values into a ClAutoString, for use in a printed concordance line.

(Note, the function is called "attribute values" but it very specifically means s-attributes.)

The s-attributes that will be printed are determined by the contents of the ContextDescriptor object; the PrintDescriptionRecord determines what they look like.

Parameters
cd: Print settings (context size/type; atts to print)
position: The CPOS to be used in the position at the start
s: Results will be concatenated onto this string.
sp: Deprecated argument: not used by func
max_sp: Deprecated argument: not used by func
add_position_number: Boolean: whether or not to make a position number (start of concordance line)
pdr: Print settings (of main mode) to use

References _print_descr_rec_::AfterPrintStructures, _Attribute::any, _attrbuf::attribute, _print_descr_rec_::BeforePrintStructures, cl_autostring_concat(), cl_cpos2struc2str(), CL_MAX_LINE_LENGTH, _print_descr_rec_::CPOSPrintFormat, _attlist::list, _attrbuf::next, _print_descr_rec_::PrintStructureSeparator, _context_description_block::printStructureTags, _print_descr_rec_::printToken, _attrbuf::status, _print_descr_rec_::StructureBeginPrefix, and _print_descr_rec_::StructureBeginSuffix.

Referenced by compose_kwic_line().

void remember_this_position ( int  position,
int  this_token_start,
int  this_token_end,
int *  position_list,
int  nr_positions,
int *  returned_positions 
)

Adds the asserted number of positions (nr_positions) specified by this_token_start and this_token_end to an array of integers in returned_positions.

References nr_positions().

Referenced by compose_kwic_line().

void setup_kwic_line_memory ( void  )

Initialises the two empty auto-growing strings used for concordance line construction.

References cl_autostring_new(), and cl_autostring_truncate().

Referenced by compose_kwic_line().

void sort_s_att_regions ( void  )
static char* srev ( char *  s)
static

Reverses the argument string (destructively, that is, in situ).

Cf. the non-standard (Microsoft) function strrev.

This does not respect UTF-8, so anything reversed must be re-reversed before output or there will be invalid byte sequences.

Parameters
s: The string to modify.
Returns
A pointer to the modified string (same memory area as the argument string).

References buf.

Referenced by compose_kwic_line().

Variable Documentation

ClAutoString line = NULL
static

Used to build the concordance line (main line buffer).

See also
compose_kwic_line
int N_sar = 0
SAttRegion s_att_regions[MAX_S_ATTRS]
int sar_sort_index[MAX_S_ATTRS]
ClAutoString scratch = NULL
static

Scratch string used by get_field_separators only.

ClAutoString token = NULL
static