CWB
|
#include <stdio.h>
#include <glib.h>
#include "../cl/globals.h"
#include "../cl/macros.h"
#include "../cl/corpus.h"
#include "../cl/attributes.h"
#include "../cl/cdaccess.h"
#include "concordance.h"
#include "attlist.h"
#include "options.h"
Data Structures | |
struct | SAttRegion |
Represents a single s-attribute region and its annotation. More... | |
Macros | |
#define | MAXKWICLINELEN 65535 |
maximum length of a KWIC line (bytes). More... | |
#define | MAX_S_ATTRS 1024 /* max. number of s-attribute; same as MAX_ATTRS in <utils/decode.c> and MAXRANGES in <utils/encode.c> */ |
Functions | |
static char * | srev (char *s) |
Reverses the argument string (destructively, that is, in situ). More... | |
void | get_print_attribute_values (ContextDescriptor *cd, int position, ClAutoString s, int *sp, int max_sp, int add_position_number, PrintDescriptionRecord *pdr) |
Prints s-attribute values into a ClAutoString, for use in a printed concordance line. More... | |
void | sort_s_att_regions (void) |
void | get_position_values (ContextDescriptor *cd, int position, ClAutoString s, int *sp, int max_sp, int add_position_number, ConcLineLayout orientation, PrintDescriptionRecord *pdr, int nr_mappings, Mapping *mappings) |
Get values at the given corpus position. More... | |
void | remember_this_position (int position, int this_token_start, int this_token_end, int *position_list, int nr_positions, int *returned_positions) |
Adds the asserted number of positions (nr_positions) specified by this_token_start and this_token_end to an array of integers in returned_positions. More... | |
char * | get_field_separators (int position, ConcLineField *fields, int nr_fields, int at_end, PrintDescriptionRecord *pdr) |
This oddly-named function prints a series of separators for "fields" to an internal buffer. More... | |
void | setup_kwic_line_memory (void) |
Initialises the two empty auto-growing strings used for concordance line construction. More... | |
void | cleanup_kwic_line_memory (void) |
Frees the memory used for building a KWIC line for display. More... | |
char * | compose_kwic_line (Corpus *corpus, int match_start, int match_end, ContextDescriptor *cd, int *length, int *s_mb, int *s_me, char *left_marker, char *right_marker, int *position_list, int nr_positions, int *returned_positions, ConcLineField *fields, int nr_fields, ConcLineLayout orientation, PrintDescriptionRecord *pdr, int nr_mappings, Mapping *mappings) |
Builds a string for a concordance output line. More... | |
Variables | |
SAttRegion | s_att_regions [MAX_S_ATTRS] |
int | sar_sort_index [MAX_S_ATTRS] |
int | N_sar = 0 |
static ClAutoString | scratch = NULL |
Scratch string used by get_field_separators only. More... | |
static ClAutoString | line = NULL |
Used to build the concordance line (main line buffer). More... | |
static ClAutoString | token = NULL |
Used to build the concordance line (token buffer). More... | |
#define MAX_S_ATTRS 1024 /* max. number of s-attribute; same as MAX_ATTRS in <utils/decode.c> and MAXRANGES in <utils/encode.c> */ |
#define MAXKWICLINELEN 65535 |
maximum length of a KWIC line (bytes).
Referenced by compose_kwic_line().
void cleanup_kwic_line_memory | ( | void | ) |
Frees the memory used for building a KWIC line for display.
Best used when CQP shuts down.
References cl_autostring_delete().
Referenced by main().
char* compose_kwic_line | ( | Corpus * | corpus, |
int | match_start, | ||
int | match_end, | ||
ContextDescriptor * | cd, | ||
int * | length, | ||
int * | s_mb, | ||
int * | s_me, | ||
char * | left_marker, | ||
char * | right_marker, | ||
int * | position_list, | ||
int | nr_positions, | ||
int * | returned_positions, | ||
ConcLineField * | fields, | ||
int | nr_fields, | ||
ConcLineLayout | orientation, | ||
PrintDescriptionRecord * | pdr, | ||
int | nr_mappings, | ||
Mapping * | mappings | ||
) |
Builds a string for a concordance output line.
'position_list' is a list of (corpus) positions. The string start and end positions for these corpus positions are written into returned_positions, which must be exactly two times as large as the position list. The number of positions must be in nr_positions.
match_start | A corpus position |
match_end | A corpus position |
fields | Array of ConcLineField objects (each of which specifies one of the 4 anchors). |
nr_fields | Number of items in the "fields" array. |
Rough notes: I THINK returned_positions is just a blob of memory that the function is being allowed to use.
References AddNameToAL(), _print_descr_rec_::AfterField, _print_descr_rec_::AfterToken, ALIGN_CONTEXT, ATT_POS, _attrbuf::attribute, _context_description_block::attributes, _print_descr_rec_::BeforeField, _print_descr_rec_::BeforeToken, CHAR_CONTEXT, TCorpus::charset, cl_alg2cpos(), cl_autostring_concat(), cl_autostring_ptr(), cl_autostring_truncate(), cl_charset_strlen(), cl_cpos2alg(), cl_cpos2struc(), cl_max_cpos(), cl_max_struc(), cl_strdup(), cl_string_utf8_continuation_byte(), cl_struc2cpos(), ConcLineHorizontal, ClAutoString::data, DEFAULT_ATT_NAME, FindInAL(), get_field_separators(), get_position_values(), get_print_attribute_values(), _context_description_block::left_structure, _context_description_block::left_type, _context_description_block::left_width, ClAutoString::len, _attlist::list, _attlist::list_valid, MAX, MAXKWICLINELEN, MIN, NewAttributeList(), _attrbuf::next, nr_positions(), _context_description_block::print_cpos, remember_this_position(), _context_description_block::right_structure, _context_description_block::right_type, _context_description_block::right_width, setup_kwic_line_memory(), srev(), _attrbuf::status, STRUC_CONTEXT, text_size, _print_descr_rec_::TokenSeparator, utf8, VerifyList(), word, and WORD_CONTEXT.
Referenced by html_print_output(), latex_print_output(), print_concordance_line(), printAlignedStrings(), and sgml_print_output().
char* get_field_separators | ( | int | position, |
ConcLineField * | fields, | ||
int | nr_fields, | ||
int | at_end, | ||
PrintDescriptionRecord * | pdr | ||
) |
This oddly-named function prints a series of separators for "fields" to an internal buffer.
"Field" in this context means one of the 4 anchor points (begin, end, target, keyword).
position | The corpus position (cpos) whose field-separators we want. |
fields | Pointer to array of ConcLineField objects (each of which specifies one of the 4 anchors). |
nr_fields | Number of items in the "fields" array. |
at_end | Boolean: if true, we get the end-separators for the fields at this cpos; if false, we get the beginning-separators for the fields at this cpos. |
pdr | The PDR for the current concordance printout. |
References cl_autostring_concat(), cl_autostring_new(), cl_autostring_truncate(), ClAutoString::data, ClAutoString::len, _print_descr_rec_::printField, and _ConcLineField::type.
Referenced by compose_kwic_line().
void get_position_values | ( | ContextDescriptor * | cd, |
int | position, | ||
ClAutoString | s, | ||
int * | sp, | ||
int | max_sp, | ||
int | add_position_number, | ||
ConcLineLayout | orientation, | ||
PrintDescriptionRecord * | pdr, | ||
int | nr_mappings, | ||
Mapping * | mappings | ||
) |
Get values at the given corpus position.
References _print_descr_rec_::AfterLine, SAttRegion::annot, _Attribute::any, _attrbuf::attribute, _mapping::attribute, _context_description_block::attributes, _print_descr_rec_::AttributeSeparator, CDA_OK, cderrno, cl_autostring_concat(), cl_autostring_truncate(), cl_cpos2str(), cl_cpos2struc(), CL_MAX_LINE_LENGTH, cl_struc2cpos(), cl_struc2str(), cl_struc_values(), ConcLineHorizontal, ConcLineVertical, _print_descr_rec_::CPOSPrintFormat, ClAutoString::data, SAttRegion::end, get_id_at_position, _attlist::list, map_id_to_class_number(), N_sar, SAttRegion::name, _attrbuf::next, _print_descr_rec_::printToken, sar_sort_index, show_tag_attributes, sort_s_att_regions(), SAttRegion::start, _attrbuf::status, _context_description_block::strucAttributes, _print_descr_rec_::StructureBeginPrefix, _print_descr_rec_::StructureBeginSuffix, _print_descr_rec_::StructureEndPrefix, _print_descr_rec_::StructureEndSuffix, and word.
Referenced by compose_kwic_line().
void get_print_attribute_values | ( | ContextDescriptor * | cd, |
int | position, | ||
ClAutoString | s, | ||
int * | sp, | ||
int | max_sp, | ||
int | add_position_number, | ||
PrintDescriptionRecord * | pdr | ||
) |
Prints s-attribute values into a ClAutoString, for use in a printed concordance line.
(Note, the function is called "attribute values" but it very specifically means s-attributes.)
The s-attributes that will be printed are determined by the contents of the ContextDescriptor object; the PrintDescriptionRecord determines what they look like.
cd | Print settings (context size/type; atts to print) |
position | The CPOS to be used in the position at the start |
s | Results will be concatenated onto this string. |
sp | Deprecated argument: not used by func |
max_sp | Deprecated argument: not used by func |
add_position_number | Boolean: whether or not to make a position number (start of concordance line) |
pdr | Print settings (of main mode) to use |
References _print_descr_rec_::AfterPrintStructures, _Attribute::any, _attrbuf::attribute, _print_descr_rec_::BeforePrintStructures, cl_autostring_concat(), cl_cpos2struc2str(), CL_MAX_LINE_LENGTH, _print_descr_rec_::CPOSPrintFormat, _attlist::list, _attrbuf::next, _print_descr_rec_::PrintStructureSeparator, _context_description_block::printStructureTags, _print_descr_rec_::printToken, _attrbuf::status, _print_descr_rec_::StructureBeginPrefix, and _print_descr_rec_::StructureBeginSuffix.
Referenced by compose_kwic_line().
void remember_this_position | ( | int | position, |
int | this_token_start, | ||
int | this_token_end, | ||
int * | position_list, | ||
int | nr_positions, | ||
int * | returned_positions | ||
) |
Adds the asserted number of positions (nr_positions) specified by this_token_start and this_token_end to an array of integers in returned_positions.
References nr_positions().
Referenced by compose_kwic_line().
void setup_kwic_line_memory | ( | void | ) |
Initialises the two empty auto-growing strings used for concordance line construction.
References cl_autostring_new(), and cl_autostring_truncate().
Referenced by compose_kwic_line().
void sort_s_att_regions | ( | void | ) |
References SAttRegion::end, N_sar, sar_sort_index, and SAttRegion::start.
Referenced by get_position_values().
|
static |
Reverses the argument string (destructively, that is, in situ).
Cf. the non-standard (microsoft) function strrev.
This does not respect UTF-8, so anything reversed must be re-reversed before output or there will be invalid byte sequences.
s | The string to modify. |
References buf.
Referenced by compose_kwic_line().
|
static |
Used to build the concordance line (main line buffer).
int N_sar = 0 |
Referenced by get_position_values(), and sort_s_att_regions().
SAttRegion s_att_regions[MAX_S_ATTRS] |
int sar_sort_index[MAX_S_ATTRS] |
Referenced by get_position_values(), and sort_s_att_regions().
|
static |
Scratch string used by get_field_separators only.
|
static |
Used to build the concordance line (token buffer).
Referenced by ComputePrintStructures(), encode_add_wattr_line(), expand_macro(), main(), ParsePrintOptions(), range_open(), read_mapping(), SortExternally(), and SortSubcorpus().