CWB
|
#include <ctype.h>
#include "../cl/globals.h"
#include "../cl/cl.h"
#include "../cl/corpus.h"
#include "../cl/attributes.h"
Data Structures | |
struct | SAttRegion |
Represents a single s-attribute region and its annotation. More... | |
Macros | |
#define | MAX_ATTRS 1024 |
Maximum number of attributes that can be printed. More... | |
#define | MAX_PRINT_VALUES 1024 |
Maximum number of attributes whose "surrounding values" can be printed in matchlist mode. More... | |
Typedefs | |
typedef enum _output_modes | OutputMode |
Enumerations | |
enum | _output_modes { StandardMode, LispMode, EncodeMode, ConclineMode, XMLMode } |
Functions | |
void | decode_cleanup (int error_code) |
Cleans up memory prior to an error-prompted exit. More... | |
void | decode_usage (int exit_code) |
Prints a usage message and exits the program. More... | |
int | is_num (char *s) |
Check whether a string represents a number. More... | |
const char * | decode_string_escape (const char *s) |
Escapes a string according to the currently active global mode. More... | |
void | decode_print_xml_declaration (void) |
Prints an XML declaration, using character set specification obtained from the global corpus variable. More... | |
void | decode_sort_s_att_regions (void) |
Sorts s_att_regions[MAX_ATTRS] in ascending 'nested' order, using sar_sort_index[] (which is automatically initialised). More... | |
int | decode_attribute_is_in_list (Attribute *attr, Attribute **att_list, int att_list_size) |
Determines whether or not a given Attribute is in an array of Attributes. More... | |
int | decode_add_attribute (Attribute *attr) |
Adds a specified Attribute to the global print_list array. More... | |
void | decode_verify_print_value_list (void) |
Checks the contents of the global printValues array, to make sure that no s-attribute in it is also declared in the main print_list array. More... | |
void | decode_print_surrounding_s_att_values (int position) |
Prints a starting tag for each s-attribute. More... | |
void | decode_print_token_sequence (int start_position, int end_position, Attribute *context) |
Prints out the requested attributes for a sequence of tokens (or a single token if end_position == -1). More... | |
int | main (int argc, char **argv) |
Main function for cwb-decode. More... | |
Variables | |
char * | progname = NULL |
char * | registry_directory = NULL |
char * | corpus_id = NULL |
Corpus * | corpus = NULL |
Attribute * | print_list [MAX_ATTRS] |
array of attributes selected by user for printing More... | |
int | print_list_index = 0 |
Number of attributes added to print_list (so far); used as the exclusive upper bound (i.e. with less-than) when iterating over that array. More... | |
SAttRegion | s_att_regions [MAX_ATTRS] |
int | sar_sort_index [MAX_ATTRS] |
index used for bubble-sorting list of regions More... | |
int | N_sar = 0 |
number of regions currently in list (may change for each token printed) More... | |
Attribute * | printValues [MAX_PRINT_VALUES] |
List of s-attributes whose values are to be printed. More... | |
int | printValuesIndex = 0 |
Number of attributes added to printValues (so far); used as the exclusive upper bound (i.e. with less-than) when iterating over that array. More... | |
int | first_token |
cpos of token to begin output at More... | |
int | last |
cpos of token to end output at (inclusive; ie this one gets printed!) More... | |
int | maxlast |
maximum ending cpos (deduced from size of p-attribute); More... | |
int | printnum = 0 |
whether or not token numbers are to be printed (-n option) More... | |
OutputMode | mode = StandardMode |
global variable for overall output mode More... | |
int | xml_compatible = 0 |
xml-style, for (cwb-encode -x ...); EncodeMode only, selected by -Cx More... | |
#define MAX_ATTRS 1024 |
Maximum number of attributes that can be printed.
Referenced by decode_add_attribute().
#define MAX_PRINT_VALUES 1024 |
Maximum number of attributes whose "surrounding values" can be printed in matchlist mode.
Referenced by main().
typedef enum _output_modes OutputMode |
enum _output_modes |
int decode_add_attribute | ( | Attribute * | attr | ) |
Adds a specified Attribute to the global print_list array.
Aborts the program if that array is already full.
References _Attribute::any, corpus_id, decode_attribute_is_in_list(), decode_cleanup(), MAX_ATTRS, and print_list_index.
Referenced by main().
Determines whether or not a given Attribute is in an array of Attributes.
attr | The attribute to look for. |
att_list | Pointer to the first member of the array (i.e. array name). |
att_list_size | Upper bound of the array (the last member the function checks is att_list[att_list_size-1]). |
Referenced by decode_add_attribute(), and decode_verify_print_value_list().
void decode_cleanup | ( | int | error_code | ) |
Cleans up memory prior to an error-prompted exit.
error_code | Value to be returned by the program when it exits. |
References cl_delete_corpus().
Referenced by decode_add_attribute(), decode_print_token_sequence(), decode_usage(), and main().
void decode_print_surrounding_s_att_values | ( | int | position | ) |
Prints a starting tag for each s-attribute.
References _Attribute::any, cl_cpos2struc(), cl_struc2str(), ConclineMode, decode_string_escape(), EncodeMode, LispMode, mode, printValuesIndex, StandardMode, and XMLMode.
Referenced by main().
void decode_print_token_sequence | ( | int | start_position, |
int | end_position, | ||
Attribute * | context | ||
) |
Prints out the requested attributes for a sequence of tokens (or a single token if end_position == -1).
If the -c flag was used (and, thus, the context parameter is not NULL), then the sequence is extended to the entire s-attribute region (in matchlist mode).
References SAttRegion::annot, _Attribute::any, ATT_ALIGN, ATT_DYN, ATT_POS, ATT_STRUC, CDA_OK, cl_alg2cpos(), cl_cpos2alg(), cl_cpos2str(), cl_cpos2struc(), cl_cpos2struc2cpos(), cl_errno, cl_error(), cl_struc2cpos(), cl_struc2str(), cl_struc_values(), ConclineMode, decode_cleanup(), decode_sort_s_att_regions(), decode_string_escape(), EncodeMode, SAttRegion::end, LispMode, mode, N_sar, SAttRegion::name, print_list_index, printnum, sar_sort_index, StandardMode, SAttRegion::start, _Attribute::type, and XMLMode.
Referenced by main().
void decode_print_xml_declaration | ( | void | ) |
void decode_sort_s_att_regions | ( | void | ) |
Sorts s_att_regions[MAX_ATTRS] in ascending 'nested' order, using sar_sort_index[] (which is automatically initialised).
Since only regions which begin or end at the current token are considered, such an ordering is always possible; without knowing the current token, we sort by end position descending, then by start position ascending, which gives us:
The function uses bubble sort in order to retain the existing order of identical regions.
References SAttRegion::end, N_sar, sar_sort_index, and SAttRegion::start.
Referenced by decode_print_token_sequence().
const char* decode_string_escape | ( | const char * | s | ) |
Escapes a string according to the currently active global mode.
In XMLMode, this function converts the string to an encoded XML string; all 'critical' characters are replaced by entity references, and C0 control characters are replaced with blanks. (This also happens in other modes - i.e. compact - if the global xml_compatible variable is true.)
In LispMode, it converts the string to a Lisp string with the required escapes (probably!)
In any other mode, it does nothing, and just returns the argument pointer.
It is safe to use this function without checking for a NULL argument, as NULLs will just be returned as NULLs.
Warning: returns pointer to static internal buffer of fixed size; in particular, don't use it twice in a single argument list!
s | String to encode. |
References CL_MAX_LINE_LENGTH, LispMode, mode, xml_compatible, and XMLMode.
Referenced by decode_print_surrounding_s_att_values(), and decode_print_token_sequence().
void decode_usage | ( | int | exit_code | ) |
Prints a usage message and exits the program.
exit_code | Value to be returned by the program when it exits. |
References decode_cleanup(), progname, and VERSION.
Referenced by main().
void decode_verify_print_value_list | ( | void | ) |
Checks the contents of the global printValues array, to make sure that no s-attribute in it is also declared in the main print_list array.
If an attribute is found to be declared in both, a warning is printed.
References corpus_id, decode_attribute_is_in_list(), print_list_index, and printValuesIndex.
Referenced by main().
int is_num | ( | char * | s | ) |
Check whether a string represents a number.
s | The string to check. |
Referenced by main().
int main | ( | int | argc, |
char ** | argv | ||
) |
Main function for cwb-decode.
argc | Number of command-line arguments. |
argv | Command-line arguments. |
References _Attribute::any, ATT_ALIGN, ATT_POS, ATT_STRUC, TCorpus::attributes, cl_close_stream(), cl_error(), cl_max_cpos(), CL_MAX_LINE_LENGTH, cl_new_attribute, cl_new_corpus(), cl_open_stream(), cl_standard_registry(), CL_STREAM_MAGIC, CL_STREAM_READ, cl_struc_values(), ConclineMode, corpus_id, decode_add_attribute(), decode_cleanup(), decode_print_surrounding_s_att_values(), decode_print_token_sequence(), decode_print_xml_declaration(), decode_usage(), decode_verify_print_value_list(), EncodeMode, first_token, input_file, input_filename, is_num(), last, LispMode, MAX_PRINT_VALUES, maxlast, mode, printnum, printValuesIndex, progname, registry_directory, token, xml_compatible, and XMLMode.
Corpus* corpus = NULL |
char* corpus_id = NULL |
Referenced by decode_add_attribute(), decode_verify_print_value_list(), and main().
int first_token |
cpos of token to begin output at
Referenced by main().
int last |
cpos of token to end output at (inclusive; ie this one gets printed!)
Referenced by ascii_print_output(), cl_path_get_component(), do_cqi_cqp_dump_subcorpus(), do_cut(), encode_strtok(), html_print_output(), latex_print_output(), main(), print_tabulation(), and sgml_print_output().
int maxlast |
maximum ending cpos (deduced from size of p-attribute);
Referenced by main().
OutputMode mode = StandardMode |
global variable for overall output mode
Referenced by cl_open_stream(), decode_print_surrounding_s_att_values(), decode_print_token_sequence(), decode_string_escape(), main(), and open_stream().
int N_sar = 0 |
number of regions currently in list (may change for each token printed)
Referenced by decode_print_token_sequence(), and decode_sort_s_att_regions().
int print_list_index = 0 |
Number of attributes added to print_list (so far); used as the exclusive upper bound (i.e. with less-than) when iterating over that array.
Referenced by decode_add_attribute(), decode_print_token_sequence(), and decode_verify_print_value_list().
int printnum = 0 |
whether or not token numbers are to be printed (-n option)
Referenced by decode_print_token_sequence(), and main().
Attribute* printValues[MAX_PRINT_VALUES] |
List of s-attributes whose values are to be printed.
int printValuesIndex = 0 |
Number of attributes added to printValues (so far); used as the exclusive upper bound (i.e. with less-than) when iterating over that array.
Referenced by decode_print_surrounding_s_att_values(), decode_verify_print_value_list(), and main().
char* progname = NULL |
Referenced by decode_usage(), and main().
char* registry_directory = NULL |
Referenced by main().
SAttRegion s_att_regions[MAX_ATTRS] |
int sar_sort_index[MAX_ATTRS] |
index used for bubble-sorting list of regions
Referenced by decode_print_token_sequence(), and decode_sort_s_att_regions().
int xml_compatible = 0 |
xml-style, for (cwb-encode -x ...); EncodeMode only, selected by -Cx
Referenced by decode_string_escape(), and main().