CWB
Data Structures | Macros | Typedefs | Enumerations | Functions | Variables
cwb-decode.c File Reference
#include <ctype.h>
#include "../cl/globals.h"
#include "../cl/cl.h"
#include "../cl/corpus.h"
#include "../cl/attributes.h"

Data Structures

struct  SAttRegion
 Represents a single s-attribute region and its annotation. More...
 

Macros

#define MAX_ATTRS   1024
 Maximum number of attributes that can be printed. More...
 
#define MAX_PRINT_VALUES   1024
 Maximum number of attributes whose "surrounding values" can be printed in matchlist mode. More...
 

Typedefs

typedef enum _output_modes OutputMode
 

Enumerations

enum  _output_modes {
  StandardMode, LispMode, EncodeMode, ConclineMode,
  XMLMode
}
 

Functions

void decode_cleanup (int error_code)
 Cleans up memory prior to an error-prompted exit. More...
 
void decode_usage (int exit_code)
 Prints a usage message and exits the program. More...
 
int is_num (char *s)
 Check whether a string represents a number. More...
 
const char * decode_string_escape (const char *s)
 Escapes a string according to the currently active global mode. More...
 
void decode_print_xml_declaration (void)
 Prints an XML declaration, using character set specification obtained from the global corpus variable. More...
 
void decode_sort_s_att_regions (void)
 Sorts s_att_regions[MAX_ATTRS] in ascending 'nested' order, using sar_sort_index[] (which is automatically initialised). More...
 
int decode_attribute_is_in_list (Attribute *attr, Attribute **att_list, int att_list_size)
 Determines whether or not a given Attribute is in an array of Attributes. More...
 
int decode_add_attribute (Attribute *attr)
 Adds a specified Attribute to the global print_list array. More...
 
void decode_verify_print_value_list (void)
 Checks the contents of the global printValues array, to make sure that no s-attribute in it is also declared in the main print_list array. More...
 
void decode_print_surrounding_s_att_values (int position)
 Prints a starting tag for each s-attribute. More...
 
void decode_print_token_sequence (int start_position, int end_position, Attribute *context)
 Prints out the requested attributes for a sequence of tokens (or a single token if end_position == -1). More...
 
int main (int argc, char **argv)
 Main function for cwb-decode. More...
 

Variables

char * progname = NULL
 
char * registry_directory = NULL
 
char * corpus_id = NULL
 
Corpus * corpus = NULL
 
Attribute * print_list [MAX_ATTRS]
 array of attributes selected by user for printing More...
 
int print_list_index = 0
 Number of atts added to print_list (so far); used with less-than, = top limit for scrolling that array. More...
 
SAttRegion s_att_regions [MAX_ATTRS]
 
int sar_sort_index [MAX_ATTRS]
 index used for bubble-sorting list of regions More...
 
int N_sar = 0
 number of regions currently in list (may change for each token printed) More...
 
Attribute * printValues [MAX_PRINT_VALUES]
 List of s-attributes whose values are to be printed. More...
 
int printValuesIndex = 0
 Number of atts added to printValues (so far); used with less-than, = top limit for scrolling that array. More...
 
int first_token
 cpos of token to begin output at More...
 
int last
 cpos of token to end output at (inclusive; ie this one gets printed!) More...
 
int maxlast
 maximum ending cpos (deduced from size of p-attribute); More...
 
int printnum = 0
 whether or not token numbers are to be printed (-n option) More...
 
OutputMode mode = StandardMode
 global variable for overall output mode More...
 
int xml_compatible = 0
 xml-style, for (cwb-encode -x ...); EncodeMode only, selected by -Cx More...
 

Macro Definition Documentation

#define MAX_ATTRS   1024

Maximum number of attributes that can be printed.

Referenced by decode_add_attribute().

#define MAX_PRINT_VALUES   1024

Maximum number of attributes whose "surrounding values" can be printed in matchlist mode.

Referenced by main().

Typedef Documentation

typedef enum _output_modes OutputMode

Enumeration Type Documentation

Enumerator
StandardMode 
LispMode 
EncodeMode 
ConclineMode 
XMLMode 

Function Documentation

int decode_add_attribute ( Attribute *  attr)

Adds a specified Attribute to the global print_list array.

Aborts the program if that array is already full.

Returns
Boolean.

References _Attribute::any, corpus_id, decode_attribute_is_in_list(), decode_cleanup(), MAX_ATTRS, and print_list_index.

Referenced by main().

int decode_attribute_is_in_list ( Attribute *  attr,
Attribute **  att_list,
int  att_list_size 
)

Determines whether or not a given Attribute is in an array of Attributes.

Parameters
attr: The attribute to look for.
att_list: Pointer to the first member of the array (i.e. array name).
att_list_size: Upper bound of the array (the last member the function checks is att_list[att_list_size-1]).
Returns
Boolean.

Referenced by decode_add_attribute(), and decode_verify_print_value_list().

void decode_cleanup ( int  error_code)

Cleans up memory prior to an error-prompted exit.

Parameters
error_code: Value to be returned by the program when it exits.

References cl_delete_corpus().

Referenced by decode_add_attribute(), decode_print_token_sequence(), decode_usage(), and main().

void decode_print_surrounding_s_att_values ( int  position)

Prints a starting tag for each s-attribute.

References _Attribute::any, cl_cpos2struc(), cl_struc2str(), ConclineMode, decode_string_escape(), EncodeMode, LispMode, mode, printValuesIndex, StandardMode, and XMLMode.

Referenced by main().

void decode_print_token_sequence ( int  start_position,
int  end_position,
Attribute *  context 
)

Prints out the requested attributes for a sequence of tokens (or a single token if end_position == -1).

If the -c flag was used (and, thus, the context parameter is not NULL), then the sequence is extended to the entire s-attribute region (in matchlist mode).

References SAttRegion::annot, _Attribute::any, ATT_ALIGN, ATT_DYN, ATT_POS, ATT_STRUC, CDA_OK, cl_alg2cpos(), cl_cpos2alg(), cl_cpos2str(), cl_cpos2struc(), cl_cpos2struc2cpos(), cl_errno, cl_error(), cl_struc2cpos(), cl_struc2str(), cl_struc_values(), ConclineMode, decode_cleanup(), decode_sort_s_att_regions(), decode_string_escape(), EncodeMode, SAttRegion::end, LispMode, mode, N_sar, SAttRegion::name, print_list_index, printnum, sar_sort_index, StandardMode, SAttRegion::start, _Attribute::type, and XMLMode.

Referenced by main().

void decode_print_xml_declaration ( void  )

Prints an XML declaration, using character set specification obtained from the global corpus variable.

References arabic, charset, cl_corpus_charset(), cyrillic, greek, hebrew, latin1, latin2, latin3, latin4, latin5, latin6, latin7, latin8, latin9, unknown_charset, and utf8.

Referenced by main().

void decode_sort_s_att_regions ( void  )

Sorts s_att_regions[MAX_ATTRS] in ascending 'nested' order, using sar_sort_index[] (which is automatically initialised).

Since only regions which begin or end at the current token are considered, such an ordering is always possible; without knowing the current token, we sort by end position descending, then by start position ascending, which gives us:

  • first the regions corresponding to start tags, beginning with the 'largest' region
  • then the regions corresponding to end tags, again beginning with the 'largest' region

The function uses bubble sort in order to retain the existing order of identical regions.

References SAttRegion::end, N_sar, sar_sort_index, and SAttRegion::start.

Referenced by decode_print_token_sequence().

const char* decode_string_escape ( const char *  s)

Escapes a string according to the currently active global mode.

In XMLMode, this function converts the string to an encoded XML string; all 'critical' characters are replaced by entity references, and C0 control characters are replaced with blanks. (This also happens in other modes - i.e. compact - if the global xml_compatible variable is true.)

In LispMode, it converts the string to a Lisp string with the required escapes (probably!)

In any other mode, it does nothing, and just returns the argument pointer.

It is safe to use this function without checking for a NULL argument, as NULLs will just be returned as NULLs.

Warning: returns pointer to static internal buffer of fixed size; in particular, don't use it twice in a single argument list!

See also
EncodeMode
Parameters
s: String to encode.
Returns
Pointer to encoded string in static internal buffer; or, the argument s iff the mode is not one that requires any encoding. If the argument is NULL, NULL is returned.

References CL_MAX_LINE_LENGTH, LispMode, mode, xml_compatible, and XMLMode.

Referenced by decode_print_surrounding_s_att_values(), and decode_print_token_sequence().

void decode_usage ( int  exit_code)

Prints a usage message and exits the program.

Parameters
exit_code: Value to be returned by the program when it exits.

References decode_cleanup(), progname, and VERSION.

Referenced by main().

void decode_verify_print_value_list ( void  )

Checks the contents of the global printValues array, to make sure that no s-attribute in it is also declared in the main print_list array.

If an attribute is found to be declared in both, a warning is printed.

References corpus_id, decode_attribute_is_in_list(), print_list_index, and printValuesIndex.

Referenced by main().

int is_num ( char *  s)

Check whether a string represents a number.

Parameters
s: The string to check.
Returns
Boolean: true iff s contains only digits.

Referenced by main().

int main ( int  argc,
char **  argv 
)

Variable Documentation

Corpus* corpus = NULL
char* corpus_id = NULL
int first_token

cpos of token to begin output at

Referenced by main().

int last
int maxlast

maximum ending cpos (deduced from size of p-attribute);

Referenced by main().

int N_sar = 0

number of regions currently in list (may change for each token printed)

Referenced by decode_print_token_sequence(), and decode_sort_s_att_regions().

Attribute* print_list[MAX_ATTRS]

array of attributes selected by user for printing

int print_list_index = 0

Number of atts added to print_list (so far); used with less-than, = top limit for scrolling that array.

Referenced by decode_add_attribute(), decode_print_token_sequence(), and decode_verify_print_value_list().

int printnum = 0

whether or not token numbers are to be printed (-n option)

Referenced by decode_print_token_sequence(), and main().

Attribute* printValues[MAX_PRINT_VALUES]

List of s-attributes whose values are to be printed.

int printValuesIndex = 0

Number of atts added to printValues (so far); used with less-than, = top limit for scrolling that array.

Referenced by decode_print_surrounding_s_att_values(), decode_verify_print_value_list(), and main().

char* progname = NULL

Referenced by decode_usage(), and main().

char* registry_directory = NULL

Referenced by main().

SAttRegion s_att_regions[MAX_ATTRS]
int sar_sort_index[MAX_ATTRS]

index used for bubble-sorting list of regions

Referenced by decode_print_token_sequence(), and decode_sort_s_att_regions().

int xml_compatible = 0

xml-style, for (cwb-encode -x ...); EncodeMode only, selected by -Cx

Referenced by decode_string_escape(), and main().