CWB
|
#include <ctype.h>
#include <math.h>
#include <stdarg.h>
#include <sys/types.h>
#include <time.h>
#include <dirent.h>
#include <errno.h>
#include <sys/stat.h>
#include "../cl/globals.h"
#include "../cl/macros.h"
#include "../cl/storage.h"
#include "../cl/lexhash.h"
#include "../cl/endian.h"
#include "../cl/attributes.h"
#include <sys/time.h>
Data Structures | |
struct | _Range |
The Range object represents a range of corpus positions - for instance, the range enclosed by an instance of an s-attribute. More... | |
struct | WAttr |
WAttr object: represents a P-attribute being encoded. More... | |
Macros | |
#define | UMASK 0644 |
User privileges of new files (octal format) More... | |
#define | UNDEF_VALUE "__UNDEF__" |
Default string used as value of P-attributes when a value is missing, i.e. if a tab-delimited field is empty. More... | |
#define | FIELDSEPS "\t\n" |
Default string containing the characters that can function as field separators. More... | |
#define | MAXRANGES 1024 |
max number of s-attributes; also max number of p-attributes (-> could change this to implementation as a linked list) More... | |
#define | REP_CHECK_LEXHASH_SIZE 1000 |
nr of buckets of lexhashes used for checking duplicate errors (undeclared element and attribute names in XML tags) More... | |
#define | MAX_INPUT_LINE_LENGTH 65536 |
Input buffer size. More... | |
#define | DEFAULT_INFILE_EXTENSION ".vrt" |
Normal extension for CWB input text files. More... | |
#define | STRUC_RNG "%s" SUBDIR_SEP_STRING "%s.rng" |
CL naming convention for S-attribute RNG files. More... | |
#define | STRUC_AVX "%s" SUBDIR_SEP_STRING "%s.avx" |
CL naming convention for S-attribute AVX (attribute-value index) files. More... | |
#define | STRUC_AVS "%s" SUBDIR_SEP_STRING "%s.avs" |
CL naming convention for S-attribute AVS (attribute values) files. More... | |
#define | POS_CORPUS "%s" SUBDIR_SEP_STRING "%s.corpus" |
CL naming convention for P-attribute Corpus files. More... | |
#define | POS_LEX "%s" SUBDIR_SEP_STRING "%s.lexicon" |
CL naming convention for P-attribute Lexicon files. More... | |
#define | POS_LEXIDX "%s" SUBDIR_SEP_STRING "%s.lexicon.idx" |
CL naming convention for P-attribute Lexicon-index files. More... | |
Typedefs | |
typedef struct _Range | Range |
Range object: represents an S-attribute being encoded, and holds some information about the currently-being-processed instance of that S-attribute. More... | |
Functions | |
char * | encode_strtok (register char *s, register const char *delim) |
A replacement for the strtok() function which doesn't skip empty fields. More... | |
void | encode_print_time (FILE *stream, char *msg) |
Prints a message plus the current time to the specified file/stream. More... | |
void | encode_usage (void) |
Prints a usage message and exits the program. More... | |
void | encode_print_input_lineno (void) |
Prints the input line number (and input filename, if applicable) on STDERR, for error messages and warnings. More... | |
void | encode_error (char *format,...) |
Prints an error message to STDERR, automatically adding a message on the location of the error in the corpus. More... | |
cl_string_list | encode_scan_directory (char *dir) |
Get a list of files in a given directory. More... | |
int | range_find (char *name) |
Gets the index (in the ranges array) of a specified S-attribute. More... | |
void | range_print_registry_line (Range *rng, FILE *fd, int print_comment) |
Prints registry lines for a given s-attribute, and its children, if any, to the specified file handle. More... | |
Range * | range_declare (char *name, char *directory, int store_values, int null_attribute) |
Creates a Range object to store a specified s-attribute (and, if appropriate, does the same for children-attributes). More... | |
void | range_close (Range *rng, int end_pos) |
Closes a currently open instance of an S-attribute. More... | |
void | range_open (Range *rng, int start_pos, char *annot) |
Opens an instance of the given S-attribute. More... | |
int | wattr_find (char *name) |
Finds a p-attribute (in the global wattrs array). More... | |
int | wattr_declare (char *name, char *directory, int nr_buckets) |
Sets up a new p-attribute, including opening corpus, lex and index file handles. More... | |
void | wattr_close_all (void) |
Closes all three file handles for each of the wattr objects in cwb-encode's global array. More... | |
void | encode_parse_options (int argc, char **argv) |
Parses program options and sets global variables. More... | |
void | encode_add_wattr_line (char *str) |
Processes a token data line. More... | |
int | encode_get_input_line (char *buffer, int bufsize) |
Reads one input line into the specified buffer (either from stdin, or from one or more input files). More... | |
void | encode_generate_registry_file (char *registry_file) |
Writes a registry file for the corpus that has been encoded. More... | |
int | main (int argc, char **argv) |
Main function for cwb-encode. More... | |
Variables | |
char * | field_separators = FIELDSEPS |
string containing the characters that can function as field separators More... | |
char * | undef_value = UNDEF_VALUE |
string used as value of P-attributes when a value is missing, i.e. if a tab-delimited field is empty More... | |
int | debug = 0 |
debug mode on or off? More... | |
int | silent = 0 |
hide messages More... | |
int | verbose = 0 |
show progress (this is not the opposite of silent!) More... | |
int | xml_aware = 0 |
substitute XML entities in p-attributes & ignore <? and <! lines More... | |
int | skip_empty_lines = 0 |
skip empty lines when encoding? More... | |
unsigned | line = 0 |
corpus position currently being encoded (i.e. cpos of next token) More... | |
int | strip_blanks = 0 |
strip leading and trailing blanks from input and token annotations More... | |
cl_string_list | input_files = NULL |
list of input file(s) (-f option(s)) More... | |
int | nr_input_files = 0 |
number of input files (length of list after option processing) More... | |
int | current_input_file = 0 |
index of input file currently being processed More... | |
char * | current_input_file_name = NULL |
filename of current input file, for error messages More... | |
FILE * | input_fd = NULL |
file handle for current input file (or pipe) (text mode!) More... | |
unsigned long | input_line = 0 |
input line number (reset for each new file) for error messages More... | |
char * | registry_file = NULL |
if set, auto-generate registry file named {registry_file}, listing declared attributes More... | |
char * | directory = NULL |
corpus data directory (no longer defaults to current directory) More... | |
char * | corpus_character_set = "latin1" |
character set label that is inserted into the registry file More... | |
CorpusCharset | encoding_charset |
a charset object to be generated from corpus_character_set More... | |
int | clean_strings = 0 |
clean up input strings by replacing invalid bytes with '?' More... | |
Range | ranges [MAXRANGES] |
A global array for keeping track of S-attributes being encoded. More... | |
int | range_ptr = 0 |
WAttr | wattrs [MAXRANGES] |
A global array for keeping track of P-attributes being encoded. More... | |
int | wattr_ptr = 0 |
cl_lexhash | undeclared_sattrs = NULL |
lookup hash for undeclared s-attributes and s-attributes declared with -S that have annotations (which will be ignored), so warnings are issued only once More... | |
char * | progname = NULL |
name of the currently running program More... | |
#define DEFAULT_INFILE_EXTENSION ".vrt" |
Normal extension for CWB input text files.
(must have exactly 4 characters; .gz/.bz2 may be added to this if the file is compressed.)
Referenced by encode_scan_directory(), and encode_usage().
#define FIELDSEPS "\t\n" |
Default string containing the characters that can function as field separators.
#define MAX_INPUT_LINE_LENGTH 65536 |
Input buffer size.
If we have XML tags with attributes, input lines can become pretty long (but there's basically just a single buffer)
Referenced by encode_get_input_line(), and main().
#define MAXRANGES 1024 |
max number of s-attributes; also max number of p-attributes (-> could change this to implementation as a linked list)
Referenced by encode_parse_options(), and range_declare().
#define POS_CORPUS "%s" SUBDIR_SEP_STRING "%s.corpus" |
CL naming convention for P-attribute Corpus files.
Referenced by wattr_declare().
#define POS_LEX "%s" SUBDIR_SEP_STRING "%s.lexicon" |
CL naming convention for P-attribute Lexicon files.
Referenced by wattr_declare().
#define POS_LEXIDX "%s" SUBDIR_SEP_STRING "%s.lexicon.idx" |
CL naming convention for P-attribute Lexicon-index files.
Referenced by wattr_declare().
#define REP_CHECK_LEXHASH_SIZE 1000 |
nr of buckets of lexhashes used for checking duplicate errors (undeclared element and attribute names in XML tags)
Referenced by main(), and range_declare().
#define STRUC_AVS "%s" SUBDIR_SEP_STRING "%s.avs" |
CL naming convention for S-attribute AVS (attribute values) files.
Referenced by range_declare().
#define STRUC_AVX "%s" SUBDIR_SEP_STRING "%s.avx" |
CL naming convention for S-attribute AVX (attribute-value index) files.
Referenced by range_declare().
#define STRUC_RNG "%s" SUBDIR_SEP_STRING "%s.rng" |
CL naming convention for S-attribute RNG files.
Referenced by range_declare().
#define UMASK 0644 |
User privileges of new files (octal format)
#define UNDEF_VALUE "__UNDEF__" |
Default string used as value of P-attributes when a value is missing, i.e. if a tab-delimited field is empty.
Range object: represents an S-attribute being encoded, and holds some information about the currently-being-processed instance of that S-attribute.
TODO should probably be called an SAttr or SAttEncoder or something.
void encode_add_wattr_line | ( | char * | str | ) |
Processes a token data line.
That is, it processes a line that is not an XML line.
Note that this is destructive - the argument character string will be changed in situ via an strtok-like mechanism.
str | A string containing the line to process. |
References cl_free, cl_lexhash_add(), cl_lexhash_id(), cl_make_set(), CL_MAX_LINE_LENGTH, cl_strdup(), cl_xml_entity_decode(), encode_error(), encode_print_input_lineno(), encode_strtok(), field_separators, _cl_lexhash_entry::id, NwriteInt(), WAttr::position, silent, strip_blanks, token, undef_value, wattr_ptr, and xml_aware.
Referenced by main().
void encode_error | ( | char * | format, |
... | |||
) |
Prints an error message to STDERR, automatically adding a message on the location of the error in the corpus.
Then exits the program.
format | Format-specifying string of the error message. |
... | Additional arguments, printf-style. |
References current_input_file, encode_print_input_lineno(), and input_line.
Referenced by encode_add_wattr_line(), encode_generate_registry_file(), encode_get_input_line(), encode_parse_options(), encode_scan_directory(), main(), range_close(), range_declare(), wattr_close_all(), and wattr_declare().
void encode_generate_registry_file | ( | char * | registry_file | ) |
Writes a registry file for the corpus that has been encoded.
Part of cwb-encode; not a library function.
registry_file | String containing the path of the file to write. |
References cl_free, cl_id_tolower(), cl_id_toupper(), cl_id_validate(), cl_malloc(), cl_path_registry_quote(), cl_strdup(), corpus_character_set, debug, directory, encode_error(), INFOFILE_DEFAULT_NAME, range_print_registry_line(), range_ptr, SUBDIR_SEPARATOR, and wattr_ptr.
Referenced by main().
int encode_get_input_line | ( | char * | buffer, |
int | bufsize | ||
) |
Reads one input line into the specified buffer (either from stdin, or from one or more input files).
The input files are not passed to the function, but are taken from the program global variables.
This function returns False when the last input file has been completely read, and automatically closes files.
If the line that is read is not valid according to the character set specified for the corpus, then an error will be printed and the program shut down.
buffer | Where to load the line to. Assumed to be MAX_INPUT_LINE_LENGTH long. |
bufsize | Not currently used, but should be MAX_INPUT_LINE_LENGTH in case of future use! |
References cl_close_stream(), cl_error(), cl_open_stream(), cl_strcpy(), CL_STREAM_MAGIC, CL_STREAM_READ, cl_string_canonical(), cl_string_list_get(), cl_string_validate_encoding(), cl_string_zap_controls(), clean_strings, corpus_character_set, current_input_file, current_input_file_name, encode_error(), encoding_charset, input_fd, input_line, MAX_INPUT_LINE_LENGTH, nr_input_files, REQUIRE_NFC, and utf8.
Referenced by main().
void encode_parse_options | ( | int | argc, |
char ** | argv | ||
) |
Parses program options and sets global variables.
References cl_charset_name_canonical(), cl_delete_string_list(), cl_string_list_append(), cl_string_list_get(), cl_string_list_size(), clean_strings, corpus_character_set, debug, DEFAULT_ATT_NAME, directory, encode_error(), encode_scan_directory(), encode_usage(), MAXRANGES, progname, range_declare(), range_find(), range_ptr, registry_file, silent, skip_empty_lines, strip_blanks, undef_value, verbose, wattr_declare(), wattr_find(), wattr_ptr, and xml_aware.
Referenced by main().
void encode_print_input_lineno | ( | void | ) |
Prints the input line number (and input filename, if applicable) on STDERR, for error messages and warnings.
References current_input_file_name, input_line, and nr_input_files.
Referenced by encode_add_wattr_line(), encode_error(), main(), range_close(), and range_open().
void encode_print_time | ( | FILE * | stream, |
char * | msg | ||
) |
Prints a message plus the current time to the specified file/stream.
stream | Stream to print to. |
msg | Message to incorporate into the string that is printed. |
Referenced by main().
cl_string_list encode_scan_directory | ( | char * | dir | ) |
Get a list of files in a given directory.
This function only lists files with .vrt or .vrt.gz extensions, and only files identified by POSIX stat() as "regular".
(Note that .vrt is dependent on DEFAULT_INFILE_EXTENSION.)
dir | Path of directory to look in. |
References cl_free, cl_malloc(), cl_new_string_list(), cl_string_list_append(), cl_string_list_qsort(), DEFAULT_INFILE_EXTENSION, encode_error(), and SUBDIR_SEPARATOR.
Referenced by encode_parse_options().
char* encode_strtok | ( | register char * | s, |
register const char * | delim | ||
) |
A replacement for the strtok() function which doesn't skip empty fields.
s | The string to split. |
delim | Delimiters to use in splitting. |
References last.
Referenced by encode_add_wattr_line().
void encode_usage | ( | void | ) |
Prints a usage message and exits the program.
References DEFAULT_INFILE_EXTENSION, progname, undef_value, and VERSION.
Referenced by encode_parse_options().
int main | ( | int | argc, |
char ** | argv | ||
) |
Main function for cwb-encode.
As well as the entry point to the program, this contains the main loop for each line of the corpus to be encoded.
The string of each line is sent to one of a number of different functions, depending on what is found in that string!
argc | Number of command-line arguments. |
argv | Command-line arguments. |
References _Range::automatic, _Range::avs, _Range::avx, buf, cl_charset_from_name(), cl_lexhash_add(), cl_lexhash_freq(), CL_MAX_CORPUS_SIZE, cl_new_lexhash(), cl_new_string_list(), cl_set_debug_level(), cl_string_list_get(), cl_string_list_size(), cl_xml_is_name_char, COMMA_SEP_THOUSANDS_CONVSPEC, corpus_character_set, debug, _Range::element_drop_count, encode_add_wattr_line(), encode_error(), encode_generate_registry_file(), encode_get_input_line(), encode_parse_options(), encode_print_input_lineno(), encode_print_time(), encoding_charset, _Range::fd, input_line, _Range::is_open, line, MAX_INPUT_LINE_LENGTH, _Range::max_recursion, _Range::name, nr_input_files, _Range::null_attribute, progname, range_close(), range_find(), range_open(), range_ptr, _Range::recursion_level, registry_file, REP_CHECK_LEXHASH_SIZE, silent, skip_empty_lines, _Range::store_values, strip_blanks, verbose, wattr_close_all(), and xml_aware.
void range_close | ( | Range * | rng, |
int | end_pos | ||
) |
Closes a currently open instance of an S-attribute.
rng | Pointer to the S-attribute to close. |
end_pos | The corpus position at which this instance closes. |
References _Range::annot, _Range::avs, _Range::avx, cl_free, cl_lexhash_add(), cl_lexhash_find(), CL_MAX_LINE_LENGTH, cl_strdup(), cl_string_list_get(), cl_string_list_size(), _cl_lexhash_entry::data, _Range::el_attributes, _Range::el_atts_list, encode_error(), encode_print_input_lineno(), _Range::fd, _Range::has_children, _cl_lexhash_entry::_cl_lexhash_entry_data::integer, _Range::is_open, _Range::lh, _Range::max_recursion, _Range::name, _Range::null_attribute, _Range::num, NwriteInt(), _Range::offset, _cl_lexhash_entry::_cl_lexhash_entry_data::pointer, _Range::recursion_children, _Range::recursion_level, silent, _Range::start_pos, and _Range::store_values.
Referenced by main(), and range_open().
Range* range_declare | ( | char * | name, |
char * | directory, | ||
int | store_values, | ||
int | null_attribute | ||
) |
Creates a Range object to store a specified s-attribute (and, if appropriate, does the same for children-attributes).
The new Range object is placed in a global variable, but a pointer is also returned. So you can ignore the return value or not, as you prefer.
This is the function where the command-line formalism for defining s-attributes is defined.
name | The string from the user specifying the name of this attribute, recursion and any "attributes" of this XML element - e.g. "text:0+id" |
directory | The directory where the CWB data files will go. |
store_values | boolean: indicates whether this s-attribute was specified with -V (true) or -S (false) when the program was invoked. |
null_attribute | boolean: this is a null attribute, i.e. an XML element to be ignored. |
References _Range::annot, _Range::automatic, _Range::avs, _Range::avx, buf, cl_calloc(), cl_free, cl_lexhash_add(), cl_lexhash_id(), CL_MAX_LINE_LENGTH, cl_new_lexhash(), cl_new_string_list(), cl_strcpy(), cl_strdup(), cl_string_list_append(), _cl_lexhash_entry::data, debug, _Range::dir, _Range::el_attributes, _Range::el_atts_list, _Range::el_undeclared_attributes, _Range::element_drop_count, encode_error(), _Range::fd, _Range::feature_set, _Range::has_children, _Range::in_registry, _Range::is_open, _Range::lh, _Range::max_recursion, MAXRANGES, _Range::name, _Range::null_attribute, _Range::num, _Range::offset, _cl_lexhash_entry::_cl_lexhash_entry_data::pointer, range_ptr, _Range::recursion_children, _Range::recursion_level, REP_CHECK_LEXHASH_SIZE, _Range::start_pos, _Range::store_values, STRUC_AVS, STRUC_AVX, and STRUC_RNG.
Referenced by encode_parse_options().
int range_find | ( | char * | name | ) |
Gets the index (in the ranges array) of a specified S-attribute.
name | The S-attribute to search for. |
References range_ptr.
Referenced by encode_parse_options(), and main().
void range_open | ( | Range * | rng, |
int | start_pos, | ||
char * | annot | ||
) |
Opens an instance of the given S-attribute.
If rng has element attribute children, range_open() will mess around with the string annotation (otherwise not).
rng | The S-attribute to open. |
start_pos | The corpus position at which this instance begins. |
annot | The annotation string (the XML element's att-val pairs). |
References _Range::annot, cl_free, cl_lexhash_add(), cl_lexhash_find(), cl_lexhash_freq(), cl_make_set(), cl_strdup(), cl_string_list_get(), cl_string_list_size(), cl_xml_entity_decode(), cl_xml_is_name_char, _cl_lexhash_entry::data, _Range::el_attributes, _Range::el_atts_list, _Range::el_undeclared_attributes, _Range::element_drop_count, encode_print_input_lineno(), _Range::feature_set, _Range::has_children, _cl_lexhash_entry::_cl_lexhash_entry_data::integer, _Range::is_open, line, _Range::max_recursion, _Range::name, _Range::null_attribute, _cl_lexhash_entry::_cl_lexhash_entry_data::pointer, range_close(), _Range::recursion_children, _Range::recursion_level, silent, _Range::start_pos, _Range::store_values, strip_blanks, and token.
Referenced by main().
void range_print_registry_line | ( | Range * | rng, |
FILE * | fd, | ||
int | print_comment | ||
) |
Prints registry lines for a given s-attribute, and its children, if any, to the specified file handle.
rng | The s-attribute in question. |
fd | Stream for the registry file to write the line to. |
print_comment | Boolean: if true, a comment on the original XML tags is printed. |
References cl_lexhash_find(), cl_string_list_get(), cl_string_list_size(), _cl_lexhash_entry::data, _Range::el_attributes, _Range::el_atts_list, _Range::has_children, _Range::in_registry, _Range::max_recursion, _Range::name, _Range::null_attribute, _cl_lexhash_entry::_cl_lexhash_entry_data::pointer, _Range::recursion_children, and _Range::store_values.
Referenced by encode_generate_registry_file().
void wattr_close_all | ( | void | ) |
Closes all three file handles for each of the wattr objects in cwb-encode's global array.
References encode_error(), and wattr_ptr.
Referenced by main().
int wattr_declare | ( | char * | name, |
char * | directory, | ||
int | nr_buckets | ||
) |
Sets up a new p-attribute, including opening corpus, lex and index file handles.
Note: corpus_fd is a binary file, lex_fd is a text file(*), and lexidx_fd is a binary file.
(*) But lexicon items are delimited by '\0' not by '\n'. Therefore '\n' is never written, so the text/binary distinction doesn't matter much.
name | Identifier string of the p-attribute |
directory | Directory in which CWB data files are to be created. |
nr_buckets | Number of buckets in the lexhash of the new p-attribute (value passed to cl_new_lexhash() ) |
References CL_MAX_LINE_LENGTH, cl_new_lexhash(), cl_strdup(), DEFAULT_ATT_NAME, encode_error(), WAttr::feature_set, WAttr::lh, WAttr::name, POS_CORPUS, POS_LEX, POS_LEXIDX, WAttr::position, and wattr_ptr.
Referenced by encode_parse_options().
int wattr_find | ( | char * | name | ) |
Finds a p-attribute (in the global wattrs array).
Returns the index (in wattrs) of the P-attribute with the given name.
name | The P-attribute to search for. |
References wattr_ptr.
Referenced by encode_parse_options().
int clean_strings = 0 |
clean up input strings by replacing invalid bytes with '?'
Referenced by encode_get_input_line(), and encode_parse_options().
char* corpus_character_set = "latin1" |
character set label that is inserted into the registry file
Referenced by encode_generate_registry_file(), encode_get_input_line(), encode_parse_options(), and main().
int current_input_file = 0 |
index of input file currently being processed
Referenced by encode_error(), and encode_get_input_line().
char* current_input_file_name = NULL |
filename of current input file, for error messages
Referenced by encode_get_input_line(), and encode_print_input_lineno().
int debug = 0 |
debug mode on or off?
Referenced by encode_generate_registry_file(), encode_parse_options(), main(), and range_declare().
char* directory = NULL |
corpus data directory (no longer defaults to current directory)
Referenced by encode_generate_registry_file(), encode_parse_options(), and sencode_parse_options().
CorpusCharset encoding_charset |
a charset object to be generated from corpus_character_set
Referenced by encode_get_input_line(), and main().
char* field_separators = FIELDSEPS |
string containing the characters that can function as field separators
Referenced by encode_add_wattr_line().
FILE* input_fd = NULL |
file handle for current input file (or pipe) (text mode!)
Referenced by encode_get_input_line().
cl_string_list input_files = NULL |
list of input file(s) (-f option(s))
unsigned long input_line = 0 |
input line number (reset for each new file) for error messages
Referenced by encode_error(), encode_get_input_line(), encode_print_input_lineno(), load_macro_file(), and main().
unsigned line = 0 |
corpus position currently being encoded (i.e. cpos of next token)
Referenced by alignshow_print_next_region(), alignshow_skip_next_region(), do_undump(), evaluate_subset(), evaluate_target(), html_print_output(), latex_print_output(), load_macro_file(), main(), preprocess_input_line(), PrintAttributes(), range_open(), sgml_print_output(), and SortExternally().
int nr_input_files = 0 |
number of input files (length of list after option processing)
Referenced by encode_get_input_line(), encode_print_input_lineno(), and main().
char* progname = NULL |
name of the currently running program
Referenced by encode_parse_options(), encode_usage(), and main().
int range_ptr = 0 |
Referenced by encode_generate_registry_file(), encode_parse_options(), main(), range_declare(), and range_find().
char* registry_file = NULL |
if set, auto-generate registry file named {registry_file}, listing declared attributes
Referenced by encode_parse_options(), and main().
int silent = 0 |
hide messages
Referenced by encode_add_wattr_line(), encode_parse_options(), main(), range_close(), and range_open().
int skip_empty_lines = 0 |
skip empty lines when encoding?
Referenced by encode_parse_options(), and main().
int strip_blanks = 0 |
strip leading and trailing blanks from input and token annotations
Referenced by encode_add_wattr_line(), encode_parse_options(), main(), and range_open().
cl_lexhash undeclared_sattrs = NULL |
lookup hash for undeclared s-attributes and s-attributes declared with -S that have annotations (which will be ignored), so warnings are issued only once
char* undef_value = UNDEF_VALUE |
string used as value of P-attributes when a value is missing, i.e. if a tab-delimited field is empty
Referenced by encode_add_wattr_line(), encode_parse_options(), and encode_usage().
int verbose = 0 |
show progress (this is not the opposite of silent!)
Referenced by encode_parse_options(), and main().
int wattr_ptr = 0 |
Referenced by encode_add_wattr_line(), encode_generate_registry_file(), encode_parse_options(), wattr_close_all(), wattr_declare(), and wattr_find().
int xml_aware = 0 |
substitute XML entities in p-attributes & ignore <? and <! lines
Referenced by encode_add_wattr_line(), encode_parse_options(), and main().