CWB
|
Regular expression to DFA converter – originally written by markh. @csd 4.csd .uwm .eduMore...
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdarg.h>
#include <stdlib.h>
#include <assert.h>
#include "../cl/globals.h"
#include "../cl/macros.h"
#include "eval.h"
#include "options.h"
#include "regex2dfa.h"
Data Structures | |
struct | StackCard |
struct | state |
struct | item |
struct | exp |
struct | symbol |
struct | equation |
struct | Equiv |
Macros | |
#define | MAX_CHAR 0x4000 |
#define | MAX_ERRORS 25 |
The maximum number of errors that the regex2dfa module will allow before killing the program. More... | |
#define | HASH_MAX 0x200 |
#define | NN 0x200 |
TODO needs a comment! More... | |
#define | EQU_EXTEND 0x200 |
#define | STACK_MAX 200 |
#define | X_EXTEND 4 |
#define | TOP ((SP - 1)->Tag) |
#define | POP() ((--SP)->Q) |
Typedefs | |
typedef unsigned char | byte |
typedef struct symbol * | Symbol |
typedef struct exp * | Exp |
typedef struct equation * | Equation |
typedef struct state * | State |
typedef struct item * | Item |
Enumerations | |
enum | ExpTag { SymX, ZeroX, OneX, AndX, OrX, StarX, PlusX, OptX } |
enum | StackTag { RULE, EQU, PAR, OPT, OR, AND } |
enum | Lexical { EndT, CommaT, RParT, RBrT, EqualT, BarT, ZeroT, OneT, IdenT, LParT, LBrT, PlusT, StarT } |
Functions | |
static int | GET (void) |
Gets the next character from the searchstr, and increments its pointer; returns EOF if we are at the end of the string. More... | |
static void | UNGET (int Ch) |
Ungets a character from the search string (by decrementing the index into the search string; the actual string itself isn't modified). More... | |
static void | REGEX2DFA_ERROR (char *Format,...) |
Prints an error message to stdout, and exits the program if there are now just too many errors. More... | |
Lexical | LEX (void) |
Gets the Lexical symbol corresponding to the next non-whitespace character in the searchstr. More... | |
byte | Hash (char *S) |
Creates a one-byte hash of the string S. More... | |
Symbol | LookUp (char *S) |
Look up the symbol contained in string S in the global hash table. More... | |
int | DUP (int A, int B) |
void | Store (Symbol S, int Q) |
int | MakeExp (int Q, ExpTag Tag,...) |
void | PUSH (StackTag Tag, int Q) |
int | Parse (void) |
the regex parser proper: private function More... | |
void | PushQ (int Q) |
void | PopQ (void) |
int | AddState (int States, int *SList) |
void | AddBuf (Symbol LHS, int Q) |
void | FormState (int Q) |
void | AddEquiv (int L, int R) |
void | MergeStates (void) |
void | WriteStates (void) |
Write states to stdout. More... | |
void | init_dfa (DFA *dfa) |
Initialises the members of the given DFA object. More... | |
void | free_dfa (DFA *dfa) |
Frees all the memory associated with this DFA. More... | |
void | show_complete_dfa (DFA dfa) |
Prints the contents of a DFA to stdout. More... | |
void | init (void) |
Initialises the global variables in the regex2dfa module. More... | |
void | regex2dfa (char *rxs, DFA *automaton) |
Converts a regular expression to a DFA. More... | |
Variables | |
char * | searchstr |
Global variable containing a search string that is to be converted to a DFA. More... | |
State | STab = (State)NULL |
int | Ss |
Item | IBuf = (Item)NULL |
int | Is |
int | IMax |
char * | Action [7] |
char * | LastW |
static char | ChArr [MAX_CHAR] |
char * | ChP |
int | LINE |
int | ERRORS |
The number of errors enocuntered while parsing a regex to a DFA. More... | |
Symbol | HashTab [HASH_MAX] |
Global hash table containing Symbols. More... | |
Symbol | FirstB |
Symbol | LastB |
Exp | ExpHash [NN] |
TODO needs a comment! More... | |
Equation | EquTab = (Equation)NULL |
int | Equs |
int | EquMax |
StackCard | Stack [STACK_MAX] |
StackCard * | SP |
int * | XStack = NULL |
int | Xs |
int | XMax |
struct Equiv * | ETab = NULL |
int | Es |
int | EMax |
int | currpos |
Index into searchstr showing the next character that will be read. More... | |
Regular expression to DFA converter – originally written by markh. @csd 4.csd .uwm .edu
Derived from the syntax:
Rule = (ID "=" Ex ",")* Ex.
Ex = "0" | "1" | ID | "(" Ex ")" | "[" Ex "]" | Ex "+" | Ex "*" | Ex Ex | Ex "|" Ex. *
with the usual precedence rules.
(Note, this is the token-sequence regex = not the string-level regex!
#define EQU_EXTEND 0x200 |
Referenced by MakeExp().
#define HASH_MAX 0x200 |
Referenced by init().
#define MAX_CHAR 0x4000 |
Referenced by LEX().
#define MAX_ERRORS 25 |
The maximum number of errors that the regex2dfa module will allow before killing the program.
Referenced by REGEX2DFA_ERROR().
#define STACK_MAX 200 |
Referenced by PUSH().
#define X_EXTEND 4 |
Referenced by PushQ().
typedef unsigned char byte |
enum ExpTag |
enum Lexical |
enum StackTag |
void AddBuf | ( | Symbol | LHS, |
int | Q | ||
) |
References cl_realloc(), IMax, Is, item::LHS, symbol::Name, item::RHS, and item::Size.
Referenced by FormState().
void AddEquiv | ( | int | L, |
int | R | ||
) |
References cl_realloc(), state::Class, EMax, Es, ETab, Equiv::L, and Equiv::R.
Referenced by MergeStates().
int AddState | ( | int | States, |
int * | SList | ||
) |
References cl_realloc(), state::Class, state::SList, Ss, and state::States.
Referenced by FormState().
void FormState | ( | int | Q | ) |
References AddBuf(), AddState(), AndX, exp::Arg, exp::Body, cl_malloc(), state::Empty, IMax, Is, exp::Leaf, state::LHS, item::LHS, MakeExp(), OneX, OptX, OrX, PlusX, PopQ(), PushQ(), state::RHS, state::Shifts, state::ShList, state::SList, SP, Ss, StarX, state::States, SymX, exp::Tag, equation::Value, Xs, XStack, and ZeroX.
Referenced by regex2dfa().
void free_dfa | ( | DFA * | dfa | ) |
Frees all the memory associated with this DFA.
References dfa::Final, dfa::Max_Input, dfa::Max_States, and dfa::TransTable.
Referenced by free_environment().
|
static |
byte Hash | ( | char * | S | ) |
void init | ( | void | ) |
void init_dfa | ( | DFA * | dfa | ) |
Initialises the members of the given DFA object.
References dfa::Final, dfa::Max_Input, dfa::Max_States, and dfa::TransTable.
Referenced by next_environment().
Lexical LEX | ( | void | ) |
Symbol LookUp | ( | char * | S | ) |
Look up the symbol contained in string S in the global hash table.
References cl_malloc(), cl_strdup(), symbol::Hash, Hash(), symbol::Name, symbol::Next, and symbol::Tail.
Referenced by Parse().
int MakeExp | ( | int | Q, |
ExpTag | Tag, | ||
... | |||
) |
References AndX, exp::Arg, exp::Body, cl_malloc(), cl_realloc(), exp::Class, DUP(), EQU_EXTEND, EquMax, Equs, exp::Hash, symbol::Hash, equation::Hash, Hash(), exp::Leaf, OneX, OptX, OrX, PlusX, equation::Stack, StarX, SymX, exp::Tag, Tag, exp::Tail, equation::Value, and ZeroX.
Referenced by FormState(), and Parse().
void MergeStates | ( | void | ) |
References AddEquiv(), state::Class, EMax, state::Empty, Es, ETab, Equiv::L, state::LHS, Equiv::R, state::RHS, state::Shifts, state::ShList, SP, and Ss.
Referenced by regex2dfa().
int Parse | ( | void | ) |
the regex parser proper: private function
References Action, AND, AndX, EQU, EqualT, IdenT, LastW, LBrT, LEX(), LookUp(), LParT, MakeExp(), OneT, OneX, OPT, OptX, OR, OrX, PAR, PlusX, POP, PUSH(), REGEX2DFA_ERROR(), RULE, Stack, StarX, Store(), SymX, TOP, ZeroT, and ZeroX.
Referenced by regex2dfa().
void PopQ | ( | void | ) |
References equation::Stack, Xs, and XStack.
Referenced by FormState().
void PUSH | ( | StackTag | Tag, |
int | Q | ||
) |
References StackCard::Q, REGEX2DFA_ERROR(), STACK_MAX, StackCard::Tag, and Tag.
Referenced by Parse().
void PushQ | ( | int | Q | ) |
References cl_realloc(), equation::Stack, X_EXTEND, XMax, Xs, and XStack.
Referenced by FormState().
void regex2dfa | ( | char * | rxs, |
DFA * | automaton | ||
) |
Converts a regular expression to a DFA.
Public function.
rxs | The regular expression. |
automaton | Pointer to the DFA object to write to. |
References C, cl_malloc(), state::Class, dfa::E_State, eep, state::Empty, Environment, ERRORS, False, dfa::Final, FormState(), init(), state::LHS, dfa::Max_Input, dfa::Max_States, evalenv::MaxPatIndex, MergeStates(), symbol::Name, Parse(), state::RHS, searchstr, state::Shifts, state::ShList, show_dfa, SP, Ss, dfa::TransTable, True, and WriteStates().
Referenced by do_SearchPattern().
|
static |
void show_complete_dfa | ( | DFA | dfa | ) |
Prints the contents of a DFA to stdout.
References dfa::E_State, dfa::Final, dfa::Max_Input, dfa::Max_States, and dfa::TransTable.
Referenced by show_environment().
void Store | ( | Symbol | S, |
int | Q | ||
) |
References exp::Body, cl_malloc(), exp::Class, exp::Hash, symbol::Hash, exp::Leaf, SymX, exp::Tag, and exp::Tail.
Referenced by Parse().
|
static |
void WriteStates | ( | void | ) |
Write states to stdout.
Private function.
References C, state::Class, state::Empty, state::LHS, symbol::Name, state::RHS, state::Shifts, state::ShList, SP, and Ss.
Referenced by regex2dfa().
char* Action[7] |
Referenced by Parse().
int currpos |
int EMax |
Referenced by AddEquiv(), and MergeStates().
int ERRORS |
The number of errors enocuntered while parsing a regex to a DFA.
Referenced by init(), regex2dfa(), and REGEX2DFA_ERROR().
int Es |
Referenced by AddEquiv(), and MergeStates().
struct Equiv * ETab = NULL |
Referenced by AddEquiv(), init(), and MergeStates().
Symbol FirstB |
int IMax |
Referenced by AddBuf(), and FormState().
int Is |
Referenced by AddBuf(), and FormState().
Symbol LastB |
int LINE |
Referenced by init(), and REGEX2DFA_ERROR().
char* searchstr |
Global variable containing a search string that is to be converted to a DFA.
(Needs to be global; functions using the DFA write to it, and then the DFA parser reads from it. Declared as an external global in cqp.h so other parts of CQP can access it.)
Referenced by do_SearchPattern(), do_StandardQuery(), GET(), prepare_input(), prepare_Query(), and regex2dfa().
StackCard * SP |
Referenced by FormState(), MergeStates(), regex2dfa(), and WriteStates().
int Ss |
Referenced by AddState(), FormState(), init(), MergeStates(), regex2dfa(), and WriteStates().
int Xs |
Referenced by FormState(), init(), PopQ(), and PushQ().
int* XStack = NULL |
Referenced by FormState(), init(), PopQ(), and PushQ().