CWB
Data Structures | Macros | Typedefs | Enumerations | Functions | Variables
regex2dfa.c File Reference

Regular expression to DFA converter – originally written by markh.nosp@m.@csd.nosp@m.4.csd.nosp@m..uwm.nosp@m..edu. More...

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdarg.h>
#include <stdlib.h>
#include <assert.h>
#include "../cl/globals.h"
#include "../cl/macros.h"
#include "eval.h"
#include "options.h"
#include "regex2dfa.h"

Data Structures

struct  StackCard
 
struct  state
 
struct  item
 
struct  exp
 
struct  symbol
 
struct  equation
 
struct  Equiv
 

Macros

#define MAX_CHAR   0x4000
 
#define MAX_ERRORS   25
 The maximum number of errors that the regex2dfa module will allow before killing the program. More...
 
#define HASH_MAX   0x200
 
#define NN   0x200
 TODO needs a comment! More...
 
#define EQU_EXTEND   0x200
 
#define STACK_MAX   200
 
#define X_EXTEND   4
 
#define TOP   ((SP - 1)->Tag)
 
#define POP()   ((--SP)->Q)
 

Typedefs

typedef unsigned char byte
 
typedef struct symbolSymbol
 
typedef struct expExp
 
typedef struct equationEquation
 
typedef struct stateState
 
typedef struct itemItem
 

Enumerations

enum  ExpTag {
  SymX, ZeroX, OneX, AndX,
  OrX, StarX, PlusX, OptX
}
 
enum  StackTag {
  RULE, EQU, PAR, OPT,
  OR, AND
}
 
enum  Lexical {
  EndT, CommaT, RParT, RBrT,
  EqualT, BarT, ZeroT, OneT,
  IdenT, LParT, LBrT, PlusT,
  StarT
}
 

Functions

static int GET (void)
 Gets the next character from the searchstr, and increments its pointer; returns EOF if we are at the end of the string. More...
 
static void UNGET (int Ch)
 Ungets a character from the search string (by decrementing the index into the search string; the actual string itself isn't modified). More...
 
static void REGEX2DFA_ERROR (char *Format,...)
 Prints an error message to stdout, and exits the program if there are now just too many errors. More...
 
Lexical LEX (void)
 Gets the Lexical symbol corresponding to the next non-whitespace character in the searchstr. More...
 
byte Hash (char *S)
 Creates a one-byte hash of the string S. More...
 
Symbol LookUp (char *S)
 Look up the symbol contained in string S in the global hash table. More...
 
int DUP (int A, int B)
 
void Store (Symbol S, int Q)
 
int MakeExp (int Q, ExpTag Tag,...)
 
void PUSH (StackTag Tag, int Q)
 
int Parse (void)
 the regex parser proper: private function More...
 
void PushQ (int Q)
 
void PopQ (void)
 
int AddState (int States, int *SList)
 
void AddBuf (Symbol LHS, int Q)
 
void FormState (int Q)
 
void AddEquiv (int L, int R)
 
void MergeStates (void)
 
void WriteStates (void)
 Write states to stdout. More...
 
void init_dfa (DFA *dfa)
 Initialises the members of the given DFA object. More...
 
void free_dfa (DFA *dfa)
 Frees all the memory associated with this DFA. More...
 
void show_complete_dfa (DFA dfa)
 Prints the contents of a DFA to stdout. More...
 
void init (void)
 Initialises the global variables in the regex2dfa module. More...
 
void regex2dfa (char *rxs, DFA *automaton)
 Converts a regular expression to a DFA. More...
 

Variables

char * searchstr
 Global variable containing a search string that is to be converted to a DFA. More...
 
State STab = (State)NULL
 
int Ss
 
Item IBuf = (Item)NULL
 
int Is
 
int IMax
 
char * Action [7]
 
char * LastW
 
static char ChArr [MAX_CHAR]
 
char * ChP
 
int LINE
 
int ERRORS
 The number of errors enocuntered while parsing a regex to a DFA. More...
 
Symbol HashTab [HASH_MAX]
 Global hash table containing Symbols. More...
 
Symbol FirstB
 
Symbol LastB
 
Exp ExpHash [NN]
 TODO needs a comment! More...
 
Equation EquTab = (Equation)NULL
 
int Equs
 
int EquMax
 
StackCard Stack [STACK_MAX]
 
StackCardSP
 
int * XStack = NULL
 
int Xs
 
int XMax
 
struct EquivETab = NULL
 
int Es
 
int EMax
 
int currpos
 Index into searchstr showing the next character that will be read. More...
 

Detailed Description

Regular expression to DFA converter – originally written by markh.nosp@m.@csd.nosp@m.4.csd.nosp@m..uwm.nosp@m..edu.

Derived from the syntax:

Rule = (ID "=" Ex ",")* Ex.

Ex = "0" | "1" | ID | "(" Ex ")" | "[" Ex "]" | Ex "+" | Ex "*" | Ex Ex | Ex "|" Ex. *

with the usual precedence rules.

(Note, this is the token-sequence regex = not the string-level regex!

Macro Definition Documentation

#define EQU_EXTEND   0x200

Referenced by MakeExp().

#define HASH_MAX   0x200

Referenced by init().

#define MAX_CHAR   0x4000

Referenced by LEX().

#define MAX_ERRORS   25

The maximum number of errors that the regex2dfa module will allow before killing the program.

Referenced by REGEX2DFA_ERROR().

#define NN   0x200

TODO needs a comment!

Referenced by DUP(), and init().

#define POP ( )    ((--SP)->Q)

Referenced by Parse().

#define STACK_MAX   200

Referenced by PUSH().

#define TOP   ((SP - 1)->Tag)

Referenced by Parse().

#define X_EXTEND   4

Referenced by PushQ().

Typedef Documentation

typedef unsigned char byte
typedef struct equation* Equation
typedef struct exp* Exp
typedef struct item * Item
typedef struct state * State
typedef struct symbol* Symbol

Enumeration Type Documentation

enum ExpTag
Enumerator
SymX 
ZeroX 
OneX 
AndX 
OrX 
StarX 
PlusX 
OptX 
enum Lexical
Enumerator
EndT 
CommaT 
RParT 
RBrT 
EqualT 
BarT 
ZeroT 
OneT 
IdenT 
LParT 
LBrT 
PlusT 
StarT 
enum StackTag
Enumerator
RULE 
EQU 
PAR 
OPT 
OR 
AND 

Function Documentation

void AddBuf ( Symbol  LHS,
int  Q 
)

References cl_realloc(), IMax, Is, item::LHS, symbol::Name, item::RHS, and item::Size.

Referenced by FormState().

void AddEquiv ( int  L,
int  R 
)

References cl_realloc(), state::Class, EMax, Es, ETab, Equiv::L, and Equiv::R.

Referenced by MergeStates().

int AddState ( int  States,
int *  SList 
)

References cl_realloc(), state::Class, state::SList, Ss, and state::States.

Referenced by FormState().

int DUP ( int  A,
int  B 
)

References NN.

Referenced by MakeExp().

void FormState ( int  Q)
void free_dfa ( DFA dfa)

Frees all the memory associated with this DFA.

References dfa::Final, dfa::Max_Input, dfa::Max_States, and dfa::TransTable.

Referenced by free_environment().

static int GET ( void  )
static

Gets the next character from the searchstr, and increments its pointer; returns EOF if we are at the end of the string.

See also
searchstr
currpos

References currpos, and searchstr.

Referenced by LEX().

byte Hash ( char *  S)

Creates a one-byte hash of the string S.

Referenced by LookUp(), and MakeExp().

void init ( void  )

Initialises the global variables in the regex2dfa module.

References ChArr, ChP, currpos, EquMax, Equs, ERRORS, ETab, HASH_MAX, LINE, NN, Ss, XMax, Xs, and XStack.

Referenced by regex2dfa().

void init_dfa ( DFA dfa)

Initialises the members of the given DFA object.

References dfa::Final, dfa::Max_Input, dfa::Max_States, and dfa::TransTable.

Referenced by next_environment().

Lexical LEX ( void  )

Gets the Lexical symbol corresponding to the next non-whitespace character in the searchstr.

References BarT, ChArr, ChP, CommaT, EndT, EqualT, GET(), IdenT, LastW, LBrT, LParT, MAX_CHAR, OneT, PlusT, RBrT, REGEX2DFA_ERROR(), RParT, StarT, UNGET(), and ZeroT.

Referenced by Parse().

Symbol LookUp ( char *  S)

Look up the symbol contained in string S in the global hash table.

References cl_malloc(), cl_strdup(), symbol::Hash, Hash(), symbol::Name, symbol::Next, and symbol::Tail.

Referenced by Parse().

int MakeExp ( int  Q,
ExpTag  Tag,
  ... 
)
void MergeStates ( void  )
int Parse ( void  )

the regex parser proper: private function

References Action, AND, AndX, EQU, EqualT, IdenT, LastW, LBrT, LEX(), LookUp(), LParT, MakeExp(), OneT, OneX, OPT, OptX, OR, OrX, PAR, PlusX, POP, PUSH(), REGEX2DFA_ERROR(), RULE, Stack, StarX, Store(), SymX, TOP, ZeroT, and ZeroX.

Referenced by regex2dfa().

void PopQ ( void  )

References equation::Stack, Xs, and XStack.

Referenced by FormState().

void PUSH ( StackTag  Tag,
int  Q 
)

References StackCard::Q, REGEX2DFA_ERROR(), STACK_MAX, StackCard::Tag, and Tag.

Referenced by Parse().

void PushQ ( int  Q)

References cl_realloc(), equation::Stack, X_EXTEND, XMax, Xs, and XStack.

Referenced by FormState().

void regex2dfa ( char *  rxs,
DFA automaton 
)

Converts a regular expression to a DFA.

Public function.

Parameters
rxsThe regular expression.
automatonPointer to the DFA object to write to.

References C, cl_malloc(), state::Class, dfa::E_State, eep, state::Empty, Environment, ERRORS, False, dfa::Final, FormState(), init(), state::LHS, dfa::Max_Input, dfa::Max_States, evalenv::MaxPatIndex, MergeStates(), symbol::Name, Parse(), state::RHS, searchstr, state::Shifts, state::ShList, show_dfa, SP, Ss, dfa::TransTable, True, and WriteStates().

Referenced by do_SearchPattern().

static void REGEX2DFA_ERROR ( char *  Format,
  ... 
)
static

Prints an error message to stdout, and exits the program if there are now just too many errors.

References ERRORS, LINE, and MAX_ERRORS.

Referenced by LEX(), Parse(), and PUSH().

void show_complete_dfa ( DFA  dfa)

Prints the contents of a DFA to stdout.

References dfa::E_State, dfa::Final, dfa::Max_Input, dfa::Max_States, and dfa::TransTable.

Referenced by show_environment().

void Store ( Symbol  S,
int  Q 
)
static void UNGET ( int  Ch)
static

Ungets a character from the search string (by decrementing the index into the search string; the actual string itself isn't modified).

Parameters
ChIgnored.

References currpos.

Referenced by LEX().

void WriteStates ( void  )

Write states to stdout.

Private function.

References C, state::Class, state::Empty, state::LHS, symbol::Name, state::RHS, state::Shifts, state::ShList, SP, and Ss.

Referenced by regex2dfa().

Variable Documentation

char* Action[7]
Initial value:
=
{
".ABCH|&&&&&+*",
"I=BCH|&&&&&+*",
"DD)FH|&&&&&+*",
"EEG]H|&&&&&+*",
"vvvvv|&&&&&+*",
"xxxxxx&&&&&+*"
}

Referenced by Parse().

char ChArr[MAX_CHAR]
static

Referenced by init(), and LEX().

char* ChP

Referenced by init(), and LEX().

int currpos

Index into searchstr showing the next character that will be read.

Referenced by GET(), init(), and UNGET().

int EMax

Referenced by AddEquiv(), and MergeStates().

int EquMax

Referenced by init(), and MakeExp().

int Equs

Referenced by init(), and MakeExp().

Equation EquTab = (Equation)NULL
int ERRORS

The number of errors enocuntered while parsing a regex to a DFA.

Referenced by init(), regex2dfa(), and REGEX2DFA_ERROR().

int Es

Referenced by AddEquiv(), and MergeStates().

struct Equiv * ETab = NULL

Referenced by AddEquiv(), init(), and MergeStates().

Exp ExpHash[NN]

TODO needs a comment!

Symbol FirstB
Symbol HashTab[HASH_MAX]

Global hash table containing Symbols.

Item IBuf = (Item)NULL
int IMax

Referenced by AddBuf(), and FormState().

int Is

Referenced by AddBuf(), and FormState().

Symbol LastB
char* LastW

Referenced by LEX(), and Parse().

int LINE

Referenced by init(), and REGEX2DFA_ERROR().

char* searchstr

Global variable containing a search string that is to be converted to a DFA.

(Needs to be global; functions using the DFA write to it, and then the DFA parser reads from it. Declared as an external global in cqp.h so other parts of CQP can access it.)

Referenced by do_SearchPattern(), do_StandardQuery(), GET(), prepare_input(), prepare_Query(), and regex2dfa().

StackCard * SP
int Ss
State STab = (State)NULL

Referenced by Parse().

int XMax

Referenced by init(), and PushQ().

int Xs

Referenced by FormState(), init(), PopQ(), and PushQ().

int* XStack = NULL

Referenced by FormState(), init(), PopQ(), and PushQ().