High-Speed Finite-State Machines
by Brenton Hoff


Example 1:

(a) 

0x00  ABANDON
0x01  AGAIN
0x02  AGAIN
 ...
'w'   AGAIN
'x'   MATCH
'y'   AGAIN
 ...

(b)
for (;;) {
  NextCh = *pText++;
  Action = pState[NextCh];
  switch (Action) {
   case AGAIN: continue;
   case MATCH: return TRUE;
   case ABANDON: return FALSE;
  }
}

(c)
AGAIN:	lodsb		; Address 0
	xlat
	jmp	ax
MATCH:	mov	ax,1	; Address 4
	ret
ABANDON: mov	ax,0	; Address 8
	ret



Listing One

#include "fcntl.h"
#include "io.h"
#include <stdio.h>

typedef unsigned short  UINT16;   /*Unsigned 16-bit integer*/
typedef unsigned char   BYTE;     /*unsigned byte*/

#define PAGE_ALIGNED far
#define ENDMARKER       0xEE

#define EE_NOP          0x00      /*Buf end check, NOP if not*/
#define NOP             0x04      /*Stay in same state*/
#define EE_WORD         0x08      /*Buf end check, WORD if not*/
#define WORD            0x0c      /*Words++, change to word*/
#define WHITE           0x14      /*Change to whitespace*/
#define LF_WHITE        0x1a      /*Lines++, change to whitespace*/
#define LF_NOP          0x1c      /*Lines++, stay in same state*/
#define BUF_SIZE        (28*2048u)

/*Global variables updated directly by counting loop*/
unsigned long Lines;
unsigned long Words;
BYTE *pCurrentTable;

BYTE Tables[512];
#define WhiteTable      (&Tables[0])
#define WordTable       (&Tables[256])

static BYTE Buf[BUF_SIZE + 2];
static void InitTables(void) {
    int i;
    for (i = 0; i < 256 ; i++) WhiteTable[i] = WORD;
    WhiteTable[ 9]   = WhiteTable[11] = WhiteTable[12] = NOP;
    WhiteTable[13]   = WhiteTable[32] = NOP;
    WhiteTable[10]   = LF_NOP;
    WhiteTable[ENDMARKER] = EE_WORD;

    for (i = 0; i < 256; i++) WordTable[i] = NOP;
    WordTable[ 9]   = WordTable[11] = WordTable[12] = WHITE;
    WordTable[13]   = WordTable[32] = WHITE;
    WordTable[10]   = LF_WHITE;
    WordTable[ENDMARKER] = EE_NOP;
}
/*C and assembly versions of word counter*/
void PAGE_ALIGNED CountC(UINT16 NrBytes, BYTE *pText);
void PAGE_ALIGNED CountAsm(UINT16 NrBytes, BYTE *pText);
static int WCFile(char *pFilename);
int main(int argc, char **argv) {
    InitTables();
    while (--argc) {   /*Count each file*/
        if (! WCFile(*++argv))
            return 1;
    }
    return 0;
}
static int WCFile(char *pFilename) {
    int NrBytes;
    int Handle;
    /*Attempt to open file for reading as raw bytes*/
    Handle = open(pFilename, O_BINARY | O_RDONLY);
    if (Handle == -1) {
        printf("\nCan't open: %s", pFilename);
        return 0;
    }
    Lines = 0;
    Words = 0;
    pCurrentTable = WhiteTable;
    do {
        /*Read a slab of the file into memory*/
        NrBytes = read(Handle, Buf, BUF_SIZE);
        /*Exit loop if there's an error*/
        if (NrBytes == -1) break;
        /*Process the slab using CountC or CountAsm*/
        CountAsm(NrBytes, Buf);
        /*Stop if finished with file*/
    } while (NrBytes >= BUF_SIZE);
    printf("%7lu %7lu %7lu %s\n", Lines, Words, tell(Handle), pFilename);
    (void) close(Handle);
    return 1;
} /*WCFile*/
void PAGE_ALIGNED CountC(UINT16 NrBytes, BYTE *pText) {
    BYTE NextCh;
    BYTE Action;
    BYTE *pTab = pCurrentTable;
    BYTE *pEnd = &pText[NrBytes];
    /*Add endmarker to buffer to trigger end test below*/
    *pEnd++ = ENDMARKER;
    /*Loop through each byte*/
    for (;;) {
        NextCh = *pText++;
        Action = pTab[NextCh];
        switch (Action) {
        case EE_NOP:   if (pText == pEnd) break;
                       /*FALLTHROUGH*/
        case NOP:      continue;

        case EE_WORD:  if (pText == pEnd) break;
                       /*FALLTHROUGH*/
        case WORD:     Words++;
                       pTab = WordTable;
                       continue;
        case WHITE:    pTab = WhiteTable;
                       continue;
        case LF_WHITE: pTab = WhiteTable;
                       /*FALLTHROUGH*/
        case LF_NOP:   Lines++;
                       continue;
        }
        /*End of buffer found: remember in-word context and exit*/
        pCurrentTable = pTab;
        return;
    }
} /*CountC*/

1


