Parsing Complex Text Structures
by Ian E. Gorman


Example 1:
; swap.xom
; omnimark: swap first two fields in a line of tab-delimited fields
; Run this program with the command
;   omnimark -sb swap.xom swap.txt
; where swap.txt is an input file with tab-delimited fields
; Entry point: pass the whole main input through the find rule below.
process
    submit #main-input
; At the start of each line, capture the text before the first tab as Field1
; and the text after that tab (up to the next tab, if any) as Field2, then
; write the two fields back in reverse order with a tab between them.
find        line-start
            [any-text except "%t"]* => Field1
            "%t"
            [any-text except "%t"]* => Field2
    output Field2 || "%t" || Field1


Example 2:
#!/bin/perl
# swap.pl
# perl: swap first two fields in a line of tab-delimited fields
# Run this program with the command
#   perl swap.pl swap.txt
# where swap.txt is an input file with tab-delimited fields

# Read every line from the files named on the command line (or from
# standard input), exchange the first two tab-separated fields when the
# line contains a tab, and print the line.
while (defined(my $line = <>)) {
    # The second field stops at a tab or at the line's trailing newline.
    $line =~ s/^([^\t]*)\t([^\t\n]*)/$2\t$1/;
    print $line;
}

Example 3:
#!/bin/sed -f
# swap.sed
# sed: swap first two fields in a line of tab-delimited fields
# Run this program with the command
#   sed -f swap.sed swap.txt
# where swap.txt is an input file with tab-delimited fields
#
# The field separator must be a hard tab.  The "\t" escape used below is
# understood by GNU sed; for a sed without that extension, replace every
# "\t" in the command with a single literal tab character.
s/^\([^\t]*\)\t\([^\t]*\)/\2\t\1/


Example 4: 

/* A program is a possibly empty sequence of expressions, each one
   terminated by a newline. */
program:
    program expression '\n'
|   /* empty */
;
/* An expression is an INTEGER token, two expressions joined by one of the
   four arithmetic operators, or an expression in parentheses.
   NOTE(review): no precedence or associativity declarations are visible in
   this fragment, so the four binary rules are ambiguous as written. */
expression:
    INTEGER
|   expression '+' expression
|   expression '-' expression
|   expression '*' expression
|   expression '/' expression
|   '(' expression ')'
;


Example 5: 

; INTEGER: one or more digits.
macro INTEGER is digit+ macro-end
; Rule-by-rule transcription of the "expression" grammar production into a
; pattern function: one "match" alternative per production.
; NOTE(review): each binary alternative begins by calling "expression"
; itself, and "match INTEGER" is tried before them -- confirm whether this
; listing is meant to be runnable or only to illustrate the transcription.
define switch function expression
as
    do scan #current-input
        match INTEGER
        match expression '+' expression
        match expression '-' expression
        match expression '*' expression
        match expression '/' expression
        match'(' expression ')'
        else
            return false    ; never matched
    done
    return true             ; matched once


Example 6: 

expression      ->  term r-expression
r-expression    ->  epsilon
                    | "+" term r-expression
                    | "-" term r-expression
term            ->  factor r-term
r-term          ->  epsilon
                    | "*" factor r-term
                    | "/" factor r-term
factor          ->  "(" expression ")"
                    | integer

Example 7:  

; Pattern function for the r-expression production: either "+" or "-"
; followed by a term and a further r-expression, or nothing at all.
; Always returns true, because the empty string is an acceptable match.
define switch function r-expression
as
    do scan #current-input
        match "+" term r-expression
        match "-" term r-expression
    done
    ;if not matched above, accept the empty string
    return true

Example 8: 

; The r-expression pattern function again, with comments marking where the
; arithmetic action for each alternative would go.
; Always returns true, because the empty string is an acceptable match.
define switch function r-expression
as
    do scan #current-input
        match "+" term r-expression
            ;pop two integers, add them, push the result
        match "-" term r-expression
            ;pop two integers, subtract them, push the result

    done
    ;if not matched above, accept the empty string
    return true

Example 9: 

    expression      ->  term r-expression
    r-expression    ->  epsilon
                        | add-term r-expression
                        | subtract-term r-expression
*   add-term        -> "+" term
*   subtract-term   -> "-" term
    term            ->  factor r-term
    r-term          ->  epsilon
                        | multiply-factor r-term
                        | divide-factor r-term
*   multiply-factor ->  "*" factor
*   divide-factor   ->  "/" factor
    factor          ->  "(" expression ")"
*                       | integer


Listing One
#!/bin/omnimark -sb
; parnest.xom
; omnimark: pattern functions used in recursive pattern matching
; Run this program with the command
;   omnimark -sb parnest.xom parnest.txt
; where parnest.txt is an input file with nested sets of parentheses
; This program extracts nested matching parentheses from a file
; and prints the outermost set with all of the intervening text
; as a single unit.
; In the following line, the underlined text would be printed out
;   ( skip this ( but ( show ) this ) and ( not this
;               ---------------------
; Try using this program as input to itself:
;   omnimark -sb parnest.xom parnest.xom

; Forward declaration: paren-block and paren-block-interior refer to each
; other, so paren-block is announced here and given its body further down.
define switch function paren-block
elsewhere
; Matches what may appear between an opening and a closing parenthesis:
; any mixture of non-parenthesis characters and complete nested blocks.
; Always returns true, so an empty interior is accepted too.
define switch function paren-block-interior
as
    repeat scan #current-input
        match [any except "()"]     ;any except start or end of block
        match paren-block           ;any contained block
    again
    return true
; Matches one balanced block: "(", its interior (which may contain further
; nested blocks), and the closing ")".  Returns the result of that match.
define switch function paren-block
as
    return #current-input matches ( "(" paren-block-interior ")" )

; Rule 1: a complete balanced block is captured as "text" and written out
; on a line of its own.
find paren-block => text
    ; list each outer block
    output text || "%n"
; Rule 2: any single character not consumed by rule 1 is matched here and
; produces no output.
find any        ; Grab everything rejected by the first 'find' rule
    ; discard all characters
; Entry point.
process
    submit #main-input  ;Send all input through the 'find' rules above


Listing Two
;------------------------------------------------------------------
;   Four-function calculator written with OmniMark pattern matching rules.
;   Supports + - * / and () with correct precedence
;   Requires OmniMark 5.1 or later (free from
;       http://www.omnimark.com)
;   Run the program with the following command line:
;       omnimark -s dc.xom
;   Exit by typing "quit" or EOF (ctrl-Z in Windows, ctrl-D in Unix)
;------------------------------------------------------------------
; Declare both the main input and the main output as unbuffered.
; NOTE(review): presumably so that each typed line is answered immediately
; in interactive use -- confirm.
declare #main-input has unbuffered
declare #main-output has unbuffered

;------------------------------------------------------------------
; integer stack, with stack operators
; Stack: variable-size shelf of counters, initially empty, used as the
; operand stack of the calculator.
global counter Stack variable initial-size 0
; Thrown by pop() when Stack has no items.
declare catch StackUnderflow
; push: append "num" to Stack as a new item.
define function push
(   value counter num   )
as
    set new Stack to num
; pop: take the top item off Stack and return its value.
; Throws StackUnderflow when Stack holds no items.
define counter function pop
()
as
    local counter top-value
    throw StackUnderflow when number of Stack = 0   ;stack is empty
    set top-value to Stack      ;read the top of stack ...
    remove Stack                ;... then drop it
    return top-value
;------------------------------------------------------------------
; patterns and pattern matching functions

; Thrown by divide-factor when the divisor is zero.
declare catch DivisionByZero
; integer: one or more digits (no sign).
macro integer is
(   digit+  )
macro-end

; expression is used by factor before its body appears, so it is declared
; here and defined after the functions it depends on.
define switch function expression
elsewhere                           ;forward definition

; factor -> "(" expression ")" | integer
; Leading blanks are skipped in both alternatives.  A matched integer is
; pushed on Stack; a parenthesized expression needs no action of its own.
; Returns true when one alternative matched, false otherwise.
define switch function factor
as
    do scan #current-input
        match blank* "(" blank* expression blank* ")"
            ;do nothing
        match blank* integer => int
            push( int )
        else
            return false
    done
    return true
; multiply-factor -> "*" factor
; After a successful match the two topmost Stack values are replaced by
; their product.  Returns true on a match, false otherwise.
define switch function multiply-factor
as
    do scan #current-input
        match blank* "*" blank* factor
            local counter multiplier
            set multiplier to pop()
            push( pop() * multiplier )
            return true
    done
    return false
; divide-factor -> "/" factor
; After a successful match the divisor (pushed last) is popped first, then
; the dividend; their quotient is pushed back.
; Throws DivisionByZero when the divisor is 0.
; Returns true on a match, false otherwise.
define switch function divide-factor
as
    do scan #current-input
        match blank* "/" blank* factor
            local counter num
            set num to pop()
            throw DivisionByZero when num = 0
            push( pop() / num )
            return true
    done
    return false
; r-term -> epsilon | multiply-factor r-term | divide-factor r-term
; Always returns true, because matching nothing is acceptable.
define switch function r-term
as
    do scan #current-input
        match multiply-factor r-term
        match divide-factor r-term
    done
    ; if we match nothing else, we can match the empty string
    return true
; term -> factor r-term
; Returns the result of matching that sequence.
define switch function term
as
    return #current-input matches ( factor r-term )
; add-term -> "+" term
; After a successful match the two topmost Stack values are replaced by
; their sum.  Returns true on a match, false otherwise.
define switch function add-term
as
    do scan #current-input
        match blank* "+" term
            local counter addend
            set addend to pop()
            push( pop() + addend )
            return true
    done
    return false
; subtract-term -> "-" term
; After a successful match the two topmost Stack values (left operand
; below, right operand on top) are replaced by left minus right.
; Returns true on a match, false otherwise.
define switch function subtract-term
as
    do scan #current-input
        match blank* "-" term
            ; The right-hand operand was pushed last, so it is popped first.
            ; Holding it in a local, as divide-factor does, keeps the result
            ; independent of the order in which the operands of a single
            ; expression are evaluated.
            local counter subtrahend
            set subtrahend to pop()
            push( pop() - subtrahend )
            return true
    done
    return false
; r-expression -> epsilon | add-term r-expression | subtract-term r-expression
; Always returns true, because matching nothing is acceptable.
define switch function r-expression
as
    do scan #current-input
        match add-term r-expression
        match subtract-term r-expression
    done
    ; if we match nothing else, we can match the empty string
    return true
; expression -> term r-expression
; Body of the function declared "elsewhere" earlier in the listing.
; Returns the result of matching that sequence.
define switch function expression
as
    return #current-input matches ( term r-expression )
;------------------------------------------------------------------
; control loop
; Read-evaluate-print loop.  The inner "repeat scan" handles input line by
; line; the outer "repeat" carries the catch clauses so that the loop can
; continue after DivisionByZero or StackUnderflow has been reported.
process
    repeat
        repeat scan #main-input
            ; A complete expression followed by optional blanks and a
            ; newline: print the value left on top of Stack.  Anything
            ; still on Stack after that is reported as a stack error.
            match expression =>text blank* "%n"
                output "d" % pop() || "%n"
                put #error "Stack error: " || "d" % number of Stack
                            || "%n"
                    when number of Stack != 0
                clear Stack
            ; "quit" (matched with the "ul" modifier) ends the program.
            match blank* ul "quit"
                halt
            ; A line holding only blanks is consumed without any action.
            match blank* "%n"
            ; Any other text is reported as a syntax error.
            ; NOTE(review): Stack is cleared here presumably because a
            ; partly matched expression may already have pushed values.
            match   any-text+ =>text "%n"?
                put #error "Syntax error: %x(text)%n"
                clear Stack
        again
        ; The scan loop has ended: stop the program.
        halt
    catch DivisionByZero
        put #error "Divide by zero%n"
    catch StackUnderflow
        put #error "Stack underflow%n"
    always
        ; Cleanup: discard the rest of the current input line, if any,
        ; and empty the Stack.
        do scan #main-input
            match any-text* "%n"
        done
        clear Stack
    again
;------------------------------------------------------------------



1


