An Information Assembly Line In Perl
by Ray Snow


Figure 3:

 1. .begin (story)
 2. .begin (header)
 3. Wallenberg monument inaugurated outside UN 
 4.
 5. .begin (text)
 6. Agence Franco Presse via NewsEdge Corporation : UNITED NATIONS, Nov 9 (AFP) - A monument to Raoul Wallenberg, the Swedish diplomat who saved the lives of tens of thousands of Hungarian Jews & others during World War II, was unveiled outside the United Nations on Monday. 
 7.
 8. <<Agence&#160;Franco&#xa0;Presse&nbsp;--&nbsp;11-09-97>>
 9.
10. .begin (storyid)
11. [11-09-97 at 15:50 EDT, Copyright 1997, Agence Franco Presse, File: g1109154.700]
12. .end (story)


Listing One
 1. #!/bin/sh
 2. # Assembly line "main" for Makefeed Version 3.1:
 3. # -------------------------------------------------------
 4.
 5.     FeedPump    main All 2>> main.FeedPump.log   |\
 6.     BuildTrees  main     2>> main.BuildTrees.log |\
 7.     MarkUp      main     2>> main.MarkUp.log     |\
 8.     Index       main     2>> main.Index.log      |\
 9.     CheckFeed   main     2>> main.CheckFeed.log  |\
10.     Aggregate   main     2>> main.Aggregate.log  |\
11.     SendFeed    main     2>> main.SendFeed.log

Listing Two
 
 1. $tag01 = "\\.begin \\(header\\)";          # Escape Period & Parentheses.
 2. $tag02 = "\\.begin \\(text\\)";            # Here too.
 3.
 4. if (! m/${tag01}\s+(.+)\s+${tag02}/m )     # Look for a match.
 5.     {
 6.     print STDERR "Can't find headline.\n"; # If not found, error.
 7.     }
 8. else
 9.     {
10.     $headline = $1;                    # If so, $1 contains the headline.
11.     }

Listing Three
 1. my $num =  s/[\x00-\x07\x0b\x0c\x0e-\x1f]//g;
 2. if ( $num > 0 )
 3.    {
 4.    print STDERR "Replaced $num ASCII Control Characters.\n";
 5.    }

Listing Four
 1. $tbl{"<"}  = "&lt;"   ;
 2. $tbl{">"}  = "&gt;"   ;
 3. $tbl{"&"}  = "&amp;"  ;
 4. $tbl{"\""} = "&quot;" ;
 5.
 6. $dec_char_ent = "#\\d{1,3}";             # Like in "&#160;".
 7. $hex_char_ent = "#x[0-9A-Fa-f]{1,2}";    # Like in "&#xa0;".
 8. $gen_char_ent = "[0-9A-Za-z]{1,6}";      # Like in "&nbsp;".
 9. $choices      = "${dec_char_ent}|${hex_char_ent}|${gen_char_ent}";
10.
11. $num =  s/&(?!${choices};)/$tbl{"&"}/g ; # Escape SOME Ampersands; Not all.
12. if ($num > 0)
13.     {
14.     print STDERR "Replaced $num Ampersands (\"&\").\n";
15.     }
16. $num =  s/([<>"])/$tbl{$1}/g ;  # Escape Less-Than, Greater-Than, & Quote.
17. if ($num > 0)
18.     {
19.     print STDERR "Replaced $num special HTML characters 
20.                                      with SGML Standard Entities.\n";
21.     }






2

