# MailMuncher
# Filter header lines and quoted material from news and email.
# Preserves first three lines of a quote.

# Tested under UNIX awk and gawk 2.15 thru gawk 3.1

# Requires the file "MM.INI" in the current directory: an ASCII list of
# the first words that identify the header lines we want removed.
# This file can be edited; each line should contain a single word.
# Input lines whose first word matches a line of MM.INI
# will not be retained in the output.

BEGIN {
 # Index of the next slot to fill in headerlines[]; it is advanced once
 # for every line successfully read from MM.INI.
 i = 0
 RepeatQuotes = 0  # counts contiguous quoted lines

 # Load the table of header words to strip from the output, one entry
 # per line of "MM.INI".
 #
 # The getline expression is parenthesized on purpose: POSIX leaves the
 # result unspecified when an unparenthesized binary operator (here "> 0")
 # follows the "<" redirection of getline.
 #
 # getline returns 1 for a line read, 0 at end of file and -1 on error
 # (e.g. MM.INI missing), so on error the loop body never runs.
 while ((getline headerlines[i] < "MM.INI") > 0)
    i++

 close("MM.INI")  # the table is loaded; release the file

} # end-begin

# Returns true if a string matches one in the MM.INI file

function IsaHeader(searchstring,    key, found) {
 # Returns 1 if searchstring is equal to one of the entries stored in the
 # global headerlines[] array, otherwise 0.
 #
 #   searchstring  the word to look up (callers pass only this argument)
 #   key, found    local scratch variables; declared as extra parameters
 #                 so that a call does not overwrite any global variable
 found = 0
 for (key in headerlines) {
   if (searchstring == headerlines[key]) {
     found = 1
     break   # must be lowercase: "BREAK" would just name an unset variable
   }
 }
 return found
} #function IsaHeader

# Returns true if line is quoted text.
# checks for a right angle bracket in the first eight columns of the line but
# does not check for vertical bars or other quoting chars.
# a QUOTE THAT IS INDICATED WITH OTHER CHARS OR IS A SPILL-OVER FROM
# WORD-WRAP ONTO A NEW LINE WON'T RETURN TRUE.

 function IsaQuote(line) {
 # Returns 1 if a right angle bracket (">") occurs anywhere in the first
 # eight columns of line, otherwise 0.
 #
 # substr() clips the line to its first eight characters (fewer if the
 # line is shorter) and index() reports the 1-based position of ">" in
 # that prefix, or 0 when it is absent. No global variables are touched.
 return index(substr(line, 1, 8), ">") > 0
} # function IsaQuote

# body of script

# this pattern executes for all input lines (null pattern)
{
 # Runs for every input line. RepeatQuotes tracks how many quoted lines
 # have been seen in a row; any non-quoted line ends the run.
 if (!IsaQuote($0)) {
   RepeatQuotes = 0
 } else {
   # Only the first three lines of a run of quoted lines are kept.
   if (++RepeatQuotes <= 3)
     print
   next  # quoted line fully handled; skip the remaining rules
 }
} # end null pattern

# Tests if not a line to exclude from output
IsaHeader($1) == 0 {
 # The first word of the line is not listed in the header table, so the
 # line is kept and the remaining rules are skipped for it.
 print
 next
} # end !IsaHeader pattern

# this pattern tests for separator line used in digests
# and replaces with a form feed.
$0 ~ /^\-\-\-\-\-\-\-\-\-\-\-/ {
 # A line starting with at least eleven dashes is emitted as a single
 # form feed character (no trailing newline) instead of the line itself.
 # NOTE(review): the rule before this one prints and skips every line whose
 # first word is not in the header table, so this rule is only reached
 # when the separator's first word is itself listed there -- confirm that
 # MM.INI is expected to contain it.
 printf "\f"
 next  # done processing digest separator
} # end line separator pattern

END {
 # No end-of-input processing is performed; the action body is empty.
} # end end-pattern

# end mailmuncher
# last mod: 9/15/99
