#  parse.tcl
#  Searches a source file for matches to the parse criteria.
#  Matches are stored in a destination file.
#  The number of matches is returned to the calling program.

# parse --
#
#   Scans the text obtained from `source` for fragments that start with
#   the begin criterion and end with the end criterion, and writes each
#   accepted fragment (newlines stripped) as one line of the destination
#   file.  Criteria are matched case-insensitively; the fragment written
#   keeps the original case of the text.
#
# Arguments:
#   source  Passed unchanged to readEntireFile (defined elsewhere);
#           presumably the name of the file to scan -- confirm there.
#   bCrit   Begin criterion: text that marks the start of a fragment.
#   gap     Maximum number of characters allowed between the end of the
#           begin criterion and the start of the end criterion.
#   eCrit   End criterion: text that marks the end of a fragment.
#   dest    Name of the output file; it is opened in "w" mode, so any
#           existing content is replaced.
#
# Results:
#   Returns the number of fragments written to `dest`.
#
# Notes:
#   * The end criterion is always searched for AFTER the begin criterion,
#     so a stray end criterion earlier in the text is ignored.
#   * When the nearest end criterion is further away than `gap`, the
#     begin criterion is skipped and scanning resumes just after its
#     first character (previously this case looped forever).

proc parse { source bCrit gap eCrit dest } {

  # Criteria are compared against a lower-cased copy of the text.
  set bCrit [string tolower $bCrit]
  set eCrit [string tolower $eCrit]
  set bCritLen [string length $bCrit]
  set eCritLen [string length $eCrit]

  # Keep the original text for output and a lower-cased copy for searching.
  set rawHTML [readEntireFile $source]
  set lowHTML [string tolower $rawHTML]

  set destFile [open $dest "w"]

  set count 0
  set pos 0   ;# index in the text where the next search starts

  # Run the scan under catch so the channel is closed even if a write fails.
  set failed [catch {
    while { 1 } {
      # "string first" yields -1 for a missing (or empty) criterion.
      set bCritLoc [string first $bCrit $lowHTML $pos]
      if { $bCritLoc < 0 } { break }

      # Only an end criterion that follows the begin criterion counts.
      set eCritLoc [string first $eCrit $lowHTML \
                      [expr {$bCritLoc + $bCritLen}]]
      # No end criterion after this begin => none after any later begin.
      if { $eCritLoc < 0 } { break }

      set actualGap [expr {$eCritLoc - $bCritLoc - $bCritLen}]
      if { $actualGap <= $gap } {
        # Accepted: emit begin criterion through end criterion inclusive.
        set foundData [string range $rawHTML $bCritLoc \
                         [expr {$eCritLoc + $eCritLen - 1}]]
        puts $destFile [string map [list "\n" ""] $foundData]
        incr count
        set pos [expr {$eCritLoc + $eCritLen}]
      } else {
        # Too far apart: step past this begin criterion so the loop
        # always makes progress and a later begin can still match.
        set pos [expr {$bCritLoc + 1}]
      }
    }
  } errMsg]

  if { $failed } {
    # Preserve the original error context across the cleanup.
    set savedInfo $::errorInfo
    set savedCode $::errorCode
    catch { close $destFile }
    error $errMsg $savedInfo $savedCode
  }

  close $destFile
  return $count
}