#!/usr/local/bin/perl5 -s
#
# This file is part of SnarfNews
# Copyright (C) 1991,1992,1993,1994,1995,1996 Alec Muffett
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#

# Running totals of articles passed through / discarded.
($bkeep, $bdrop) = (0, 0);

# Header regexps collected from "header" lines of the config file.
@patterns = ();

# Limits taken from the config file; a value of 0 disables the check.
($xpostmax, $sizemax) = (0, 0);

# Largest chunk that cat() reads from STDIN in one go.
$blksz = 8 * 1024;

# cat(REMAIN, FLAG)
#
# Consume REMAIN bytes from STDIN, reading at most $blksz bytes at a time.
# When FLAG is true the bytes are copied to the currently selected output
# handle; when it is false they are simply thrown away.
#
# If STDIN hits end-of-file (read returns 0) or fails (read returns undef)
# before REMAIN bytes have arrived, a warning is issued and the loop stops;
# without that check a truncated batch would make the loop spin forever.
#
# Returns the number of bytes actually consumed.
sub cat
{
    my ($remain, $flag) = @_;
    my ($buffer, $done);
    my $total = 0;

    # Guard against an unset or non-positive block size, which would turn
    # every read into a zero-length read.
    my $chunk = ($blksz > 0) ? $blksz : 8192;

    while ($remain > 0)
    {
	$done = read(STDIN, $buffer,
		     (($remain < $chunk) ? $remain : $chunk));

	unless ($done)
	{
	    warn "filterbatch: ",
		 (defined($done) ? "premature EOF" : "read error ($!)"),
		 " with $remain bytes outstanding\n";
	    last;
	}

	$remain -= $done;
	$total += $done;
	print $buffer if ($flag);
    }

    return $total;
}

# Locate and load the filter configuration.
#
# The first command-line argument names the configuration ("default" when
# absent).  It is looked up first as $SNARFCONF/filter/<name> and then as
# a plain path.  If neither is a readable file, no filtering is configured
# and $cf is set to "(none)".
#
# Recognised lines are "<key> <value>"; lines starting with '#' or
# whitespace (including blank lines) are skipped:
#
#   header <regexp>    drop articles with a header matching <regexp>
#   xpost  <number>    drop articles posted to more than <number> groups
#   size   <n>[k|m]    drop articles larger than <n> bytes/kilo/megabytes
#
# Unrecognised keys are ignored.
$cf = undef;
$config = shift || "default";

# Only build the $SNARFCONF path when the variable is set; otherwise the
# lookup would probe the absolute path "/filter/<name>".
$config2 = defined($ENV{SNARFCONF}) ? "$ENV{SNARFCONF}/filter/$config" : undef;

if (defined($config2) && -f $config2)
{
    $cf = $config2;
}
elsif (-f $config)
{
    $cf = $config;
}

# Explicit "<" so that a file name can never be taken as an open mode.
if (defined($cf) && open(CONFIG, "< $cf"))
{
    while (<CONFIG>)
    {
	next if (/^(\#|\s)/);
	chomp;

	($key, $value) = split(/\s+/, $_, 2);
	$value = "" unless (defined($value));

	warn "filterbatch: key='$key' value='$value'\n" if ($debug);

	if ($key eq "header")
	{
	    # An empty pattern must not reach the matcher: Perl treats an
	    # empty regexp as "reuse the last successful pattern".
	    if ($value eq "")
	    {
		warn "filterbatch: $cf line $.: empty header pattern ignored\n";
		next;
	    }
	    push(@patterns, $value);
	}
	elsif ($key eq "xpost")
	{
	    # A non-numeric value would be true yet compare as 0, which
	    # would drop every article; insist on a leading number.
	    if ($value =~ /^(\d+)/)
	    {
		$xpostmax = $1;
	    }
	    else
	    {
		warn "filterbatch: $cf line $.: bad xpost value '$value' ignored\n";
	    }
	}
	elsif ($key eq "size")
	{
	    if (lc($value) =~ /^([0-9]+(\.[0-9]+)?)\s*([km])?/)
	    {
		$sizemax = $1;
		$unit = defined($3) ? $3 : "";

		if ($unit eq "k")
		{
		    $sizemax *= 1024;
		}
		elsif ($unit eq "m")
		{
		    $sizemax *= 1048576;
		}
		$sizemax = int($sizemax);
	    }
	    else
	    {
		warn "filterbatch: $cf line $.: bad size value '$value' ignored\n";
	    }
	}
    }
    close(CONFIG);
}
else
{
    $cf = "(none)";
}

warn "filterbatch: cf=$cf xpost=$xpostmax size=$sizemax\npatterns=@patterns\n\n" if ($debug);

# Main loop: read an rnews batch from STDIN one article at a time and copy
# to STDOUT only those articles that pass the configured filters.
#
# Each article starts with a "#! rnews <bytes>" tag, followed by <bytes>
# bytes of article (headers, blank separator line, body).  An article is
# dropped when, in this order:
#   - its size exceeds $sizemax (if non-zero),
#   - any header line matches one of @patterns (case-insensitively),
#   - its Newsgroups line lists more than $xpostmax groups (if non-zero).
# Dropped articles are consumed from STDIN and produce no output.
while (<STDIN>)
{
    $tag = $_;

    unless (/^\#!\s*rnews\s+(\d+)/)
    {
	die "filterbatch: cannot find rnews tag at STDIN line $.\n";
    }

    $size = $1;

    warn "filterbatch: new batch, $size bytes\n" if ($debug);

    if ($sizemax && ($size > $sizemax))
    {
	&cat($size, 0);
	warn "filterbatch: dropped [size $size]\n\n" if ($debug);
	$bdrop++;
	next;
    }

    @header = ();
    $blank = "";		# the separator line that ends the header
    $hsize = 0;
    $dropit = 0;

    while (<STDIN>)
    {
	warn "| $_" if ($debug); # debug info
	$hsize += length($_);

	# The blank line ends the header.  Keep it apart from @header so
	# that the last header line is not left ending in "\n\n", which
	# would stop patterns anchored with '$' from matching it.
	if (/^\s*$/)
	{
	    $blank = $_;
	    last;
	}

	if (/^\s/ && ($#header >= 0))
	{
	    # continuation line: fold it into the previous header
	    $header[$#header] .= $_;
	}
	else
	{
	    push(@header, $_);
	}
    }

    # Nothing at all after the tag means STDIN ended there: the batch is
    # truncated, so stop rather than emit a tag with no article.
    if ($hsize == 0)
    {
	die "filterbatch: empty header\n";
    }

    if ($hsize > $size)
    {
	die "filterbatch: lost sync at article size $size\n";
    }

    foreach $pattern (@patterns)
    {
	# An empty regexp would silently reuse the last successful
	# pattern, so never hand one to the matcher.
	next unless (defined($pattern) && $pattern ne "");

	if (@matches = grep(/$pattern/i, @header))
	{
	    warn "filterbatch: matches $pattern\n @matches" if ($debug);
	    $dropit = 1;
	    last;
	}
    }

    if ($dropit)
    {
	&cat(($size - $hsize), 0);
	warn "filterbatch: dropped [header]\n\n" if ($debug);
	$bdrop++;
	next;
    }

    # Number of groups = commas in the Newsgroups line + 1 (an article
    # with no such line counts as posted to one group).
    ($ngline) = grep(/^newsgroups/i, @header);
    $xpostnum = defined($ngline) ? (($ngline =~ tr/,//) + 1) : 1;

    if ($xpostmax && ($xpostnum > $xpostmax))
    {
	&cat(($size - $hsize), 0);
	warn "filterbatch: dropped [xpost $xpostnum]\n\n" if ($debug);
	$bdrop++;
	next;
    }

    print $tag, @header, $blank;
    &cat(($size - $hsize), 1);
    warn "filterbatch: ok\n\n" if ($debug);
    $bkeep++;
}

warn "filterbatch: kept=$bkeep dropped=$bdrop\n" if ($debug);

exit 0;
