#!/usr/bin/perl
#++
#   $Id: blacklist.pl,v 1.10 2005/08/13 12:04:13 az Exp $
# 
#   File:		blacklist.pl
#   Date:		Tue Aug  2 13:04:27 2005
#   Author:		Alexander Zangerl (az)
# 
#   Abstract:
#	based on an idea by pettingers (at) gmail.com
#	http://www.pettingers.org/code/sshblack.html
#
#   License: GPL v1 or v2
#
# 	args: -d chattyness, logfile to watch. currently looks 
#	only at ssh attempts
#	DEBUG: numerical chattyness. 5 shows blocks and unblocks, 
#	10 shows all crap.
#	negative chattyness goes to syslog
#	
#   Modifications: 
#	$Log: blacklist.pl,v $
#	Revision 1.10  2005/08/13 12:04:13  az
#	updated regexes once more, tested
#
#	Revision 1.9  2005/08/13 02:26:54  az
#	added illegal user crap
#
#	Revision 1.8  2005/08/06 05:57:09  az
#	updated license tags for snarfed code
#
#	Revision 1.7  2005/08/06 05:02:07  az
#	changed to non-closing file
#
#--
use strict;
use Getopt::Std;
use Sys::Syslog qw(:DEFAULT setlogsock);
use File::Basename;
use Proc::PID::File;
use Time::Local;

die "already running\n" if Proc::PID::File->running();

# config section
# two subs that do blocking and unblocking. in my case: no more ip.
my $blockf=
    sub { system(qw(/sbin/iptables -A blacklist --source),$_[0],qw(-j logdrop));};
my $unblockf=
    sub { system(qw(/sbin/iptables -D blacklist --source),$_[0],qw(-j logdrop));};

my $maxfails=6;			# somany hits in window -> kaboom
my $window=600;			# 10 min window; ignore hits older than this
my $checkdelta=120;		# look for old blocks to get rid of that often
my $blocktime=4*3600;		# 4 hours default block
my $delay=10;

# localhost, tunnels...
my $whitelist="^(127\.|192\.168\.";

my $slogid=basename($0);
my $slogfac="user";
# internal variables from here onwards

my %months_map = (
    'Jan' => 0, 'Feb' => 1, 'Mar' => 2,
    'Apr' => 3, 'May' => 4, 'Jun' => 5,
    'Jul' => 6, 'Aug' => 7, 'Sep' => 8,
    'Oct' => 9, 'Nov' =>10, 'Dec' =>11,
    'jan' => 0, 'feb' => 1, 'mar' => 2,
    'apr' => 3, 'may' => 4, 'jun' => 5,
    'jul' => 6, 'aug' => 7, 'sep' => 8,
    'oct' => 9, 'nov' =>10, 'dec' =>11,
);

my @str2time_last_minute;
my $str2time_last_minute_timestamp;

# args: logfile, maybe -d.
my ($logfile,%opts);
my $usage="usage: $0 [-d chattyness] logfile\n";
die $usage if (!getopts("d:",\%opts) ||
	       !($logfile=$ARGV[0]) || !-f $logfile);

my $chattyness=$ENV{DEBUG};
$chattyness=$opts{d} if (exists($opts{d}));

openlog($slogid,"pid",$slogfac);
setlogsock('unix');

my %badass;			# key: ip address, value: array of timestamps
my %blocked;			# key: ip address, value: timestamp of block
my $lastcleaned=time();

my ($fh,$inode,$offset);
while (1)
{
    my $now=time();
    &debug(10,"waking\n");
    # do cleanup if required
    if ($now>$lastcleaned+$checkdelta)
    {
	&debug(9,"cleaning out old blocks\n");
	# remove expired blocks
	for my $i (keys %blocked)
	{
	    next if ($now<=$blocked{$i}+$blocktime);
	    &debug(5,"unblocking $i, blocked since ".scalar localtime($blocked{$i})."\n");
	    &$unblockf($i);
	    delete($blocked{$i});
	}
	$lastcleaned=$now;
	&debug(9,"cleaning out old blocks done\n");
    }

    my @newlines;
    # read new lines
    ($fh,$inode,$offset,@newlines)=&logtail($fh,$logfile,$inode,$offset);
    &debug(10,"logtail returns fh $fh inode $inode, offset $offset, "
	   .scalar(@newlines)." lines\n");

    my $lr;
    for (@newlines)
    {
	my $lr=parseline($_);
	&debug(10,"parser returns \"".$lr->{text}."\" from ".$lr->{program}."\n");
	# search text for pattern
	next if ($lr->{program} ne "sshd");
	
	# Failed none for illegal user luser from 192.168.105.2 port 57917 ssh2
# Failed keyboard-interactive/pam for illegal user luser from 192.168.105.2 port 57917 ssh2
#Failed password for az from 131.244.8.139 port 52195 ssh2
#Illegal user corey from 66.15.145.131
	if ($lr->{text}=~
	    /^(?:Failed \S+ for(?: illegal user)?|Illegal user) \S+ from (\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) /o)
	{
	    my $ip=$1;
	    &debug(10,"got hit from $ip\n");
	    if ($ip=~/$whitelist/o)
	    {
		&debug(10,"$ip is whitelisted\n");
		next;
	    }
	    
	    # look at status of badass: enough hits recently?
	    push @{$badass{$ip}},$lr->{timestamp};
	    &debug(8,"$ip known hits: ".join(" ",@{$badass{$ip}})."\n");
	    # weed out the stale hits outside the window
	    my @recent=grep($_>$now-$window, @{$badass{$ip}});
	    &debug(8,"$ip recent hits: ".join(" ",@recent)."\n");
	    $badass{$ip}=\@recent;
	      
	    # block if badass
	    if (@recent>=$maxfails)
	    {
		if ($blocked{$ip})
		{
		    &debug(6,"duplicate block $ip: existing since ".
			   scalar localtime($blocked{$ip})."\n");
		}
		else
		{
		    &debug(5,"blocking $ip\n");
		    &$blockf($ip);
		    $blocked{$ip}=$now;
		}
	    }
	}
    }
    sleep($delay);
}
return 0;

sub debug
{
    my ($importance,@msg)=@_;

    if ($chattyness<0)
    {
	syslog("debug",join(" ",@msg)) if (-$chattyness>=$importance);
    }
    else
    {
	print STDERR (scalar localtime().": ",@msg) if ($chattyness>=$importance);
    }
}


# code extracted/extended from logtail
# Author: Paul Slootman <paul@debian.org> 2001/03/14
# Licence: GPL
# args: (open) fhandle, logfile, inode, offset
# returns: inode, offset, array of loglines
# dies on errors
sub logtail
{
    my ($fh, $logfile,$inode,$offset)=@_;
    my @result;
    die "File $logfile cannot be read.\n"
	if (! -f $logfile);
    if (!$fh)
    {
	die "File $logfile cannot be read.\n" 
	    if (!open($fh, $logfile));
    }

    my ($ino, $size);
    die "Cannot get $logfile file size.\n" 
	unless ((undef,$ino,undef,undef,undef,undef,undef,$size) 
		= stat $logfile);

    return ($fh,$ino,$size,@result) if (!$inode); # starting up -> at end please

    if ($inode == $ino) 
    {
	return ($fh,$inode,$offset,@result) if $offset == $size; # short cut
	$offset=0	if ($offset > $size); #  rotated in place
    }
    else
    {
	close $fh;
	die "File $logfile cannot be read.\n" 
	    if (!open($fh, $logfile));
    }
	
    $offset = 0 if ($inode!=$ino || $offset > $size);    

    
    seek($fh, $offset, 0);
    @result=<$fh>;
    $size = tell $fh;

    return ($fh, $ino,$size,@result);
}


# str2time and parseline snarfed/modified 
# from parse::syslog (which sucks otherwise as it only works on
# files or tails (and file::tail sucks even worse).
# LICENSE: This module is free software; you can redistribute it and/or 
#	modify it under the same terms as Perl itself.
# AUTHOR: David Schweikert <dws@ee.ethz.ch>
# Copyright: (c) 2001, Swiss Federal Institute of Technology, Zurich. 

# 0: sec, 1: min, 2: h, 3: day, 4: month, 5: year
sub str2time($$$$$$$)
{
    my $GMT = pop @_;

    if(defined $str2time_last_minute[4] and
        $str2time_last_minute[0] == $_[1] and
        $str2time_last_minute[1] == $_[2] and
        $str2time_last_minute[2] == $_[3] and
        $str2time_last_minute[3] == $_[4] and
        $str2time_last_minute[4] == $_[5])
    {
        return $str2time_last_minute_timestamp + $_[0];
    }

    my $time;
    if($GMT) {
        $time = timegm(@_);
    }
    else {
        $time = timelocal(@_);
    }

    @str2time_last_minute = @_[1..5];
    $str2time_last_minute_timestamp = $time-$_[0];

    return $time;
}

sub parseline
{
    my ($str) = @_;

    # date, time and host 
    $str =~ /^
	(\S{3})\s+(\d+)   # date  -- 1, 2
	\s
	(\d+):(\d+):(\d+) # time  -- 3, 4, 5
	\s
	([-\w\.]+)        # host  -- 6
	\s+
	(.*)              # text  -- 7
	$/x or return undef;
    
    my $mon = $months_map{$1};
    defined $mon or return undef;
    
    # convert to unix time
    my $time = str2time($5,$4,$3,$2,$mon,(localtime)[5],0);
    my ($host, $text) = ($6, $7);

    # some systems send over the network their
    # hostname prefixed to the text. strip that.
    $text =~ s/^$host\s+//;

    # discard ':' in HP-UX 'su' entries like this:
    # Apr 24 19:09:40 remedy : su : + tty?? root-oracle
    $text =~ s/^:\s+//;
    
    $text =~ /^
	([^:]+?)        # program   -- 1
	(?:\[(\d+)\])?  # PID       -- 2
	:\s+
	(?:\[ID\ (\d+)\ ([a-z0-9]+)\.([a-z]+)\]\ )?   # Solaris 8 "message id" -- 3, 4, 5
	(.*)            # text      -- 6
	$/x or return undef;
    
    return {
	timestamp => $time,
	host      => $host,
	program   => $1,
	pid       => $2,
	msgid     => $3,
	facility  => $4,
	level     => $5,
	text      => $6,
    };
}

