#!/usr/bin/perl
# -------------------------------------------------------------
# file:    sa-stats.pl (SARE release)
# created: 2005-01-31
# updated: 2007-01-30
# version: 1.03
# author:  Dallas Engelken
# desc:    Generates Top Spam/Ham Rules fired for SA 3.1.x installations.
#
# IMPORTANT NOTES
#
# SA 3.0.x log files do not have user= in
# the report: log entries, so this does not work with 3.0.
# See http://www.rulesemporium.com/programs/sa-stats.txt for
# a SA 3.0.x version ( no per-domain / per-user support )
#
# If your top 5 does not contain URIBL_BLACK, see
# http://www.uribl.com/usage.shtml
# -------------------------------------------------------------
# Per User and Per Domain Statistics...
# -------------------------------------------------------------
#
# ./sa-stats -r postmaster
#   - this would give all stats for postmaster users,
#     regardless of which domain it was for.  handy if you
#     have a lot of domain aliases
#
# ./sa-stats -r @domain
#   - this would give all stats for the domain specified.
#     make sure you include the '@' sign before the
#     domain or the script will assume you wanted a user
#     name instead.
#
# ./sa-stats -r user@domain.com
#   - this would give all stats for a specific email address.
#     this assumes you pass 'spamc -u <user@domain.com>' vs.
#     'spamc -u <user>'.  If you do the latter, you simply
#     want to call -r <user> instead.
#
# -------------------------------------------------------------

use strict;
use warnings;

use Getopt::Long;
use Pod::Usage;

my ($LOG_DIR,$FILE,$TOPRULES,$PRINT_TO_WEB,$HELP,$RECIP);

GetOptions (
  'logdir|l=s'   => \$LOG_DIR,
  'filename|f=s' => \$FILE,
  'recip|r=s'    => \$RECIP,
  'num|n=i'      => \$TOPRULES,
  'web|w'        => \$PRINT_TO_WEB,
  'help|h'       => \$HELP
);

if ($HELP) {
  print "usage: $0 [-l <dir>] [-f <file>] [-r <recip>] [-n <num>] [-w]\n";
  print "\t--logdir|-l <dir>\tDirectory containing spamd logs\n";
  print "\t--filename|-f <file>\tFile names or regex to look for in the logdir\n";
  print "\t--recip|-r <recip>\tOnly count mail for this user, \@domain or user\@domain\n";
  print "\t--num|-n <num>\tNumber of top rules to display\n";
  print "\t--web|-w\tMake it web friendly output\n";
  print "\t--help|-h\tPrints this help\n";
  exit;
}

# Defaults for anything not given on the command line.
if (!defined $TOPRULES) { $TOPRULES=20 }
if (!defined $LOG_DIR)  { $LOG_DIR="/var/log/qmail/spamd" }
if (!defined $FILE)     { $FILE='^current$' }  # regex

# LEAVE THE REST ALONE UNLESS YOU KNOW WHAT YOU ARE DOING...
################################################################

# Running totals, filled in by calcstats() and reported by summarize().
my $NUM_EMAIL=0;          # messages counted
my $NUM_SPAM=0;
my $NUM_HAM=0;
my $EMAIL_HITS=0;         # summed scores
my $SPAM_HITS=0;
my $HAM_HITS=0;
my %SPAM_RULES=();        # rule name => times fired on spam
my %HAM_RULES=();         # rule name => times fired on ham
my $TOTAL_SPAM_RULES=0;
my $TOTAL_HAM_RULES=0;
my $ALSPAM=0;             # autolearn=spam / ham / anything else
my $ALHAM=0;
my $ALNO=0;
my $HAM_SEC=0;            # summed scantime values
my $SPAM_SEC=0;
my $EMAIL_SEC=0;

my $footer = '';

# Collect every directory entry whose name matches the -f regex.
my @logs;
if (opendir(my $dh, $LOG_DIR)) {
  @logs = grep { /$FILE/i } readdir $dh;
  closedir $dh;
}
else {
  # Keep going: the report is still printed, just with empty totals.
  warn "Cannot open log directory $LOG_DIR: $!\n";
}

foreach my $log (@logs) {
  calcstats($LOG_DIR."/".$log);
}

summarize();

exit;

#############################
# calcstats($path)
#   Reads one spamd log file and adds every matching "result:" line
#   to the file-level counters.  Lines that do not match the expected
#   format, or that belong to a recipient excluded by -r, are skipped.
#   Returns nothing.
sub calcstats {
  my $log=shift;

  if (!-e $log || -d $log) {
    print "$log not found..\n";
    return;
  }

  my $fh;
  if (!open($fh, '<', $log)) {
    warn "Cannot open $log: $!\n";
    return;
  }

  while (my $line = <$fh>) {

    # for user=, it may be %domain or $GLOBAL or @GLOBAL or user@domain..
    next unless $line =~ /.*result:\s+(\w|\.)\s+(\-?\d+)\s+\-\s+(.*)\s+scantime\=([\d\.]+)\,size\=(\d+).*user=([^\,]+).*autolearn=(\w+)/;

    # $5 (size=) is matched but not used by any statistic.
    my ($result,$score,$rules,$time,$recip,$learn) = ($1,$2,$3,$4,$6,$7);

    my ($user,$domain) = _split_recipient($recip);
    next unless _recipient_selected($user,$domain);

    my $spam = ($result eq "Y") ? 1 : 0;

    if ($spam) { $SPAM_SEC+=$time; }
    else       { $HAM_SEC+=$time; }
    $EMAIL_SEC+=$time;

    if    ($learn =~ /ham/)  { $ALHAM++; }
    elsif ($learn =~ /spam/) { $ALSPAM++; }
    else                     { $ALNO++; }

    foreach my $r (split(/\,/,$rules)) {
      if ($spam) {
        $TOTAL_SPAM_RULES++;
        $SPAM_RULES{$r}++;
      }
      else {
        $TOTAL_HAM_RULES++;
        $HAM_RULES{$r}++;
      }
    }

    if ($spam) {
      $NUM_SPAM++;
      $SPAM_HITS += $score;
    }
    else {
      $NUM_HAM++;
      $HAM_HITS += $score;
    }
    $NUM_EMAIL++;
    $EMAIL_HITS += $score;
  }

  close($fh);
  return;
}

#############################
# _split_recipient($recip)
#   Splits the user= value of a log line into ($user, $domain), where
#   $domain always carries a leading '@'.
#     %domain / @domain  => ('',     '@domain')
#     user@domain        => ('user', '@domain')
#     anything else      => ($recip, '@localhost')
sub _split_recipient {
  my $recip=shift;

  # These cases are mutually exclusive; falling through from the first
  # to the last would relabel a domain-only entry as a local user.
  if ($recip =~ m/^[\%\@](.+)/) {
    return ('', '@'.$1);
  }
  if ($recip =~ m/(.+)\@(.+)/) {
    return ($1, '@'.$2);
  }
  return ($recip, '@localhost');
}

#############################
# _recipient_selected($user, $domain)
#   Returns true when the log entry should be counted under the -r
#   filter (always true when -r was not given).
#     -r @domain / %domain  => compare the domain only
#     -r user@domain        => compare the full address
#     -r user               => compare the user part only
sub _recipient_selected {
  my ($user,$domain)=@_;

  return 1 if (!$RECIP);

  if ($RECIP =~ m/^[\%\@](.+)/) {
    # Domains are stored with a leading '@', so normalise a '%' prefix.
    my $wanted = '@'.$1;
    return $wanted eq $domain;
  }
  if ($RECIP =~ m/(.+)\@(.+)/) {
    return $RECIP eq $user.$domain;
  }
  return $RECIP eq $user;
}

#############################
# _average($total, $count)
#   Returns $total/$count formatted to two decimals, or 0 when
#   $count is not positive.
sub _average {
  my ($total,$count)=@_;
  return 0 if ($count <= 0);
  return sprintf("%.2f",$total/$count);
}

#############################
# _percent($part, $total)
#   Returns $part as a percentage of $total formatted to two decimals,
#   or "0.00" when $total is not positive.
sub _percent {
  my ($part,$total)=@_;
  return "0.00" if ($total <= 0);
  return sprintf("%.2f",($part/$total)*100);
}

#############################
# _print_top_rules($title, \%counts)
#   Prints one ranked rule table.  Rows are ordered by the count in
#   %counts (highest first, rule name as tie-breaker) and limited to
#   $TOPRULES rows when $TOPRULES is greater than zero.  The percentage
#   columns always come from %SPAM_RULES / %HAM_RULES.
sub _print_top_rules {
  my ($title,$counts)=@_;

  print $title;
  print " FOR $RECIP" if ($RECIP);
  print "\n";

  hr();
  printf("%4s\t%-24s\t%5s %8s %7s %7s\n","RANK","RULE NAME","COUNT","\%OFMAIL","\%OFSPAM","\%OFHAM");
  hr();

  my $rank=0;
  foreach my $key (sort { $counts->{$b} <=> $counts->{$a} || $a cmp $b } keys %$counts) {
    # A rule may have fired for only one class; treat the other as 0.
    my $spamfired = $SPAM_RULES{$key} || 0;
    my $hamfired  = $HAM_RULES{$key}  || 0;

    $rank++;
    printf("%4d\t%-24s\t%5s\t%6.2f\t%6.2f\t%6.2f\n",
           $rank,$key,$counts->{$key},
           _percent($spamfired+$hamfired,$NUM_EMAIL),
           _percent($spamfired,$NUM_SPAM),
           _percent($hamfired,$NUM_HAM));

    last if ($TOPRULES > 0 && $rank >= $TOPRULES);
  }
  hr();
  br();
  return;
}

#############################
# summarize()
#   Prints the report built from the file-level counters: overall
#   totals, time spent scanning, then the top spam and top ham rule
#   tables.  With -w the report is preceded by a CGI Content-type
#   header and wrapped in <pre>.
sub summarize {

  print "Content-type: text/html\n\n" if ($PRINT_TO_WEB);
  print "<pre>" if ($PRINT_TO_WEB);

  my $avgspamhits  = _average($SPAM_HITS,$NUM_SPAM);
  my $avgspamtime  = _average($SPAM_SEC,$NUM_SPAM);
  my $avghamhits   = _average($HAM_HITS,$NUM_HAM);
  my $avghamtime   = _average($HAM_SEC,$NUM_HAM);
  my $avgemailhits = _average($EMAIL_HITS,$NUM_EMAIL);
  my $avgemailtime = _average($EMAIL_SEC,$NUM_EMAIL);

  print "\n\n";

  if ($RECIP) {
    print "SPAM STATS FOR $RECIP\n";
    print "-" x 60 . "\n";
  }

  my $ALTOT=$ALSPAM+$ALHAM;
  printf("Email: %8s  Autolearn: %5s  AvgScore: %6.2f  AvgScanTime: %5.2f sec\n",$NUM_EMAIL,$ALTOT,$avgemailhits,$avgemailtime);
  printf("Spam:  %8s  Autolearn: %5s  AvgScore: %6.2f  AvgScanTime: %5.2f sec\n",$NUM_SPAM,$ALSPAM,$avgspamhits,$avgspamtime);
  printf("Ham:   %8s  Autolearn: %5s  AvgScore: %6.2f  AvgScanTime: %5.2f sec\n",$NUM_HAM,$ALHAM,$avghamhits,$avghamtime);

  br();
  printf "Time Spent Running SA:      %7.2f hours\n",$EMAIL_SEC/60/60;
  printf "Time Spent Processing Spam: %7.2f hours\n",$SPAM_SEC/60/60;
  printf "Time Spent Processing Ham:  %7.2f hours\n",$HAM_SEC/60/60;

  br();

  _print_top_rules("TOP SPAM RULES FIRED",\%SPAM_RULES);
  _print_top_rules("TOP HAM RULES FIRED",\%HAM_RULES);

  print "</pre>\n" if ($PRINT_TO_WEB);
  print $footer if ($PRINT_TO_WEB && $footer ne "");
  print "\n";
  return;
}

#######################
# hr()
#   Prints a horizontal rule: an <hr> tag in web mode, otherwise a
#   line of 70 dashes.
sub hr {
  if ($PRINT_TO_WEB) {
    print "<hr>";
  }
  else {
    print "-" x 70 ."\n";
  }
  return;
}

#######################
# br()
#   Prints a line break: a <br> tag in web mode, otherwise a newline.
sub br {
  if ($PRINT_TO_WEB) {
    print "<br>";
  }
  else {
    print "\n";
  }
  return;
}