#!/bin/bash
### BEGIN INIT INFO
# Provides:          bmsf.sh
# Required-Start:    
# Required-Stop:     
# Default-Start:     2 3 4 5
# Default-Stop:      0 1 6
# Short-Description: Analyse and mark email
# Description:       Analyse and mark email
### END INIT INFO
#	mail:mlouk@gmx.fr
#start_spam_list (WARNING: do not remove this line)
##Example:
#MARK:					Set MARK in Subject, like: `Subject: MARK ...`
#H:MARK:HEADER				Match regex HEADER against the header lines
#h:inv:head:MARK:exp			Match exp in header "head", e.g. "Subject" (or "From"[ |=]); inv=1: invert selection
#B:count:MARK:BODY			Match when BODY occurs at least count times in the body
#Config function; works with patterns. Not sure about regex, not really tested. Escape sequence: write \\. and not only \.
#B:3:SPAM:Les experts
#h:0:From:SYSTEM:vlad-tepes
# Print the spam rules, one per line (rule syntax is described in the comment
# block above). Each rule is a double-quoted argument, so a literal backslash
# is still written as \\ exactly as in the rule examples.
function spam_list ()
{
	printf '%s\n' \
		"H:TV:robot@programme-television\\.org" \
		"h:0:To:kloknok:Mouaha\\.srelevre@ptet.pa" \
		"H:SPAM:bons*plans*" \
		"H:SPAM:reply@" \
		"h:0:From:TV:Télérama"
}
#end_spam_list (WARNING: do not remove this line)
# Command strings shared by the functions below. They are expanded unquoted
# at the call sites (e.g. `$JOURNAL "message"`), so each one is word-split
# into a command plus its options.
FETCHMAIL=fetchmail            # process name polled with pidof
LOGGER="logger -t ${0}"        # logger tagged with the script name
IOERR="${LOGGER} -s"           # same logger command with the -s option added
JOURNAL="${IOERR}"             # the logging command actually used
# Main loop of the filter daemon.
#
# Waits for $FETCHMAIL to be running (10 checks, 6 s apart), then loops for as
# long as it stays alive:
#   1. start socat listening on localhost:imap and forwarding to the real IMAP
#      server, record its PID in /tmp/socat.pid and block until it ends;
#   2. kill a running $MAILER (mutt);
#   3. for every readable and writable mailbox in /var/mail: copy it to
#      <mbox>.bak, run "<script> SPAM <mbox>" under flock, and mail a
#      side-by-side diff of the Subject headers when they changed;
#   4. leave if wait_for_exit requested a stop (/tmp/socat-stop_waiting.pid).
#
# Globals:   FETCHMAIL (read), JOURNAL (read)
# Arguments: $1 - path of this script, re-invoked as "$1 SPAM <mbox>"
# Exits:     1 when $FETCHMAIL is still not running after the startup wait
function daemon_server ()
{
	local MBOXSPAM_SH="$1"
	local MBOXDIR="/var/mail"
	local WAITING=0
	local MAILER="mutt"
	local MAILSERVER="imap.gmx.com"
	local HOST DOMAIN SOCAT MBOX MAILTO WORKDIR
	HOST="$(hostname)"
	DOMAIN="$(dnsdomainname)"
	until pidof "$FETCHMAIL" &>/dev/null
	do
		WAITING=$((WAITING + 1))
		if [ "$WAITING" -eq 10 ]
		then
			$JOURNAL "$FETCHMAIL, not started/start too slow."
			exit 1
		fi
		sleep 6
	done
	$JOURNAL "Bash Anti Spam Filter Start: Success."
	while pidof "$FETCHMAIL" &>/dev/null
	do
		socat TCP-LISTEN:imap,bind=localhost "TCP:$MAILSERVER:imap" &
		SOCAT=$!
		printf '%s' "$SOCAT" > /tmp/socat.pid
		wait "$SOCAT"
		if pidof "$MAILER" &>/dev/null
		then
			killall -9 "$MAILER"
			$JOURNAL "working, daemon_server(): $MAILER killed."
		fi
		# The mailboxes are processed whatever status socat ended with: a
		# requested stop kills socat with SIGKILL and still expects this pass
		# (and the stop-file cleanup below) to run.
		for MBOX in "$MBOXDIR"/*
		do
			case $MBOX
			in
			*.lock|*.bak)
				# Lock files created by flock and our own backups: skipped.
				;;
			*)
				if [[ -w $MBOX && -r $MBOX ]]
				then
					if ! cp -- "$MBOX" "$MBOX.bak"
					then
						$JOURNAL "Can't back up $MBOX"
						continue
					fi
					# Mailbox name relative to $MBOXDIR, used as the local user name.
					MAILTO=${MBOX#"$MBOXDIR"/}
					flock "$MBOX" "$MBOXSPAM_SH" SPAM "$MBOX" &>> /var/log/bmfs-spam-scripts.warning
					# Private scratch directory instead of fixed names in /tmp.
					if ! WORKDIR=$(mktemp -d)
					then
						$JOURNAL "Can't create a temporary directory for $MBOX"
						continue
					fi
					# Extract each Subject header together with its continuation lines.
					sed -n '/^From /,/^$/{ /^Subject:/,/^[A-Z]/{/^\(Subject\|[^A-Z]\)/p}}' "$MBOX.bak" > "$WORKDIR/orig"
					sed -n '/^From /,/^$/{ /^Subject:/,/^[A-Z]/{/^\(Subject\|[^A-Z]\)/p}}' "$MBOX" > "$WORKDIR/new"
					# diff exits non-zero when the subjects differ: only then send the report.
					if ! diff -y "$WORKDIR/new" "$WORKDIR/orig" > "$WORKDIR/report"
					then
						mail "root@$HOST.$DOMAIN" -s "$MAILTO: Spam analyse Result." -b "$MAILTO@$HOST.$DOMAIN" -c "$MAILTO@$HOST.$DOMAIN" < "$WORKDIR/report"
					fi
					rm -rf -- "$WORKDIR"
				else
					$JOURNAL "Can't access to $MBOX"
				fi
				;;
			esac
		done
		if [ -f /tmp/socat-stop_waiting.pid ]
		then
			# Stop requested by wait_for_exit: acknowledge it and leave.
			rm -v /tmp/socat-stop_waiting.pid
			return 0
		fi
	done
	# $FETCHMAIL is gone: drop the PID file so that a later "start" is not
	# refused because of a stale /tmp/socat.pid.
	rm -f /tmp/socat.pid
}
# Scan an mbox file against ONE spam rule and write the shell commands that
# tag the Subject header of every matching message.
#
# Arguments:
#   $1 - mbox file to scan (input of awk)
#   $2 - output file; receives one `sed -i` command per Subject to tag
#   $3 - rule, i.e. one line printed by spam_list:
#          H:MARK:regex           regex is tested against each header line
#          h:inv:head:MARK:regex  regex is tested against header lines whose
#                                 first field matches head and ends with ":";
#                                 with inv=1 the rule fires when the regex
#                                 does NOT match
#          B:count:MARK:regex     occurrences of regex are counted in the body
#                                 lines; the rule fires once the count reaches
#                                 `count`
#        MARK is extracted with [A-Z]+, so it must be uppercase letters only.
#
# The awk program tracks whether the current line belongs to a message header
# (msgpos=1, from a "From " line up to the first empty line) or to a body
# (msgpos=0), and remembers the line number of the "Subject: " header. A
# message whose Subject line (or one of its continuation lines) is exactly
# MARK is treated as already tagged and skipped. Each emitted command looks
# like
#   sed -i "N { s/^Subject: /Subject: MARK\n / }" <mbox>
# so MARK ends up alone on the Subject line and the original subject moves to
# a continuation line. N is the recorded Subject line number plus the number
# of commands already emitted, since each of them inserts one line.
#
# NOTE(review): gensub() and ARGIND are gawk extensions; presumably `awk` is
# gawk on the target system -- confirm.
# NOTE(review): $1 and $2 are expanded unquoted below, and the mbox path is
# printed unquoted into the generated command, so paths containing whitespace
# are not supported.
function get_spams ()
{
	# The rule is handed to awk as variable `s`; the BEGIN block splits it into
	# typ / inv / c / msg and leaves the bare regex in `s`.
	awk -v s="$3" \
	'BEGIN{ orig=s
		typ=s;		#type
		inv=s;		#invert
		c=s		#header,count
		msg=s		#subject alert message
		msgpos=0	#1:From, 0:Body
		mark=0
		count=0
		subject=0
		Sub=0
		add_line=0
		if ( sub("^h:","",s)>0 )
		{	sub(":.*$","",typ)
			inv=gensub("^h:([0,1]):.*","\\1","1",inv)
			c=gensub("^h:[0,1]:([^:]+):.*$","\\1","1",c)
			msg=gensub("^h:[0-1]:.*:([A-Z]+):.*","\\1","1",msg)
			sub("[0,1]:.*:[A-Z]+:","",s)
			#print typ"-"s"-"msg"-"c"-"inv
		}
		else
		{	if ( sub("^B:","",s)>0 )
			{	sub(":.*","",typ)
				sub("^[0-9]+:[A-Z]+:","",s)
				c=gensub("^B:([0-9]+):.*","\\1","1",c)
				msg=gensub("^B:[0-9]+:([A-Z]+):.*","\\1","1",msg)
				#print typ"-"s"-"msg-"-"c
			}
			else
			{	sub(":.*","",typ)
				sub("^H:[A-Z]+:","",s)
				msg=gensub("^H:([A-Z]+):.*","\\1","1",msg)
				#print typ"-"s"-"msg
			}
		}
		mark_set="^(Subject:)? "msg"$"
	}
	{	## Identify position: Header OR Body
		#print mark
		if ( msgpos == 0 && $0 ~ /^From / )
		{	msgpos=1
			subject=0
			if ( typ == "B"  )
			{	count=0
				mark=0
			}
		}
		if ( msgpos == 1 && $0 ~ /^$/ )
		{	msgpos=0
			mark=( typ == "h" || typ == "H" )?0:mark
		}
		## Scan Ok
		if ( Sub == 0 )
		{
			if ( mark == 1 || ( ( typ == "h" || typ == "H" ) && msgpos == 0 ) || ( typ == "B" && msgpos == 1 && subject != 0) )
			{	next	}
			if ( mark == 2 && subject != 0 )
			{	subject+=add_line
				add_line++
				print "sed -i \""subject" { s/^Subject: /Subject: "msg"\\n / }\" "ARGV[ARGIND]
				mark=3
				subject=0
				next
			}
		}
		## get spams
		if ( Sub == 1 && $0 ~ /^[A-Z]/ )
		{	Sub=0	}
		if ( Sub == 1 || ( msgpos == 1 && $0 ~ /^Subject: / ) )
		{
			if ( $0 ~ mark_set )
			{	mark=1
				subject=0
				next
			}
			else
			{	if ( Sub == 0 )
				{	subject=NR	}
			}
			Sub=1
		}
		if ( mark == 0 )
		{	if ( typ == "H" && msgpos == 1 )
			{	mark=( $0 ~ s )?2:mark	}
			else
			{	if ( typ == "h" && msgpos == 1 )
				{	mark=(( inv == 0 && $1 ~ c && $1 ~ /:$/ && $0 ~ s )||( inv == 1 && $1 ~ c && $1 ~ /:$/ && $0 !~ s ))?2:mark
				}
				else
				{	if ( msgpos == 0 )
					{	while( sub(s,"")>0 )
						{	count=1+count/1		}
						mark=(count/1 >= c/1)?2:mark
					}
				}
			}
		}
	}' $1 > $2
}
# Apply every rule printed by spam_list to one mbox file.
#
# For each rule, get_spams writes the tagging commands into a scratch file,
# which is then sourced so the commands run against the mailbox.
#
# Globals:   JOURNAL (read), USER (read)
# Arguments: $1 - mbox file; defaults to /var/mail/$USER when empty
# Exits:     1 when the mbox file does not exist or no scratch file can be
#            created
function spam_filter ()
{
	local MBOXFILE="/var/mail/$USER"
	local MBOXSPAM=""
	local SPAM
	if [ -n "$1" ]
	then
		MBOXFILE=$1
	fi
	if [ ! -f "$MBOXFILE" ]
	then
		$JOURNAL "$MBOXFILE: File not found."
		exit 1
	fi
	# The scratch file must never be derived from the mailbox path: with a bare
	# file name (no "/") the former sed rewrite returned the mailbox itself,
	# which was then truncated by get_spams and removed at the end.
	if ! MBOXSPAM=$(mktemp /tmp/bmsf.spam.XXXXXX)
	then
		$JOURNAL "$MBOXFILE: can't create file."
		exit 1
	fi
	spam_list |
	while read -r SPAM
	do
		get_spams "$MBOXFILE" "$MBOXSPAM" "$SPAM"
		# Executes the commands generated by get_spams for this rule.
		source "$MBOXSPAM"
	done
	rm -- "$MBOXSPAM"
}
# Ask the running daemon to stop.
#
# Renames /tmp/socat.pid to /tmp/socat-stop_waiting.pid (the stop request that
# daemon_server looks for after each pass) and kills the recorded socat
# process so that daemon_server stops waiting on it.
#
# Globals: JOURNAL (read)
# Exits:   1 when no PID file exists, when a stop is already pending, or when
#          the PID file does not contain a number
function wait_for_exit ()
{
	local SOCAT_PID
	# Check for the PID file before reading it, so a missing file produces only
	# our own message.
	if [ ! -f /tmp/socat.pid ]
	then
		$JOURNAL "No instance of $0 running."
		exit 1
	fi
	if [ -f /tmp/socat-stop_waiting.pid ]
	then
		$JOURNAL "Another instance of $0 waiting for stop."
		exit 1
	fi
	SOCAT_PID=$(< /tmp/socat.pid)
	# Never hand an empty or garbage value to kill.
	if [[ ! $SOCAT_PID =~ ^[0-9]+$ ]]
	then
		$JOURNAL "/tmp/socat.pid: invalid PID."
		exit 1
	fi
	mv /tmp/socat.pid /tmp/socat-stop_waiting.pid
	kill -9 "$SOCAT_PID"
}
# Command dispatch.
#   start|START|SERVER  start the daemon in the background
#   stop|STOP           request a stop through wait_for_exit
#   reload|RELOAD       stop, wait until the stop is acknowledged, start again
#   SPAM [mboxfile]     run the filter once on a mailbox
#   CONFIG              print the spam_list section of this script
# Anything else prints the usage text. The upper-case spellings are accepted
# because the usage text has always advertised them.
case "$1"
in
start|START|SERVER)
	if [ -f /tmp/socat.pid ]
	then
		$JOURNAL "/tmp/socat.pid: exist..."
		exit 1
	fi
	if [ -f /tmp/socat-stop_waiting.pid ]
	then
		printf '%s not fully stopped.\n' "$0"
		exit 1
	fi
	$JOURNAL "Bash Anti Spam Filter starting."
	daemon_server "$0" &
;;
stop|STOP)
	wait_for_exit
	$JOURNAL "Stopped."
;;
reload|RELOAD)
	wait_for_exit
	# daemon_server removes this file once it has honoured the stop request.
	while [ -f /tmp/socat-stop_waiting.pid ]
	do
		sleep 1
	done
	daemon_server "$0" &
	$JOURNAL "Reloaded."
;;
SPAM)
	spam_filter "$2"
;;
CONFIG)
	# Print everything between the two marker comments around spam_list.
	sed -n '/^#start_spam_list /,/^#end_spam_list /p' "$0"
	exit 0
;;
*)
cat << EOF
usage:
$0 start|START|SERVER: start server: socat is use for waiting incoming mails.
$0 SPAM [mboxfile]: anaylse mboxfile, default: /var/mail/\$USER (Repertory need to be writable for "sed").
$0 stop|STOP: stop server, wait $FETCHMAIL for kill socat
$0 reload|RELOAD: stop server, then start it again
$0 CONFIG: show spam_list function
EOF
;;
esac
