#!/bin/sh
# Copyright (c) 2006, Nicholas Holder http://www.redlohft.com
# Modified by William Olson http://www.goodcleanemail.com 
# All rights reserved.
#
# spamass-learn v .1
# Feb 18, 2007 - Initial Release
#
# spamass-learn v .2
# Added variable to delete Spams after X days
#
# Redistribution and use in source and binary forms, with or without modification, 
# are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this 
#   list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice, 
#   this list of conditions and the following disclaimer in the documentation 
#   and/or other materials provided with the distribution.
# * Neither the name of Redlohft nor the names of its contributors may be used 
#   to endorse or promote products derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, 
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Path to sa-learn
SAPROG="/usr/local/bin/sa-learn"
# Command flags. Should not need to be changed if you
# used the freebsdrocks.net guide.
SAFLAGS="-u qscand -C /usr/local/etc/mail/spamassassin"

# Set to your vpopmail domains folder.
# Keep the trailing slash: the domain name is appended
# directly to this value.
DOMAIN_BASE_PATH="/home/vpopmail/domains/"
# A list of your local domains which you would like to
# use this script to learn against.
# Use spaces to separate domains.
DOMAINS="domain.com"

# If you have created a catch all account for mail
# directed to a non-existent account, set it here.
# Uncomment and set this whenever CHECK_CATCHALL is not 0.
#CATCHALL_USER="junkmail"
# If you assume that all mail directed to the catchall
# is spam, you can train all mail in its account as spam.
# You can also just report.
# 0 - Off  1 - Report only  2 - Train
CHECK_CATCHALL=0
# Set to delete the mail in the catch all account after
# training. You must train to delete!
# 0 - Leave  1 - Delete
# Always defined (like DELETE_QUAR) because it is compared
# numerically as soon as CHECK_CATCHALL is set to 2.
DELETE_CATCHALL=0

# If you have set qmail-scanner to quarantine spam,
# set the path to the folder in which it places the
# caught spam. You can set qmail-scanner to place all
# spam in a folder separate from the virus quarantine.
# See qmail-scanner-queue.pl for more info.
# Uncomment and set this whenever CHECK_QUAR is not 0.
#QUAR_PATH="/var/spool/qmailscan/spam/new"
# If you assume that qmail-scanner has accurately
# determined that all mail in the quarantine is in
# fact spam, use this to train it all as spam.
# 0 - Off  1 - Report only  2 - Train
CHECK_QUAR=0
# Set to delete mail in the quarantine after training.
# You must train to delete!
# 0 - Leave  1 - Delete
DELETE_QUAR=0

# Used to check users spam/ham and optionally learn or report.
# 0 - Off  1 - Report only  2 - Train only spam
# 3 - Train only ham  4 - Train ham and spam
CHECK_USERS=2
# A list of user folders to check for spam relative to their Maildir/
# Use spaces to separate folders.
USER_SPAM_DIRS=".Spam/new .Spam/cur"
# A list of user folders to check for ham relative to their Maildir/
# Keep in mind that this assumes a user deletes spam after recognition
# and doesn't keep the message in their mailbox.
USER_HAM_DIRS="cur"
# Set to delete spam messages in users' spam folders
# You must train to delete!
# 0 - Leave  1 - Delete
DELETE_USER_SPAM=1
# Number of days to keep spam in the users' spam folders.
# With DELETE_USER_SPAM=1 only messages older than this
# many days are deleted.
# Default is 30 days
DELETE_SPAM=30

# If you want some serious output...
# 0 - Off  1 - On
REPORT_VERBOSE=1
# Turn on for a bunch of debug messages...
# 0 - Off  1 - On (also forces REPORT_VERBOSE to 1)
DEBUG=0

##################################################
# You should not need to modify anything
# below this line. Change at your own risk!
##################################################

# Print every configuration option when debugging is switched on.
# Globals read:    DEBUG plus all option variables listed below
# Globals written: REPORT_VERBOSE (forced to 1 whenever DEBUG is 1)
# Outputs:         one "DEBUG: NAME=value" line per option on stdout
debug_dump_options() {
	# ${DEBUG:-0} keeps the numeric test valid when DEBUG is unset or empty.
	if [ "${DEBUG:-0}" -eq 1 ]; then
		REPORT_VERBOSE=1
		echo "DEBUG: Options:"
		# printf '%s' prints the values verbatim, whatever they contain.
		printf 'DEBUG: SAPROG=%s\n' "${SAPROG}"
		printf 'DEBUG: SAFLAGS=%s\n' "${SAFLAGS}"
		printf 'DEBUG: DOMAIN_BASE_PATH=%s\n' "${DOMAIN_BASE_PATH}"
		printf 'DEBUG: DOMAINS=%s\n' "${DOMAINS}"
		printf 'DEBUG: CATCHALL_USER=%s\n' "${CATCHALL_USER}"
		printf 'DEBUG: CHECK_CATCHALL=%s\n' "${CHECK_CATCHALL}"
		printf 'DEBUG: DELETE_CATCHALL=%s\n' "${DELETE_CATCHALL}"
		printf 'DEBUG: QUAR_PATH=%s\n' "${QUAR_PATH}"
		printf 'DEBUG: CHECK_QUAR=%s\n' "${CHECK_QUAR}"
		printf 'DEBUG: DELETE_QUAR=%s\n' "${DELETE_QUAR}"
		printf 'DEBUG: CHECK_USERS=%s\n' "${CHECK_USERS}"
		printf 'DEBUG: USER_SPAM_DIRS=%s\n' "${USER_SPAM_DIRS}"
		printf 'DEBUG: USER_HAM_DIRS=%s\n' "${USER_HAM_DIRS}"
		printf 'DEBUG: DELETE_USER_SPAM=%s\n' "${DELETE_USER_SPAM}"
		# DELETE_SPAM is a configuration option too, so it belongs in the dump.
		printf 'DEBUG: DELETE_SPAM=%s\n' "${DELETE_SPAM}"
	fi
}

debug_dump_options

# Print the From/To/Subject/X-Spam-Status header lines of every regular
# file in the current directory.
# Outputs: a framed entry per message on stdout
quar_list_messages() {
	# Glob instead of parsing `ls`, so names containing whitespace survive.
	for QUAR_MSG in *; do
		# Skips the literal "*" of an empty directory and any sub-directory.
		[ -f "${QUAR_MSG}" ] || continue
		echo "|"
		echo "| **********"
		printf '| %s\n' "${QUAR_MSG}"
		printf '| %s\n' "$(grep -E -e '^From:' -- "${QUAR_MSG}")"
		printf '| %s\n' "$(grep -E -e '^To:' -- "${QUAR_MSG}")"
		printf '| %s\n' "$(grep -E -e '^Subject:' -- "${QUAR_MSG}")"
		printf '| %s\n' "$(grep -E -e '^X-Spam-Status' -- "${QUAR_MSG}")"
		echo "| **********"
	done
}

# Report on the spam quarantine and, when CHECK_QUAR is 2, train every
# message in it as spam; with DELETE_QUAR=1 the messages are removed after
# sa-learn succeeded.
# Globals read: QUAR_PATH CHECK_QUAR DELETE_QUAR REPORT_VERBOSE DEBUG
#               SAPROG SAFLAGS
# Outputs:      the report (including sa-learn's own output) on stdout
# The body is a subshell, so the directory change never leaks into the
# rest of the script.
check_quarantine() (
	echo ""
	echo "----------------------------------------"
	echo "| SPAM QUARANTINE"
	echo "----------------------------------------"

	# QUAR_PATH has to be non-empty AND quoted: an unquoted empty value turns
	# the test into `[ -d ]` (always true) and `cd` into a jump to $HOME,
	# where the training and deleting below would then run.
	if [ -n "${QUAR_PATH:-}" ] && [ -d "${QUAR_PATH}" ] && cd -- "${QUAR_PATH}"; then
		if [ "${DEBUG:-0}" -eq 1 ]; then
			echo "DEBUG: Should be in ${QUAR_PATH}"
			echo "DEBUG: Current directory: $(pwd)"
		fi

		# Get a count of messages in the quarantine (visible entries only).
		set -- *
		if [ "$#" -eq 1 ] && [ ! -e "$1" ] && [ ! -L "$1" ]; then
			# The glob matched nothing and was left as a literal "*".
			FCOUNT=0
		else
			FCOUNT=$#
		fi

		echo "| Total messages: ${FCOUNT}"
		echo "| Quarantine size: $(du -sh)"
		if [ "${FCOUNT}" -eq 0 ]; then
			echo "| No spam in quarantine. Huzzah!"
		else
			if [ "${REPORT_VERBOSE:-0}" -eq 1 ]; then
				quar_list_messages
			fi
			if [ "${CHECK_QUAR:-0}" -eq 2 ]; then
				echo "|"
				# SAPROG/SAFLAGS stay unquoted on purpose: SAFLAGS holds several
				# arguments. "./*" keeps odd file names from looking like options.
				if ${SAPROG} ${SAFLAGS} --spam ./* 2>&1; then
					if [ "${DELETE_QUAR:-0}" -eq 1 ]; then
						echo "| Deleting messages..."
						# Only message files are removed, never directories.
						find . -type f -delete
					else
						echo "| Leaving spam in quarantine."
						echo "| This won't hurt but bayes will not learn them again."
					fi
				fi
			fi
		fi
	else
		echo "| Invalid QUAR_PATH specified in ${0}"
	fi
	echo "----------------------------------------"
	echo ""
)

if [ "${CHECK_QUAR:-0}" -gt 0 ]; then
	check_quarantine
fi

# Store the number of visible (non-dot) entries of the current directory
# in FCOUNT.
dom_count_entries() {
	# Positional parameters are local to the function, so this is safe.
	set -- *
	if [ "$#" -eq 1 ] && [ ! -e "$1" ] && [ ! -L "$1" ]; then
		# The glob matched nothing and was left as a literal "*".
		FCOUNT=0
	else
		FCOUNT=$#
	fi
}

# Print the From/To/Subject/X-Spam-Status header lines of every regular
# file in the current directory.
dom_list_messages() {
	# Glob instead of parsing `ls`, so names containing whitespace survive.
	for DOM_MSG in *; do
		[ -f "${DOM_MSG}" ] || continue
		echo "|"
		echo "| **********"
		printf '| %s\n' "${DOM_MSG}"
		printf '| %s\n' "$(grep -E -e '^From:' -- "${DOM_MSG}")"
		printf '| %s\n' "$(grep -E -e '^To:' -- "${DOM_MSG}")"
		printf '| %s\n' "$(grep -E -e '^Subject:' -- "${DOM_MSG}")"
		printf '| %s\n' "$(grep -E -e '^X-Spam-Status' -- "${DOM_MSG}")"
		echo "| **********"
	done
}

# Report on the catchall mailbox of one domain and, when CHECK_CATCHALL is
# 2, train all of its new mail as spam (deleting it afterwards when
# DELETE_CATCHALL is 1).
# Arguments:    $1 - domain name
# Globals read: DOMAIN_BASE_PATH CATCHALL_USER CHECK_CATCHALL DELETE_CATCHALL
#               REPORT_VERBOSE DEBUG SAPROG SAFLAGS
# The body is a subshell, so the directory change stays local.
check_catchall() (
	echo ""
	echo "----------------------------------------"
	echo "| CATCHALL: ${CATCHALL_USER}@${1}"
	echo "----------------------------------------"

	MAILPATH="${DOMAIN_BASE_PATH}${1}/${CATCHALL_USER}/Maildir/new"
	if [ "${DEBUG:-0}" -eq 1 ]; then
		echo "DEBUG: MAILPATH=${MAILPATH}"
	fi

	# An empty CATCHALL_USER would silently point at the domain directory
	# itself, so it is treated like a missing mailbox.
	if [ -n "${CATCHALL_USER:-}" ] && [ -d "${MAILPATH}" ] && cd -- "${MAILPATH}"; then
		if [ "${DEBUG:-0}" -eq 1 ]; then
			echo "DEBUG: Should be in ${MAILPATH}"
			echo "DEBUG: Current directory: $(pwd)"
		fi

		# Get a count of messages in the catchall
		dom_count_entries

		echo "| Total messages: ${FCOUNT}"
		echo "| Catchall size: $(du -sh)"
		if [ "${FCOUNT}" -eq 0 ]; then
			echo "| No mail for ${CATCHALL_USER}@${1}. Huzzah!"
		else
			if [ "${REPORT_VERBOSE:-0}" -eq 1 ]; then
				echo "| Here comes the message list..."
				dom_list_messages
			fi
			if [ "${CHECK_CATCHALL:-0}" -eq 2 ]; then
				echo "|"
				# SAPROG/SAFLAGS stay unquoted on purpose: SAFLAGS holds
				# several arguments.
				if ${SAPROG} ${SAFLAGS} --spam ./* 2>&1; then
					# Defaults to "leave" when DELETE_CATCHALL is not configured.
					if [ "${DELETE_CATCHALL:-0}" -eq 1 ]; then
						echo "| Deleting messages..."
						find . -type f -delete
					else
						echo "| Leaving spam in catchall."
						echo "| This won't hurt but bayes will not learn them again."
					fi
				fi
			fi
		fi
	else
		echo "| ${MAILPATH} does not exist for ${CATCHALL_USER}@${1}"
		echo "| Are you sure this user exists?"
	fi
	echo "----------------------------------------"
	echo ""
)

# Report on, and optionally train from, the spam and ham folders of one
# mailbox.
# Arguments: $1 - absolute path of the domain directory
#            $2 - mailbox (user) name
#            $3 - domain name, used for display only
# Globals:   reads CHECK_USERS USER_SPAM_DIRS USER_HAM_DIRS DELETE_USER_SPAM
#            DELETE_SPAM REPORT_VERBOSE DEBUG SAPROG SAFLAGS; adds to
#            TOTAL_SPAM_COUNT and TOTAL_HAM_COUNT; changes the working
#            directory.
check_user_mailbox() {
	if [ "${REPORT_VERBOSE:-0}" -eq 1 ]; then
		echo "|"
		echo "----------------------------------------"
		echo "| USER: ${2}@${3}"
		echo "----------------------------------------"
		echo "| Spam message counts:"
	fi

	# USER_SPAM_DIRS is a space separated list and is split on purpose.
	for SPAMDIR in ${USER_SPAM_DIRS}; do
		SPAMPATH="${1}/${2}/Maildir/${SPAMDIR}"
		if [ "${DEBUG:-0}" -eq 1 ]; then
			echo "DEBUG: SPAMPATH=${SPAMPATH}"
		fi

		if [ -d "${SPAMPATH}" ] && cd -- "${SPAMPATH}"; then
			if [ "${DEBUG:-0}" -eq 1 ]; then
				echo "DEBUG: Should be in ${SPAMPATH}"
				echo "DEBUG: Current directory: $(pwd)"
			fi

			dom_count_entries
			TOTAL_SPAM_COUNT=$((TOTAL_SPAM_COUNT + FCOUNT))

			if [ "${REPORT_VERBOSE:-0}" -eq 1 ]; then
				echo "| ${SPAMDIR}: ${FCOUNT}"
			fi

			# Modes 2 and 4 train spam.
			if [ "${CHECK_USERS:-0}" -eq 2 ] || [ "${CHECK_USERS:-0}" -eq 4 ]; then
				if [ "${FCOUNT}" -gt 0 ] && ${SAPROG} ${SAFLAGS} --spam ./* 2>&1; then
					if [ "${DELETE_USER_SPAM:-0}" -eq 1 ]; then
						if [ "${REPORT_VERBOSE:-0}" -eq 1 ]; then
							echo "|"
							echo "| Deleting messages in ${SPAMDIR}..."
						fi
						# Only messages older than DELETE_SPAM days are removed.
						find . -type f -mtime "+${DELETE_SPAM:-30}" -delete
					else
						if [ "${REPORT_VERBOSE:-0}" -eq 1 ]; then
							echo "|"
							echo "| Leaving spam in ${SPAMDIR}."
							echo "| This won't hurt but bayes will not learn them again."
						fi
					fi
				fi
			fi
		else
			echo "| ${SPAMDIR} does not exist for ${2}@${3}"
		fi
	done

	if [ "${REPORT_VERBOSE:-0}" -eq 1 ]; then
		echo "|"
		echo "| Ham message counts:"
	fi

	# USER_HAM_DIRS is a space separated list and is split on purpose.
	for HAMDIR in ${USER_HAM_DIRS}; do
		HAMPATH="${1}/${2}/Maildir/${HAMDIR}"
		if [ "${DEBUG:-0}" -eq 1 ]; then
			echo "DEBUG: HAMPATH=${HAMPATH}"
		fi

		if [ -d "${HAMPATH}" ] && cd -- "${HAMPATH}"; then
			if [ "${DEBUG:-0}" -eq 1 ]; then
				echo "DEBUG: Should be in ${HAMPATH}"
				echo "DEBUG: Current directory: $(pwd)"
			fi

			dom_count_entries
			TOTAL_HAM_COUNT=$((TOTAL_HAM_COUNT + FCOUNT))

			if [ "${REPORT_VERBOSE:-0}" -eq 1 ]; then
				echo "| ${HAMDIR}: ${FCOUNT}"
			fi

			# Modes 3 and 4 train ham.
			if [ "${CHECK_USERS:-0}" -eq 3 ] || [ "${CHECK_USERS:-0}" -eq 4 ]; then
				if [ "${FCOUNT}" -gt 0 ]; then
					${SAPROG} ${SAFLAGS} --ham ./* 2>&1
				fi
			fi
		else
			echo "| ${HAMDIR} does not exist for ${2}@${3}"
		fi
	done

	if [ "${REPORT_VERBOSE:-0}" -eq 1 ]; then
		echo "----------------------------------------"
	fi
}

# Walk every mailbox directory of one domain and print the domain totals.
# Arguments:    $1 - domain name
# Globals read: DOMAIN_BASE_PATH DEBUG (plus those of check_user_mailbox)
# The body is a subshell, so neither the directory changes nor the loop
# variables reach the caller. The loop variable is called MAILUSER rather
# than USER so the USER environment variable seen by sa-learn is not
# overwritten.
check_users() (
	echo ""
	echo "----------------------------------------"
	echo "| USERS: ${1}"
	echo "----------------------------------------"

	if [ -d "${DOMAIN_BASE_PATH}${1}" ] && cd -- "${DOMAIN_BASE_PATH}${1}"; then
		# Absolute path, so later lookups still work after further cd calls.
		DOMAINDIR=$(pwd)

		if [ "${DEBUG:-0}" -eq 1 ]; then
			echo "DEBUG: Should be in ${DOMAIN_BASE_PATH}${1}"
			echo "DEBUG: Current directory: ${DOMAINDIR}"
		fi

		# Get a count of mailboxes in the domain. Only directories can be
		# mailboxes; plain files such as vpasswd are not counted.
		USERCOUNT=0
		for MAILUSER in *; do
			[ -d "${MAILUSER}" ] || continue
			USERCOUNT=$((USERCOUNT + 1))
			if [ "${DEBUG:-0}" -eq 1 ]; then
				if [ "${USERCOUNT}" -eq 1 ]; then
					echo "DEBUG: User list..."
				fi
				echo "DEBUG: USER=${MAILUSER}"
			fi
		done

		echo "| Total mailboxes: ${USERCOUNT}"
		if [ "${USERCOUNT}" -eq 0 ]; then
			echo "| No mailboxes found in ${1}."
		fi

		TOTAL_SPAM_COUNT=0
		TOTAL_HAM_COUNT=0

		# The glob is expanded once, while still inside the domain directory.
		for MAILUSER in *; do
			[ -d "${DOMAINDIR}/${MAILUSER}" ] || continue
			case "${MAILUSER}" in
				vpasswd|vpasswd.cdb) continue ;;
			esac
			# Directories carrying a "mailinglist" entry are not mailboxes.
			if [ -e "${DOMAINDIR}/${MAILUSER}/mailinglist" ]; then
				continue
			fi
			check_user_mailbox "${DOMAINDIR}" "${MAILUSER}" "${1}"
		done

		echo ""
		echo "----------------------------------------"
		echo "| DOMAIN: ${1}"
		echo "| Total user spam messages: ${TOTAL_SPAM_COUNT}"
		echo "| Total user ham messages: ${TOTAL_HAM_COUNT}"
		echo "----------------------------------------"
		echo ""
	else
		echo "Invalid domain path for ${1} specified."
	fi
)

# Run the catchall and user checks for every configured domain.
# Globals read: DOMAINS CHECK_CATCHALL CHECK_USERS
process_domains() {
	# DOMAINS is a space separated list and is split on purpose.
	for DOMAIN in ${DOMAINS}; do
		echo "CHECKING DOMAIN ${DOMAIN}..."
		if [ "${CHECK_CATCHALL:-0}" -gt 0 ]; then
			check_catchall "${DOMAIN}"
		fi
		if [ "${CHECK_USERS:-0}" -gt 0 ]; then
			check_users "${DOMAIN}"
		fi
	done
}

if [ "${CHECK_CATCHALL:-0}" -gt 0 ] || [ "${CHECK_USERS:-0}" -gt 0 ]; then
	process_domains
fi

# Closing status: a blank separator line followed by the completion marker.
printf '\n%s\n' "Done"

