summary | refs | log | tree | commit | diff | stats
path: root/bin
diff options
context:
space:
mode:
author: Jesse Luehrs <doy@tozt.net> 2017-11-02 04:18:30 -0400
committer: Jesse Luehrs <doy@tozt.net> 2017-11-02 04:19:12 -0400
commit: e43d87f5af82413117ad3ec42aec44a547d93e2a (patch)
tree: d18780e6f1f554d14e45c7c06aee195395675cd7 /bin
parent: 79d08724be82922ebe9e6653c5e8b3b0c891b658 (diff)
download: conf-e43d87f5af82413117ad3ec42aec44a547d93e2a.tar.gz
conf-e43d87f5af82413117ad3ec42aec44a547d93e2a.zip
only sa-learn in fixed size chunks
it seems to use memory linear in the number of files processed at a time, for some reason?
Diffstat (limited to 'bin')
-rwxr-xr-x bin/learn_spam 17
1 files changed, 11 insertions, 6 deletions
diff --git a/bin/learn_spam b/bin/learn_spam
index f3f8412..a3f4b3d 100755
--- a/bin/learn_spam
+++ b/bin/learn_spam
@@ -6,22 +6,27 @@ SALEARN=/usr/bin/vendor_perl/sa-learn
if [[ "$1" == "-q" ]]; then
OPTS="$OPTS"
- PRINT=""
OUTFILE="/dev/null"
else
OPTS="$OPTS --showdots"
- PRINT="\n From directory %p\n"
OUTFILE="/dev/stdout"
fi
if [[ ! "$1" == "-q" ]]; then
- echo -n "Learning ham..."
+ echo -n "$(date): Learning ham..."
fi
-nice find $MAILDIR -type d ! -iwholename '*spam*' ! -iwholename '*sent*' ! -iwholename '*trash*' ! -iwholename '*draft*' ! -iwholename '*queue*' -name cur -printf "$PRINT" -exec $SALEARN $OPTS --ham '{}' \; > $OUTFILE
+nice find $MAILDIR -type f | grep '/\(cur\|new\)/' | grep -v '/spam/' | xargs -P1 -n1000 $SALEARN $OPTS --ham > $OUTFILE
if [[ ! "$1" == "-q" ]]; then
- echo -n "Learning spam..."
+ echo -n "$(date): Learning spam..."
fi
-nice find $MAILDIR -type d -iwholename '*spam*' -name cur -printf "$PRINT" -exec $SALEARN $OPTS --spam '{}' \; > $OUTFILE
+nice find $MAILDIR -type f | grep '/\(cur\|new\)/' | grep '/spam/' | xargs -P1 -n1000 $SALEARN $OPTS --spam > $OUTFILE
+if [[ ! "$1" == "-q" ]]; then
+ echo -n "$(date): Syncing..."
+fi
nice $SALEARN --sync > $OUTFILE
+
+if [[ ! "$1" == "-q" ]]; then
+ echo -n "$(date): Done!"
+fi