The chaff is usually in the HTML as white text, at a small font size. So the user never sees it, but the filter’s supposed to see it.
The partial solution is to save the messages as regular email, and .EML file, with the HTML parts intact. Spamassassin seems to have code that will treat obfuscated HTML correctly. That way, the white text is removed from the training.
This code is very raw. Plenty of things to fix, like error handling, but it is working right now. The code is set up not to save out text versions of the email.
To use it, go to a folder, select the spam, and run the MarkAsSpam macro.
This is intended to be used by the sysadmin. I have learned that end-user spam filtering is hit and miss. Some people use spam filters to block legit email rather than unsubscribe from the messages.
Sub MarkAsHam()
CopyMessagesToFile ("\mailfilterspamassassin-ham")
End Sub
Sub MarkAsSpam()
CopyMessagesToFile ("\mailfilterspamassassin-spam")
End Sub
' Move the selected message(s) to the given folder **************************
Function CopyMessagesToFile(folderName As String)
Dim myOLApp As Application
Dim myNameSpace As NameSpace
Dim myInbox As MAPIFolder
Dim currentMessage As MailItem
Dim errorReport As String
Set myOLApp = CreateObject("Outlook.Application")
Set myNameSpace = myOLApp.GetNamespace("MAPI")
Set myInbox = myNameSpace.GetDefaultFolder(olFolderInbox)
' Figure out if the active window is a list of messages or one message
' in its own window
On Error GoTo QuitIfError ' But if there's a problem, skip it
Select Case myOLApp.ActiveWindow.Class
' The active window is a list of messages (folder); this means there
' might be several selected messages
Case olExplorer
Debug.Print "list of messages"
For Each currentMessage In myOLApp.ActiveExplorer.Selection
Call writeAsFile(folderName, currentMessage)
Next
' The active window is a message window, meaning there will only
' be one selected message (the one in this window)
Case olInspector
Call writeAsFile(folderName, myOLApp.ActiveInspector.CurrentItem)
' can't handle any other kind of window; anything else will be ignored
End Select
QuitIfError: ' Come here if there was some kind of problem
Set myOLApp = Nothing
Set myNameSpace = Nothing
Set myInbox = Nothing
Set currentMessage = Nothing
End Function
Sub writeAsFile(folderName As String, item As MailItem)
On Error GoTo Bail
Dim x As MailItem
Dim fn As String
Set x = item
'Let fn = folderName & Right(x.EntryID, 64) & ".txt"
'Debug.Print "file will be " & fn
'Open fn For Output As #1
' Print #1, "From : " & x.SenderEmailAddress
' Print #1, "To: " & x.To
' Print #1, "Subject: " & x.Subject
' Print #1, vbCrLf & vbCrLf
' Print #1, x.body
Let fn = folderName & Right(x.EntryID, 64) & ".eml"
Debug.Print "file will be " & fn
Open fn For Output As #2
Print #2, "From : " & x.SenderEmailAddress
Print #2, "To: " & x.To
Print #2, "Subject: " & x.Subject
Print #2, "MIME-Version: 1.0"
Print #2, "Content-Type: multipart/alternative;"
Print #2, " boundary = ""----=_NextPart_000_000D_01CCF6AD.D1159750"""
Print #2, "Content-Language: en-us"
Print #2, ""
Print #2, "This is a multipart message in MIME format."
Print #2, ""
Print #2, "------=_NextPart_000_000D_01CCF6AD.D1159750"
Print #2, "Content-Type: text/plain;"
Print #2, " Charset = ""us-ascii"""
Print #2, "Content-Transfer-Encoding: 7bit"
Print #2, ""
Print #2, item.body
Print #2, "------=_NextPart_000_000D_01CCF6AD.D1159750"
Print #2, "Content-Type: text/html;"
Print #2, " Charset = ""UTF-8"""
Print #2, "Content-Transfer-Encoding: 7-bit"
Print #2, "Content-Disposition: inline"
Print #2, ""
Print #2, item.HTMLBody
Print #2, "------=_NextPart_000_000D_01CCF6AD.D1159750--"
On Error GoTo 0
Bail:
Close #1
Close #2
Set item = Nothing
End Sub