Friday, November 25, 2022

Part 2: Detect a Spam Email using Natural Language Processing

 This is a continuation of my previous post (Detect a Spam Email using Natural Language Processing). In this post I enhance the second program (a sample deployment of my spam detection model) by adding a PyQt6 window. Aside from adding a user interface, I made the following enhancements:

  1. On startup, it tries to retrieve any unread emails from my Gmail account.
  2. I added a button that starts a background job which retrieves email from my Gmail account every 5 seconds.
  3. Each time it receives a new email, it first checks whether the contents of the email are spam or not, and then sends a notification.
  4. A "Stop Background Job" button was added to stop the thread and stop retrieving email.

Please note that items #2 and #3 are still a work in progress. Item #2 should hide the window and show an icon in the system tray, while the notification in item #3 should open the window when clicked.

I am still figuring out how to accomplish these tasks, but I published it anyway because the basic functionality already works.

By the way, the "Compose" button is merely a decoration for now; I plan to implement this function in a future version.

During my testing, I copied the subject and contents of an email from my original dataset, where it is classified as spam. My model correctly identified it as spam, but Gmail did not. As far as I know, Gmail has one of the most accurate spam detection models available today, so I am curious why it did not detect my spam email.

The design of my buttons is taken from my previous post (Button with PyQt6). The background process is basically a multithreading job designed to keep the application responsive. You may check my previous post on this topic (PyQt6 Progress Bar Enhancement).


Here is the code:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
import sys
from PyQt6.QtWidgets import QApplication,  QWidget,  QPushButton, QTableWidget, QStyledItemDelegate, QTableWidgetItem
#from PyQt6.QtGui import  QPainter, QColor, QPen
from PyQt6.QtCore import  QEvent, QObject, QTimer, QThread
import email
import imaplib
import time
import multiprocessing as mp
import winrt.windows.ui.notifications as notifications
import winrt.windows.data.xml.dom as dom
import pandas as pd
import string
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
stemmer = PorterStemmer()
import joblib
import warnings
warnings.filterwarnings('ignore')
# Notification gate: set to 1 by Window.onClick_pb4; the mail-checking code in
# Window only raises toast notifications while this is 1.
notif1 = 0
# Index of the next table row that Window fills in when a non-spam mail arrives.
x = 0
# Not read as a global anywhere in the visible code (the mail-checking code
# assigns its own local text variable instead).
xx = ''
# Stop flag for background polling: set to 1 by Window.onClick_pb5, reset to 0
# by Window.onClick_pb4, and checked by Window.showtext and WorkerThread.run.
breaker = 0
class Delegate(QStyledItemDelegate):
    """Item delegate that only offers an editor for cells whose data is "100"."""

    def createEditor(self, parent, option, index):
        """Return the default editor for cells holding "100", otherwise None.

        Returning None means no editor is created for the cell.
        """
        if index.data() != "100":
            return None
        return super().createEditor(parent, option, index)

class Window(QWidget):
    """Main "Zumail" window: lists unread, non-spam mail and polls for more.

    On construction the window builds its buttons and table, then checks the
    IMAP inbox once.  "Run in Background" starts a timer that re-checks the
    inbox every ``POLL_INTERVAL_MS`` milliseconds; "Stop Background Job"
    stops it again.

    NOTE(review): the inbox check (network I/O plus model inference) runs in
    the timer slot, i.e. on the GUI thread, so the window can stall while a
    check is in progress.  ``WorkerThread`` is started but does no mail work.
    """

    # Interval between inbox checks while the background job is running.
    POLL_INTERVAL_MS = 5000

    _BUTTON_STYLE = 'QPushButton {background-color: #2F569B; color: #d4d4d4;}'
    _MODEL_FILE = "model.sav"
    _VECTORIZER_FILE = 'vect.sav'
    # Translation table that deletes every ASCII punctuation character.
    _PUNCT_TABLE = str.maketrans('', '', string.punctuation)

    # Lazily created / cached state.  Declared at class level so the methods
    # below work even before the first use has populated them.
    _poll_timer = None
    _stop_words = None
    _vectorizer = None
    _model = None
    worker = None

    def __init__(self):
        super(Window, self).__init__()

        self.initUI()

    def initUI(self):
        """Create the buttons and the mail table, then show the window."""
        self._make_button('Compose', 25, 100, self.onClick_pb3)
        self.setGeometry(25, 45, 600, 480)
        self.setWindowTitle('Zumail')

        self.pb4 = self._make_button('Run in Background', 130, 150,
                                     self.onClick_pb4)
        self.pb5 = self._make_button('Stop Background Job', 285, 150,
                                     self.onClick_pb5)
        # Nothing to stop until the background job has been started.
        self.pb5.setEnabled(False)

        self.createTable()
        self.show()

    def _make_button(self, label, left, width, handler):
        """Create one toolbar button at x=``left`` and connect its click handler."""
        button = QPushButton(label, self)
        button.setGeometry(left, 5, width, 35)
        button.setStyleSheet(self._BUTTON_STYLE)
        button.clicked.connect(handler)
        return button

    def onClick_pb3(self):
        """Handle "Compose" (placeholder: the button does nothing yet)."""
        pass

    def onClick_pb5(self):
        """Handle "Stop Background Job": stop polling and swap button states."""
        global breaker
        breaker = 1
        # Actually stop the timer instead of letting it keep firing and
        # relying on the flag alone.
        if self._poll_timer is not None:
            self._poll_timer.stop()
        self.pb4.setEnabled(True)
        self.pb5.setEnabled(False)
        print("thread was stopped")

    def process_text(self, text):
        """Return ``text`` without punctuation and without English stop words.

        Words are re-joined with single spaces; stop-word matching is
        case-insensitive.
        """
        # Build the stop-word set once; the original rebuilt the full list
        # for every single word of every message.
        if self._stop_words is None:
            self._stop_words = frozenset(stopwords.words('english'))
        stop_words = self._stop_words
        no_punc = text.translate(self._PUNCT_TABLE)
        return ' '.join(word for word in no_punc.split()
                        if word.lower() not in stop_words)

    def stemming(self, text):
        """Apply the module-level stemmer to ``text`` and return the result.

        NOTE(review): iterating over a string yields single characters, so the
        stemmer is applied per character rather than per word (with NLTK's
        Porter stemmer this presumably amounts to lower-casing the text).
        This is kept unchanged on purpose: the saved vectorizer/model were
        presumably fitted on text preprocessed the same way, so changing it
        here without retraining would skew predictions -- confirm against the
        training script before "fixing".
        """
        return ''.join([stemmer.stem(word) for word in text])

    def onClick_pb4(self):
        """Handle "Run in Background": start polling and enable notifications."""
        global notif1, breaker
        breaker = 0

        # Reuse one timer.  Creating a fresh QTimer on every click (as before)
        # left the old ones running, so each Stop/Run cycle added another
        # inbox check per interval.
        if self._poll_timer is None:
            self._poll_timer = QTimer(self)
            self._poll_timer.timeout.connect(self.showtext)
        self._poll_timer.start(self.POLL_INTERVAL_MS)

        # Do not drop the reference to a thread object that is still running.
        if self.worker is not None and self.worker.isRunning():
            self.worker.wait()
        self.worker = WorkerThread()
        self.worker.start()

        self.pb4.setEnabled(False)
        self.pb5.setEnabled(True)
        notif1 = 1

    def showtext(self):
        """Timer slot: check the inbox unless the stop flag is set."""
        if breaker == 1:
            return
        try:
            self.recmail()
        except (imaplib.IMAP4.error, OSError) as exc:
            # A transient network/login failure must not take the GUI down;
            # report it and try again on the next tick.
            print(f"mail check failed: {exc}")

    def createTable(self):
        """Build the 3-column mail table and fill it with the current unread mail."""
        self.tableWidget = QTableWidget(self)
        # Route the viewport's events through eventFilter() so clicks can be logged.
        self.tableWidget.viewport().installEventFilter(self)

        self.tableWidget.setRowCount(24)
        self.tableWidget.setColumnCount(3)
        self.tableWidget.setFixedSize(550, 415)
        self.tableWidget.move(25, 50)
        self.tableWidget.setItemDelegate(Delegate(self.tableWidget))

        self.tableWidget.setHorizontalHeaderLabels(['Sender', 'Date', 'Subject'])
        # Fixed the broken "bor  der-radius" property name in the header style.
        stylesheet = "::section{Background-color:rgb(179, 179, 179);color: white; border-radius:14px;}"
        header = self.tableWidget.horizontalHeader()
        header.setStyleSheet(stylesheet)
        header.setStretchLastSection(True)
        self.tableWidget.verticalHeader().setStretchLastSection(True)
        self.recmail()

    def recmail(self):
        """Fetch all unseen INBOX messages, classify each and update the UI.

        Non-spam messages are appended to the table; when notifications are
        enabled (``notif1 == 1``) a toast is shown for every message.

        Raises:
            imaplib.IMAP4.error, OSError: if connecting, logging in or
                fetching fails.
        """
        # NOTE(review): credentials are hard-coded (masked here); consider
        # reading them from the environment or a keyring instead.
        EMAIL = 'm******@gmail.com'
        PASSWORD = '*******'
        SERVER = 'imap.gmail.com'

        mail = imaplib.IMAP4_SSL(SERVER)
        try:
            mail.login(EMAIL, PASSWORD)
            mail.select('INBOX')
            status, data = mail.search(None, '(UNSEEN)')
            mail_ids = [mail_id for block in data if block
                        for mail_id in block.split()]

            for mail_id in mail_ids:
                status, fetched = mail.fetch(mail_id, '(RFC822)')
                for response_part in fetched:
                    # Only the tuple entries carry the raw message bytes.
                    if isinstance(response_part, tuple):
                        message = email.message_from_bytes(response_part[1])
                        self._handle_message(message)
        finally:
            # Always release the connection; previously every check leaked one.
            try:
                mail.logout()
            except (imaplib.IMAP4.error, OSError):
                # Best-effort cleanup: do not mask the original error.
                pass

    @staticmethod
    def _message_text(message):
        """Return the text used for classification from a parsed message.

        For multipart messages this is the concatenation of all text/plain
        parts (including nested ones); otherwise it is the raw payload.
        Payloads are used as stored, without transfer-decoding.
        """
        if not message.is_multipart():
            return message.get_payload()
        return ''.join(part.get_payload() for part in message.walk()
                       if part.get_content_type() == 'text/plain')

    def _load_classifier(self):
        """Return ``(vectorizer, model)``, loading them from disk only once."""
        if self._vectorizer is None:
            self._vectorizer = joblib.load(self._VECTORIZER_FILE)
        if self._model is None:
            self._model = joblib.load(self._MODEL_FILE)
        return self._vectorizer, self._model

    def _handle_message(self, message):
        """Classify one message, then list it and/or notify about it."""
        # Headers may be absent (None) or non-str objects; normalise to str so
        # concatenation and QTableWidgetItem() cannot fail on them.
        mail_from = str(message['from'] or '')
        mail_subject = str(message['subject'] or '')
        mail_date = str(message['date'] or '')
        mail_content = self._message_text(message)

        features = self.process_text(mail_subject + ': ' + mail_content)
        features = self.stemming(features)
        vectorizer, model = self._load_classifier()
        result = model.predict(vectorizer.transform([features]))

        # A prediction of 0 is treated as "not spam": the mail is listed.
        if result[0] == 0:
            self._append_row(mail_from, mail_date, mail_subject)
            if notif1 == 1:
                self.notif(mail_from)
        elif notif1 == 1:
            self.notif('Spam')

    def _append_row(self, sender, date, subject):
        """Write one mail into the next free table row, growing the table if needed."""
        global x
        # Without this, rows past the initially allocated ones were dropped.
        if x >= self.tableWidget.rowCount():
            self.tableWidget.setRowCount(x + 1)
        for column, text in enumerate((sender, date, subject)):
            self.tableWidget.setItem(x, column, QTableWidgetItem(text))
        x += 1

    def notif(self, arg1):
        """Show a Windows toast notification.

        Args:
            arg1: the sender header of the new mail, or the literal ``'Spam'``
                to announce a spam message.  Only the part before the first
                ``<`` is displayed.
        """
        # Local import: escape() is only needed here.
        from xml.sax.saxutils import escape

        app = '{1AC14E77-02E7-4E5D-B744-2EB1AE5198B7}\\WindowsPowerShell\\v1.0\\powershell.exe'
        nManager = notifications.ToastNotificationManager
        notifier = nManager.create_toast_notifier(app)
        if arg1 == 'Spam':
            zmail = "You've Got Spam!"
        else:
            zmail = "You've got Mail"
        # Escape the sender so characters such as "&" cannot break the XML.
        sender = escape(arg1.split('<')[0])
        tString = """
          <toast>
            <visual>
              <binding template='ToastGeneric'>
                <text>""" + zmail + """</text>
                <text>from """ + sender + """</text>
              </binding>
            </visual>
            <actions>
              <action
                content="Delete"
                arguments="action=delete"/>
              <action
                content="Dismiss"
                arguments="action=dismiss"/>
            </actions>
          </toast>
        """
        xDoc = dom.XmlDocument()
        xDoc.load_xml(tString)
        notifier.show(notifications.ToastNotification(xDoc))

    def eventFilter(self, source, event):
        """Log the current cell on mouse release; never consume the event."""
        if event.type() == QEvent.Type.MouseButtonRelease:
            row = self.tableWidget.currentRow()
            col = self.tableWidget.currentColumn()
            item = self.tableWidget.item(row, col)
            if item is not None:
                print(f"{row} {col} {item.text()}")
            else:
                print(f"{row} {col}")

        # Defer to the base implementation rather than calling
        # QObject.event() on the watched object from inside the filter.
        return super().eventFilter(source, event)
class WorkerThread(QThread):
    """Placeholder thread that Window.onClick_pb4 creates and starts."""

    def run(self):
        """Print a start message; no mail processing happens in this thread yet."""
        print("thread started")
        # The module-level stop flag is consulted, but nothing acts on it yet.
        if breaker != 1:
            return
         

   
def main():
    """Create the Qt application and main window, then run the event loop."""
    qt_app = QApplication(sys.argv)
    # Keep a reference so the window stays alive for the whole event loop;
    # the window shows itself during construction.
    window = Window()
    exit_code = qt_app.exec()
    sys.exit(exit_code)


if __name__ == '__main__':
    main()


No comments:

Post a Comment