WelcomeBot.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334
  1. import datetime
  2. import json
  3. import pickle
  4. import praw
  5. import re
  6. import time
  7. from fuzzywuzzy import fuzz
  8. JSON_LOG_PATH = './log.json'
  9. CREDENTIALS_PATH = './AccountCredentials.json'
  10. USERS_JSON_PATH = 'Users.json'
  11. SUBREDDIT_NAME = 'Piracy'
  12. BOT_USERNAME = 'PiracyBot'
  13. POSSIBLE_RULE3_RE = [r'trying to \w+ a ((tv )?show|movie|series)', r'can \w+ help me (find|torrent|stream)', r'looking for a ((tv )?show|movie|series)', r'where (i can|can i|to) (download|get|stream|watch|torrent|find)', r'anyone know (of )?a \w* ?(to )?(place|link|torrent)', r'best \w+ to (download|get|stream|watch|torrent)', r'free ([^\.,?!\n]+ ){1, 6} site', r'any good (link|place|\w*site)', r'\b(download|get|stream|watch|torrent|find)\b[^\.,?!\n]+(show|movie|series)[^\.,?!]+\?\n', r'free download(?! manager)', r'safe link', r'looking for [^\.,?!\n]+(book|movie|show|link|site|place|download|free)', r'(any|where|site|link)[\w ]+book[\w ]+free\?', r'\bwhere (do|can)[^\.,?!\n]{2,15}(get[^\n\.,?!]+\?|find)', r'where (can i|do i|to) [^\.,?!\n]+free', r'^looking for', r'\ba [pd]m\br']
  14. # Post will not be approved by bot if OP does not answer within 12 hours.
  15. MAX_TIME_ALLOWANCE = 3600 * 10
  16. # how frequent to save to Users.json
  17. SAVE_FREQUENCY = 60 * 5
  18. SPAM_FILTER_CHECK_FREQUENCY = 60 * 10
  19. RULES_URL = 'https://www.reddit.com/r/Piracy/wiki/piracy_rules'
  20. WELCOME_MESSAGE_SUBJECT_TITLE = 'Message from /r/Piracy'
  21. REMOVAL_MESSAGE_SUBJECT_TITLE = 'Concerning your /r/Piracy submission'
  22. WELCOME_MESSAGE = '''Welcome to /r/Piracy! You are receiving this message because you are new to the subreddit. If you are not new, then don't worry, you will not be messaged a second time!
  23. ---
  24. [Please make sure to read our rules](https://www.reddit.com/r/Piracy/wiki/piracy_rules), as it will help to save the subreddit and yourself from being banned in the possible future.
  25. **[Also see our Wiki](/r/Piracy/wiki/index)**, which contains a list of sites, tools, FAQ, and other useful resources.
  26. Your question also may have been asked previously - you can search the subreddit via google - example: https://i.imgur.com/1jA767u.jpg
  27. '''
  28. REQUIRED_REPLY = 'I have read the rules and the wiki'
  29. REQUIRED_REPLY_RE = r'i (\w+ )?read (\w+ )?rules \w+ (\w+ )?wiki'
  30. REMOVAL_MESSAGE_FIRST_HALF = 'Thank you for [your submission.]('
  31. REMOVAL_MESSAGE_SECOND_HALF = f''') I am a bot. Since you are new to the subreddit, your submission is not yet visible to everyone.
  32. ---
  33. **Please read [the rules](https://www.reddit.com/r/Piracy/wiki/piracy_rules), especially rule 3: Do not request for or link to specific pirated titles (ie. specific movie, book, etc). We're not your personal search party.**
  34. [Also see our Wiki](/r/Piracy/wiki/index), which contains a Megathread with a list of sites, tools, FAQ, and other useful resources.
  35. **Your question also may have been asked previously - you can search the subreddit via google - example: https://i.imgur.com/1jA767u.jpg**
  36. ---
  37. If your submission abides by the rules and is not covered by the wiki, please reply to this message with "{REQUIRED_REPLY}" to have your submission approved.
  38. '''
  39. SORRY_REPLY = f'You are past the {int(MAX_TIME_ALLOWANCE/3600)}-hour time window. Your submission would be buried if it were approved now. Please re-submit.'
  40. OVERRIDE_UNAVAILABLE_REPLY = '''Your submission has been removed by another moderator. This bot is unable to override this action.
  41. ---
  42. [Contact the moderators of this subreddit if you have any concerns](https://www.reddit.com/message/compose/?to=/r/Piracy)'''
  43. # Capture group for the permalink that was removed, appearing in the bot welcome message
  44. PERMALINK_FIRST_MESSAGE = r'\((.+?)\)'
  45. def main():
  46. with open(CREDENTIALS_PATH, 'r', encoding='utf8') as f:
  47. credentials = json.load(f)
  48. CLIENT_ID = credentials['client_id']
  49. CLIENT_SECRET = credentials['client_secret']
  50. USERNAME = credentials['username']
  51. PASSWORD = credentials['password']
  52. USER_AGENT = credentials['user_agent']
  53. reddit = praw.Reddit(client_id=CLIENT_ID, client_secret=CLIENT_SECRET, username=USERNAME, password=PASSWORD, user_agent=USER_AGENT)
  54. Users = getUsers()
  55. startTime = time.time()
  56. timeLastSavedUsers = startTime
  57. now = datetime.datetime.now().strftime('%Y-%m-%d at %H:%M:%S')
  58. print(f'Started at {now}')
  59. monitorStream(timeLastSavedUsers, Users, startTime, reddit)
  60. def monitorStream(timeLastSavedUsers, Users, startTime, reddit):
  61. subreddit = reddit.subreddit(SUBREDDIT_NAME)
  62. submission_stream = subreddit.stream.submissions(pause_after=-1, skip_existing=True)
  63. comment_stream = subreddit.stream.comments(pause_after=-1, skip_existing=True)
  64. inbox_stream = reddit.inbox.stream(pause_after=-1)
  65. timeLastCheckedSpamQueue = time.time()
  66. try:
  67. while True:
  68. for submission in submission_stream:
  69. if submission is None or submission.author is None:
  70. break
  71. # if submission is not removed by any mods or submission is removed by reddit filters (banned_by == True) and submission is not already approved
  72. if not submission.author.name in Users and submission.banned_by is None and not submission.approved:
  73. processSubmission(reddit, submission)
  74. for comment in comment_stream:
  75. if comment is None or comment.author is None:
  76. break
  77. if not comment.author.name in Users and not REQUIRED_REPLY.lower() in comment.body.lower():
  78. processComment(Users, reddit, comment)
  79. for message in inbox_stream:
  80. # log_json('Checking messages', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
  81. if message is None or message.author is None:
  82. # log_json('Message is None', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
  83. break
  84. if isinstance(message, praw.models.Message) and (fuzz.token_set_ratio(REQUIRED_REPLY, message.body) > 90 or re.search(REQUIRED_REPLY_RE, message.body, re.IGNORECASE)):
  85. # log_json('Processing message', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
  86. processMessage(Users, reddit, message)
  87. message.mark_read()
  88. # Save Users to json every 5 minutes
  89. currTime = time.time()
  90. if currTime - timeLastSavedUsers > SAVE_FREQUENCY:
  91. timeLastSavedUsers = currTime
  92. saveUsers(Users)
  93. if currTime - timeLastCheckedSpamQueue > SPAM_FILTER_CHECK_FREQUENCY:
  94. for spamSubmission in subreddit.mod.spam(only="submissions"):
  95. if spamSubmission is None or spamSubmission.author is None:
  96. continue
  97. if spamSubmission.banned_by == True and spamSubmission.created_utc > startTime:
  98. processSubmission(reddit, spamSubmission)
  99. timeLastCheckedSpamQueue = currTime
  100. except Exception as e:
  101. print(f' >>>>> There was an error: {str(e)}')
  102. time.sleep(60) # wait for 60 seconds before restarting
  103. monitorStream(timeLastSavedUsers, Users, startTime, reddit)
  104. def saveUsers(Users):
  105. with open(USERS_JSON_PATH, 'w', encoding='utf8') as f:
  106. json.dump(Users, f, indent=4)
  107. now = datetime.datetime.now().strftime('%Y-%m-%d at %H:%M:%S')
  108. print(f'Saved at {now}')
  109. def log_json(logStr, now):
  110. if int(time.time()) % 120 > 10:
  111. return
  112. logList = []
  113. with open(JSON_LOG_PATH, 'r', encoding='utf8') as f:
  114. logList = json.load(f)
  115. logList.append(logStr + ' ' + str(now))
  116. with open(JSON_LOG_PATH, 'w', encoding='utf8') as f:
  117. json.dump(logList, f, indent=4)
  118. def processSubmission(reddit, submission_temp):
  119. subID = submission_temp.id
  120. time.sleep(2)
  121. # reload submission to get fresh data
  122. submission = reddit.submission(id=subID)
  123. permalink = submission.permalink
  124. submissionID = submission.id
  125. authorName = submission.author.name
  126. submissionTitle = submission.title
  127. submission.mod.remove()
  128. print(f' > Removed submission by {authorName} : {submissionID} : {submissionTitle}')
  129. recipient = reddit.redditor(authorName)
  130. recipient.message(subject=REMOVAL_MESSAGE_SUBJECT_TITLE, message=f'{REMOVAL_MESSAGE_FIRST_HALF}https://reddit.com{permalink}{REMOVAL_MESSAGE_SECOND_HALF}')
  131. def processComment(Users, reddit, comment):
  132. authorName = comment.author.name
  133. print(f' > Welcoming {authorName}')
  134. recipient = reddit.redditor(authorName)
  135. recipient.message(subject=WELCOME_MESSAGE_SUBJECT_TITLE, message=f'{WELCOME_MESSAGE}')
  136. Users[authorName] = 'Comment: ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  137. def processMessage(Users, reddit, message):
  138. try:
  139. firstMessage = reddit.inbox.message(message.first_message_name[3:])
  140. except:
  141. message.mark_read()
  142. message.reply('There was an issue. You must reply to the original PM instead of creating a new message.')
  143. return
  144. firstMessageText = firstMessage.body
  145. if not RULES_URL in firstMessageText:
  146. message.mark_read()
  147. return
  148. fullPermalink = re.search(PERMALINK_FIRST_MESSAGE, firstMessageText).groups()[0]
  149. authorName = message.author.name
  150. currTime = time.time()
  151. # if url is a a submission permalink
  152. if fullPermalink.count('/') == 8:
  153. submission = reddit.submission(url=fullPermalink)
  154. submissionTitle = submission.title
  155. submissionID = submission.id
  156. SubmissionBannedBy = submission.banned_by
  157. # if submission is not removed
  158. if SubmissionBannedBy is None:
  159. message.reply(f'[Your submission]({fullPermalink}) has already been approved is visible to everyone.')
  160. # if reply is past the MAX_TIME_ALLOWANCE (seconds)
  161. elif currTime - submission.created_utc > MAX_TIME_ALLOWANCE:
  162. message.reply(SORRY_REPLY)
  163. elif SubmissionBannedBy != BOT_USERNAME:
  164. message.reply(OVERRIDE_UNAVAILABLE_REPLY)
  165. elif SubmissionBannedBy == BOT_USERNAME:
  166. print(f' >>> Approving submission by {authorName} : {submissionID} : {submissionTitle}')
  167. submission.mod.approve()
  168. if submission.is_self:
  169. processApprovedSubmission(submission)
  170. message.reply(f'Thank you. [Your submission]({fullPermalink}) is now visible to everyone.')
  171. if not authorName in Users:
  172. Users[authorName] = 'Submission: ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  173. # if url is a comment fullPermalink
  174. # elif fullPermalink.count('/') == 9:
  175. # comment = reddit.comment(url=fullPermalink)
  176. # if comment.banned_by is None:
  177. # message.reply('[Your comment](fullPermalink) has already been approved and is visible to everyone.')
  178. # userSet.add(authorName)
  179. # elif currTime - comment.created_utc > MAX_TIME_ALLOWANCE:
  180. # message.reply(SORRY_REPLY)
  181. # elif comment.banned_by != BOT_USERNAME:
  182. # message.reply(OVERRIDE_UNAVAILABLE_REPLY)
  183. # userSet.add(authorName)
  184. # elif comment.banned_by == BOT_USERNAME:
  185. # print(f'Approving comment {fullPermalink}')
  186. # comment.mod.approve()
  187. # message.reply('[Your comment](fullPermalink) has been approved. It is now visible to everyone.')
  188. # userSet.add(authorName)
  189. # def processReply(userSet, reddit, comment):
  190. # submission = comment.submission
  191. # submitterName = submission.author.name
  192. # if comment.author.name != submitterName:
  193. # return
  194. # currTime = time.time()
  195. # parentComment = comment.parent()
  196. # SubmissionBannedBy = submission.banned_by
  197. # permalink = submission.permalink
  198. # submissionID = submission.id
  199. # submissionTitle = submission.title
  200. # recipient = reddit.redditor(submitterName)
  201. # # if submission is not removed
  202. # if SubmissionBannedBy is None:
  203. # recipient.message(subject='Your submission is already approved', message=f'[Your submission](https://reddit.com{permalink}) has already been approved and is visible to everyone.')
  204. # # if reply is past the MAX_TIME_ALLOWANCE (seconds)
  205. # elif currTime - submission.created_utc > MAX_TIME_ALLOWANCE + 60:
  206. # botComment = comment.reply(SORRY_REPLY)
  207. # botComment.mod.distinguish()
  208. # elif SubmissionBannedBy != BOT_USERNAME:
  209. # botComment = comment.reply(OVERRIDE_UNAVAILABLE_REPLY)
  210. # botComment.mod.distinguish()
  211. # elif 'has been removed' in parentComment.body and SubmissionBannedBy == BOT_USERNAME:
  212. # print(f' >>> Approving submission by {submitterName}: {submissionID}: {submissionTitle}')
  213. # recipient.message(subject='Your /r/Piracy submission has been approved', message=f'Thank you. Your [submission](https://reddit.com{permalink}) is now visible to everyone.')
  214. # # botComment = comment.reply(f'>{comment.body}\n\nThank you /u/{comment.author.name}. Your submission is now visible to everyone.')
  215. # # botComment.mod.distinguish()
  216. # parentComment.edit(f'{REMOVAL_COMMENT}\n\n \n\nEDIT: /u/{submitterName} has replied. This submission is now approved')
  217. # submission.mod.approve()
  218. # userSet.add(submitterName)
  219. def processApprovedSubmission(submission):
  220. maxLength = 65
  221. title = submission.title
  222. selftext = submission.selftext
  223. for str_re in POSSIBLE_RULE3_RE:
  224. m = re.search(str_re, title, re.IGNORECASE | re.DOTALL)
  225. if m:
  226. mGroup = m.group()
  227. mGroup = mGroup[:maxLength] if len(mGroup) > maxLength else mGroup
  228. submission.report(f'Possible rule 3? - in_title: [{mGroup}]')
  229. return
  230. m = re.search(str_re, selftext, re.IGNORECASE | re.DOTALL)
  231. if m:
  232. mGroup = m.group()
  233. mGroup = mGroup[:maxLength] if len(mGroup) > maxLength else mGroup
  234. submission.report(f'Possible rule 3? - in_body: [{mGroup}]')
  235. def isUserNew(username, Users):
  236. if username in Users:
  237. return False
  238. return True
  239. # def getUserSet():
  240. # try:
  241. # with open(USER_SET_PICKLE_PATH, 'rb') as f:
  242. # return pickle.load(f)
  243. # except:
  244. # userSet = set()
  245. # with open(USER_SET_PICKLE_PATH, 'wb') as f:
  246. # pickle.dump(userSet, f)
  247. # return userSet
  248. def getUsers():
  249. try:
  250. with open(USERS_JSON_PATH, 'r', encoding='utf8') as f:
  251. Users = json.load(f)
  252. return Users
  253. except:
  254. with open(USERS_JSON_PATH, 'w', encoding='utf8') as f:
  255. json.dump({}, f, indent=4)
  256. return {}
  257. if __name__ == '__main__':
  258. print('Welcome Bot has started')
  259. main()