WelcomeBot.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. import datetime
  2. import json
  3. import pickle
  4. import praw
  5. import re
  6. import time
  7. from fuzzywuzzy import fuzz
  8. JSON_LOG_PATH = './log.json'
  9. CREDENTIALS_PATH = './AccountCredentials.json'
  10. USERS_JSON_PATH = 'Users.json'
  11. SUBREDDIT_NAME = 'Piracy'
  12. BOT_USERNAME = 'PiracyBot'
  13. POSSIBLE_RULE3_RE = [r'trying to \w+ a ((tv )?show|movie|series)', r'can \w+ help me (find|torrent|stream)', r'looking for a ((tv )?show|movie|series)', r'where (i can|can i|to) (download|get|stream|watch|torrent|find)', r'anyone know (of )?a \w* ?(to )?(place|link|torrent)', r'best \w+ to (download|get|stream|watch|torrent)', r'free ([^\.,?!\n]+ ){1, 6} site', r'any good (link|place|\w*site)', r'\b(download|get|stream|watch|torrent|find)\b[^\.,?!\n]+(show|movie|series)[^\.,?!]+\?\n', r'free download(?! manager)', r'safe link', r'looking for [^\.,?!\n]+(book|movie|show|link|site|place|download|free)', r'(any|where|site|link)[\w ]+book[\w ]+free\?', r'\bwhere (do|can)[^\.,?!\n]{2,15}(get[^\n\.,?!]+\?|find)', r'where (can i|do i|to) [^\.,?!\n]+free', r'^looking for', r'\ba [pd]m\br']
  14. # Post will not be approved by bot if OP does not answer within 12 hours.
  15. MAX_TIME_ALLOWANCE = 3600 * 10
  16. # how frequent to save to Users.json
  17. SAVE_FREQUENCY = 60 * 5
  18. SPAM_FILTER_CHECK_FREQUENCY = 60 * 10
  19. RULES_URL = 'https://www.reddit.com/r/Piracy/wiki/piracy_rules'
  20. WELCOME_MESSAGE_SUBJECT_TITLE = 'Message from /r/Piracy'
  21. REMOVAL_MESSAGE_SUBJECT_TITLE = 'Concerning your /r/Piracy submission'
  22. WELCOME_MESSAGE = '''Welcome to /r/Piracy! You are receiving this message because you are new to the subreddit. If you are not new, then don't worry, you will not be messaged a second time!
  23. ---
  24. [Please make sure to read our rules](https://www.reddit.com/r/Piracy/wiki/piracy_rules), as it will help to save the subreddit and yourself from being banned in the possible future.
  25. **[Also see our Wiki](/r/Piracy/wiki/index)**, which contains a list of sites, tools, FAQ, and other useful resources.
  26. Your question also may have been asked previously - you can search the subreddit via google - example: https://i.imgur.com/1jA767u.jpg
  27. '''
  28. REQUIRED_REPLY = 'I have read the rules and the wiki'
  29. REQUIRED_REPLY_RE = r'i (\w+ )?read (\w+ )?rules \w+ (\w+ )?wiki'
  30. REMOVAL_MESSAGE_FIRST_HALF = 'Thank you for [your submission.]('
  31. REMOVAL_MESSAGE_SECOND_HALF = f''') I am a bot. Since you are new to the subreddit, your submission is not yet visible to everyone.
  32. ---
  33. **Please read [the rules](https://www.reddit.com/r/Piracy/wiki/piracy_rules), especially rule 3: Do not request for or link to specific pirated titles (ie. specific movie, book, etc). We're not your personal search party.**
  34. [Also see our Wiki](/r/Piracy/wiki/index), which contains a Megathread with a list of sites, tools, FAQ, and other useful resources.
  35. **Your question also may have been asked previously - you can search the subreddit via google - example: https://i.imgur.com/1jA767u.jpg**
  36. ---
  37. If your submission abides by the rules and is not covered by the wiki, please reply to this message with "{REQUIRED_REPLY}" to have your submission approved.
  38. '''
  39. SORRY_REPLY = f'You are past the {int(MAX_TIME_ALLOWANCE/3600)}-hour time window. Your submission would be buried if it were approved now. Please re-submit.'
  40. OVERRIDE_UNAVAILABLE_REPLY = '''Your submission has been removed by another moderator. This bot is unable to override this action.
  41. ---
  42. [Contact the moderators of this subreddit if you have any concerns](https://www.reddit.com/message/compose/?to=/r/Piracy)'''
  43. # Capture group for the permalink that was removed, appearing in the bot welcome message
  44. PERMALINK_FIRST_MESSAGE = r'\((.+?)\)'
  45. def main():
  46. with open(CREDENTIALS_PATH, 'r', encoding='utf8') as f:
  47. credentials = json.load(f)
  48. CLIENT_ID = credentials['client_id']
  49. CLIENT_SECRET = credentials['client_secret']
  50. USERNAME = credentials['username']
  51. PASSWORD = credentials['password']
  52. USER_AGENT = credentials['user_agent']
  53. reddit = praw.Reddit(client_id=CLIENT_ID, client_secret=CLIENT_SECRET, username=USERNAME, password=PASSWORD, user_agent=USER_AGENT)
  54. # Get list of users that have already been oriented to the subreddit rules
  55. Users = getUsers()
  56. startTime = time.time()
  57. timeLastSavedUsers = startTime
  58. now = datetime.datetime.now().strftime('%Y-%m-%d at %H:%M:%S')
  59. print(f'Started at {now}')
  60. monitorStream(timeLastSavedUsers, Users, startTime, reddit)
  61. def monitorStream(timeLastSavedUsers, Users, startTime, reddit):
  62. subreddit = reddit.subreddit(SUBREDDIT_NAME)
  63. submission_stream = subreddit.stream.submissions(pause_after=-1, skip_existing=True)
  64. comment_stream = subreddit.stream.comments(pause_after=-1, skip_existing=True)
  65. inbox_stream = reddit.inbox.stream(pause_after=-1)
  66. timeLastCheckedSpamQueue = time.time()
  67. try:
  68. while True:
  69. for submission in submission_stream:
  70. # If submission is deleted or the author shows up as [deleted] (deleted their account)
  71. if submission is None or submission.author is None:
  72. break
  73. # if submission is not removed by any mods or submission is removed by reddit filters (banned_by == True) and submission is not already approved
  74. if not submission.author.name in Users:
  75. processSubmission(reddit, submission)
  76. for comment in comment_stream:
  77. # If comment is deleted or the author shows up as [deleted] (deleted their account)
  78. if comment is None or comment.author is None:
  79. break
  80. if not comment.author.name in Users:
  81. processComment(Users, reddit, comment)
  82. for message in inbox_stream:
  83. # log_json('Checking messages', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
  84. if message is None or message.author is None:
  85. # log_json('Message is None', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
  86. break
  87. if isinstance(message, praw.models.Message) and (fuzz.token_set_ratio(REQUIRED_REPLY, message.body) > 90 or re.search(REQUIRED_REPLY_RE, message.body, re.IGNORECASE)):
  88. # log_json('Processing message', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
  89. processMessage(Users, reddit, message)
  90. message.mark_read()
  91. # Save Users to json
  92. currTime = time.time()
  93. if currTime - timeLastSavedUsers > SAVE_FREQUENCY:
  94. timeLastSavedUsers = currTime
  95. saveUsers(Users)
  96. # check spam queue for submissions caught in reddit's filter
  97. if currTime - timeLastCheckedSpamQueue > SPAM_FILTER_CHECK_FREQUENCY:
  98. for spamSubmission in subreddit.mod.spam(only="submissions"):
  99. if spamSubmission is None or spamSubmission.author is None:
  100. continue
  101. if spamSubmission.banned_by == True and spamSubmission.created_utc > startTime:
  102. processSubmission(reddit, spamSubmission)
  103. timeLastCheckedSpamQueue = currTime
  104. except Exception as e:
  105. print(f' >>>>> There was an error: {str(e)}')
  106. time.sleep(60) # wait for 60 seconds before restarting
  107. monitorStream(timeLastSavedUsers, Users, startTime, reddit)
  108. def saveUsers(Users):
  109. with open(USERS_JSON_PATH, 'w', encoding='utf8') as f:
  110. json.dump(Users, f, indent=4)
  111. now = datetime.datetime.now().strftime('%Y-%m-%d at %H:%M:%S')
  112. print(f'Saved at {now}')
  113. #def log_json(logStr, now):
  114. # if int(time.time()) % 120 > 10:
  115. # return
  116. # logList = []
  117. # with open(JSON_LOG_PATH, 'r', encoding='utf8') as f:
  118. # logList = json.load(f)
  119. # logList.append(logStr + ' ' + str(now))
  120. # with open(JSON_LOG_PATH, 'w', encoding='utf8') as f:
  121. # json.dump(logList, f, indent=4)
  122. def processSubmission(reddit, submission):
  123. # wait X seconds and reload submission to get fresh data to allow the automod to work through its own spam rules
  124. time.sleep(3)
  125. submission = reddit.submission(id=submission.id)
  126. # If submission is not removed and is not removed by reddit's spam filter or the submission has already been approved, skip submission
  127. if submission.banned_by != None and submission.banned_by != True or submission.approved:
  128. return
  129. permalink = submission.permalink
  130. submissionID = submission.id
  131. authorName = submission.author.name
  132. submissionTitle = submission.title
  133. submission.mod.remove()
  134. print(f' > Removed submission by {authorName} : {submissionID} : {submissionTitle}')
  135. recipient = reddit.redditor(authorName)
  136. recipient.message(subject=REMOVAL_MESSAGE_SUBJECT_TITLE, message=f'{REMOVAL_MESSAGE_FIRST_HALF}https://reddit.com{permalink}{REMOVAL_MESSAGE_SECOND_HALF}')
  137. def processComment(Users, reddit, comment):
  138. authorName = comment.author.name
  139. print(f' > Welcoming {authorName}')
  140. recipient = reddit.redditor(authorName)
  141. recipient.message(subject=WELCOME_MESSAGE_SUBJECT_TITLE, message=f'{WELCOME_MESSAGE}')
  142. Users[authorName] = 'Comment: ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  143. def processMessage(Users, reddit, message):
  144. try:
  145. firstMessage = reddit.inbox.message(message.first_message_name[3:])
  146. except:
  147. message.mark_read()
  148. message.reply('There was an issue. You must reply to the original PM instead of creating a new message.')
  149. return
  150. firstMessageText = firstMessage.body
  151. if not RULES_URL in firstMessageText:
  152. message.mark_read()
  153. return
  154. fullPermalink = re.search(PERMALINK_FIRST_MESSAGE, firstMessageText).groups()[0]
  155. authorName = message.author.name
  156. currTime = time.time()
  157. # if url is a a submission permalink
  158. if fullPermalink.count('/') == 8:
  159. submission = reddit.submission(url=fullPermalink)
  160. submissionTitle = submission.title
  161. submissionID = submission.id
  162. SubmissionBannedBy = submission.banned_by
  163. # if submission is not removed
  164. if SubmissionBannedBy is None:
  165. message.reply(f'[Your submission]({fullPermalink}) has already been approved is visible to everyone.')
  166. # if reply is past the MAX_TIME_ALLOWANCE (seconds)
  167. elif currTime - submission.created_utc > MAX_TIME_ALLOWANCE:
  168. message.reply(SORRY_REPLY)
  169. elif SubmissionBannedBy != BOT_USERNAME:
  170. message.reply(OVERRIDE_UNAVAILABLE_REPLY)
  171. elif SubmissionBannedBy == BOT_USERNAME:
  172. print(f' >>> Approving submission by {authorName} : {submissionID} : {submissionTitle}')
  173. submission.mod.approve()
  174. if submission.is_self:
  175. processApprovedSubmission(submission)
  176. message.reply(f'Thank you. [Your submission]({fullPermalink}) is now visible to everyone.')
  177. if not authorName in Users:
  178. Users[authorName] = 'Submission: ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
  179. # if url is a comment fullPermalink
  180. # elif fullPermalink.count('/') == 9:
  181. # comment = reddit.comment(url=fullPermalink)
  182. # if comment.banned_by is None:
  183. # message.reply('[Your comment](fullPermalink) has already been approved and is visible to everyone.')
  184. # userSet.add(authorName)
  185. # elif currTime - comment.created_utc > MAX_TIME_ALLOWANCE:
  186. # message.reply(SORRY_REPLY)
  187. # elif comment.banned_by != BOT_USERNAME:
  188. # message.reply(OVERRIDE_UNAVAILABLE_REPLY)
  189. # userSet.add(authorName)
  190. # elif comment.banned_by == BOT_USERNAME:
  191. # print(f'Approving comment {fullPermalink}')
  192. # comment.mod.approve()
  193. # message.reply('[Your comment](fullPermalink) has been approved. It is now visible to everyone.')
  194. # userSet.add(authorName)
  195. # def processReply(userSet, reddit, comment):
  196. # submission = comment.submission
  197. # submitterName = submission.author.name
  198. # if comment.author.name != submitterName:
  199. # return
  200. # currTime = time.time()
  201. # parentComment = comment.parent()
  202. # SubmissionBannedBy = submission.banned_by
  203. # permalink = submission.permalink
  204. # submissionID = submission.id
  205. # submissionTitle = submission.title
  206. # recipient = reddit.redditor(submitterName)
  207. # # if submission is not removed
  208. # if SubmissionBannedBy is None:
  209. # recipient.message(subject='Your submission is already approved', message=f'[Your submission](https://reddit.com{permalink}) has already been approved and is visible to everyone.')
  210. # # if reply is past the MAX_TIME_ALLOWANCE (seconds)
  211. # elif currTime - submission.created_utc > MAX_TIME_ALLOWANCE + 60:
  212. # botComment = comment.reply(SORRY_REPLY)
  213. # botComment.mod.distinguish()
  214. # elif SubmissionBannedBy != BOT_USERNAME:
  215. # botComment = comment.reply(OVERRIDE_UNAVAILABLE_REPLY)
  216. # botComment.mod.distinguish()
  217. # elif 'has been removed' in parentComment.body and SubmissionBannedBy == BOT_USERNAME:
  218. # print(f' >>> Approving submission by {submitterName}: {submissionID}: {submissionTitle}')
  219. # recipient.message(subject='Your /r/Piracy submission has been approved', message=f'Thank you. Your [submission](https://reddit.com{permalink}) is now visible to everyone.')
  220. # # botComment = comment.reply(f'>{comment.body}\n\nThank you /u/{comment.author.name}. Your submission is now visible to everyone.')
  221. # # botComment.mod.distinguish()
  222. # parentComment.edit(f'{REMOVAL_COMMENT}\n\n \n\nEDIT: /u/{submitterName} has replied. This submission is now approved')
  223. # submission.mod.approve()
  224. # userSet.add(submitterName)
  225. def processApprovedSubmission(submission):
  226. # process the just-bot-approved submission to see if it should report it under possible breaking of rule 3
  227. maxLength = 65
  228. title = submission.title
  229. selftext = submission.selftext
  230. for str_re in POSSIBLE_RULE3_RE:
  231. m = re.search(str_re, title, re.IGNORECASE | re.DOTALL)
  232. if m:
  233. mGroup = m.group()
  234. mGroup = mGroup[:maxLength] if len(mGroup) > maxLength else mGroup
  235. submission.report(f'Possible rule 3? - in_title: [{mGroup}]')
  236. return
  237. m = re.search(str_re, selftext, re.IGNORECASE | re.DOTALL)
  238. if m:
  239. mGroup = m.group()
  240. mGroup = mGroup[:maxLength] if len(mGroup) > maxLength else mGroup
  241. submission.report(f'Possible rule 3? - in_body: [{mGroup}]')
  242. def isUserNew(username, Users):
  243. if username in Users:
  244. return False
  245. return True
  246. def getUsers():
  247. try:
  248. with open(USERS_JSON_PATH, 'r', encoding='utf8') as f:
  249. Users = json.load(f)
  250. return Users
  251. except:
  252. with open(USERS_JSON_PATH, 'w', encoding='utf8') as f:
  253. json.dump({}, f, indent=4)
  254. return {}
# Script entry point: announce start-up and run the bot only when this file
# is executed directly, not when it is imported.
if __name__ == '__main__':
    print('Welcome Bot has started')
    main()