qdb.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377
  1. from event import Event
  2. import re
  3. import difflib
  4. try:
  5. import imgurpython
  6. except ImportError:
  7. print "Warning: QDB module requires imgurpython."
  8. imgurpython = object
  9. try:
  10. import requests
  11. except ImportError:
  12. print "Warning: QDB module requires requests."
  13. requests = object
  14. class QDB:
  15. def __init__(self, events=None, printer_handle=None, bot=None, say=None):
  16. self.events = events
  17. self.printer = printer_handle
  18. self.interests = ['__.qdb__', '1__all_lines__'] # should be first event in the listing.. so lines being added is a priority
  19. self.bot = bot
  20. self.say = say
  21. self.imgur_client_id = "6f4e468a474bb6e"
  22. self.imgur_client_secret = "22f791df5569e7964a1ca78637125c94cba6f312"
  23. self.bot.mem_store['qdb'] = {}
  24. #define a key for _recent since that will not be a potential channel name
  25. self.bot.mem_store['qdb']['_recent'] = []
  26. for event in events:
  27. if event._type in self.interests:
  28. event.subscribe(self)
  29. self.help = ".qdb <search string of first line> | <search string of last line>"
  30. self.MAX_BUFFER_SIZE = 500
  31. self.MAX_HISTORY_SIZE = 10
  32. def _imgurify(self, url):
  33. client = imgurpython.ImgurClient(self.imgur_client_id, self.imgur_client_secret)
  34. replacement_values = list()
  35. if type(url) is list:
  36. for u in url:
  37. resp = client.upload_from_url(u)
  38. replacement_values.append(resp)
  39. else:
  40. try:
  41. resp = client.upload_from_url(url)
  42. replacement_values.append(resp)
  43. except imgurpython.helpers.error.ImgurClientError, e:
  44. self.bot.debug_print("ImgurClientError: ")
  45. self.bot.debug_print(str(e))
  46. except UnboundLocalError, e:
  47. self.bot.debug_print("UnboundLocalError: ")
  48. self.bot.debug_print(str(e))
  49. except requests.ConnectionError, e:
  50. self.bot.debug_print("ConnectionError: ")
  51. self.bot.debug_print(str(e))
  52. return replacement_values
  53. def _detect_url(self, quote):
  54. """
  55. right now this is strictly for tsdbot's printout functionality
  56. follows this format:
  57. http://irc.teamschoolyd.org/printouts/8xnK5DmfMz
  58. """
  59. try:
  60. url = re.search("(?P<url>http://irc\.teamschoolyd\.org/printouts/\w+)", quote).group("url")
  61. except AttributeError: # we didn't find anything
  62. return quote
  63. repl = self._imgurify(url)
  64. new_quote = re.sub('(?P<url>http://irc\.teamschoolyd\.org/printouts/\w+)',repl[0]['link'], quote)
  65. return new_quote
  66. def strip_formatting(self, msg):
  67. """Uses regex to replace any special formatting in IRC (bold, colors) with nothing"""
  68. return re.sub('([\x02\x1D\x1F\x16\x0F]|\x03([0-9]{2})?)', '', msg)
  69. def add_buffer(self, event=None, debug=False):
  70. """Takes a channel name and line passed to it and stores them in the bot's mem_store dict
  71. for future access. The dict will have channel as key. The value to that key will be a list
  72. of formatted lines of activity.
  73. If the buffer size is not yet exceeded, lines are just added. If the buffer
  74. is maxed out, the oldest line is removed and newest one inserted at the beginning.
  75. """
  76. if debug:
  77. print "Line: " + event.line
  78. print "Verb: " + event.verb
  79. print "Channel: " + event.channel
  80. print ""
  81. if not event:
  82. return
  83. #there are certain things we want to record in history, like nick changes and quits
  84. #these often add to the humor of a quote. however, these are not specific to a channel
  85. #in IRC and our bot does not maintain a userlist per channel. Therefore, when nick
  86. #changes and quits occur, we will add them to every buffer. This is not technically
  87. #correct behavior and could very well lead to quits/nick changes that are not visible
  88. #showing up in a quote, but it's the best we can do at the moment
  89. if not event.channel:
  90. #discard events with unwanted verbs
  91. if event.verb not in ["QUIT", "NICK"]:
  92. return
  93. try:
  94. for chan in self.bot.mem_store['qdb'].keys():
  95. if chan != '_recent':
  96. if len(self.bot.mem_store['qdb'][chan]) >= self.MAX_BUFFER_SIZE:
  97. self.bot.mem_store['qdb'][chan].pop()
  98. line = self.format_line(event)
  99. if line:
  100. self.bot.mem_store['qdb'][chan].insert(0, line)
  101. except (KeyError, IndexError):
  102. print "QDB add_buffer() error when no event channel"
  103. #now we continue with normal, per channel line addition
  104. #create a dictionary associating the channel with an empty list if it doesn't exist yet
  105. else:
  106. if event.channel not in self.bot.mem_store['qdb']:
  107. self.bot.mem_store['qdb'][event.channel] = []
  108. try:
  109. #check for the length of the buffer. if it's too long, pop the last item
  110. if len(self.bot.mem_store['qdb'][event.channel]) >= self.MAX_BUFFER_SIZE:
  111. self.bot.mem_store['qdb'][event.channel].pop()
  112. #get a line by passing event to format_line
  113. #insert the line into the first position in the list
  114. line = self.format_line(event)
  115. if line:
  116. self.bot.mem_store['qdb'][event.channel].insert(0, line)
  117. except IndexError:
  118. print "QDB add_buffer() error. Couldn't access the list index."
  119. def format_line(self, event):
  120. """Takes an event and formats a string appropriate for quotation from it"""
  121. # first strip out printout urls and replace them with imgur mirrors
  122. # commenting out for now to avoid uploading to imgur so often
  123. #event.msg = self._detect_url(event.msg)
  124. #format all strings based on the verb
  125. if event.verb == "":
  126. return ''
  127. elif event.verb == "PRIVMSG":
  128. #special formatting for ACTION strings
  129. if event.msg.startswith('\001ACTION'):
  130. #strip out the word ACTION from the msg
  131. return ' * %s %s\n' % (event.user, event.msg[7:])
  132. else:
  133. return '<%s> %s\n' % (event.user, self.strip_formatting(event.msg))
  134. elif event.verb == "JOIN":
  135. return ' --> %s has joined channel %s\n' % (event.user, event.channel)
  136. elif event.verb == "PART":
  137. return ' <-- %s has left channel %s\n' % (event.user, event.channel)
  138. elif event.verb == "NICK":
  139. return ' -- %s has changed their nick to %s\n' % (event.user, event.msg)
  140. elif event.verb == "TOPIC":
  141. return ' -- %s has changed the topic for %s to "%s"\n' % (event.user, event.channel, event.msg)
  142. elif event.verb == "QUIT":
  143. return ' <-- %s has quit (%s)\n' % (event.user, event.msg)
  144. elif event.verb == "KICK":
  145. #this little bit of code finds the kick target by getting the last
  146. #thing before the event message begins
  147. target = event.line.split(":", 2)[1].split()[-1]
  148. return ' <--- %s has kicked %s from %s (%s)\n' % (event.user, target, event.channel, event.msg)
  149. elif event.verb == "NOTICE":
  150. return ' --NOTICE from %s: %s\n' % (event.user, event.msg)
  151. else:
  152. #no matching verbs found. just ignore the line
  153. return ''
  154. def get_qdb_submission(self, channel=None, start_msg='', end_msg='', strict=False):
  155. """Given two strings, start_msg and end_msg, this function will assemble a submission for the QDB.
  156. start_msg is a substring to search for and identify a starting line. end_msg similarly is used
  157. to search for the last desired line in the submission. This function returns a string ready
  158. for submission to the QDB if it finds the desired selection. If not, it returns None.
  159. """
  160. if not channel:
  161. return None
  162. #must have at least one msg to search for and channel to look it up in
  163. if len(start_msg) == 0 or not channel:
  164. return None
  165. #first, check to see if we are doing a single string submission.
  166. if end_msg == '':
  167. for line in self.bot.mem_store['qdb'][channel]:
  168. if start_msg.lower() in line.lower():
  169. return self._detect_url(line) #removing temporary printout urls and replacing with imgur
  170. #making sure we get out of the function if no matching strings were found
  171. #don't want to search for a nonexistent second string later
  172. return None
  173. #search for a matching start and end string and get the buffer index for the start and end message
  174. start_index = -1
  175. end_index = -1
  176. """Finds matching string for beginning line. Buffer is traversed in reverse-chronological order
  177. .qdb -> strict = False -> earliest occurence
  178. .qdbs -> strict = True -> latest occurence
  179. """
  180. for index, line in enumerate(self.bot.mem_store['qdb'][channel]):
  181. #print "evaluating line for beginning: {}".format(line)
  182. if start_msg.encode('utf-8','ignore').lower() in line.encode('utf-8','ignore').lower():
  183. #print "found match, start_index={}".format(index)
  184. start_index = index
  185. if strict:
  186. break
  187. #finds newest matching string for ending line
  188. for index, line in enumerate(self.bot.mem_store['qdb'][channel]):
  189. #print "evaluating line for end: {}".format(line)
  190. if end_msg.lower() in line.lower():
  191. #print "found match, end_index={}".format(index)
  192. end_index = index
  193. break
  194. #check to see if index values are positive. if not, string was not found and we're done
  195. if start_index == -1 or end_index == -1 or start_index < end_index:
  196. return None
  197. #now we generate the string to be returned for submission
  198. submission = ''
  199. try:
  200. for i in reversed(range(end_index, start_index + 1)):
  201. #print 'Index number is ' + str(i) + ' and current submission is ' + submission
  202. submission += self._detect_url(self.bot.mem_store['qdb'][channel][i]) #detect temporary printout urls and replace with imgur
  203. except IndexError:
  204. print "QDB get_qdb_submission() error when accessing list index"
  205. return submission
  206. def submit(self, qdb_submission, debug=False):
  207. """Given a string, qdb_submission, this function will upload the string to hlmtre's qdb
  208. server. Returns a string with status of submission. If it worked, includes a link to new quote.
  209. """
  210. if debug:
  211. print "Submission is:"
  212. print qdb_submission
  213. print "Current buffer is:"
  214. print self.bot.mem_store['qdb']
  215. print ""
  216. return ''
  217. #accessing hlmtre's qdb api
  218. url = 'http://qdb.zero9f9.com/api.php'
  219. payload = {'q':'new', 'quote': qdb_submission.rstrip('\n')}
  220. try:
  221. qdb = requests.post(url, payload)
  222. except ConnectionError, e:
  223. self.bot.debug_print("ConnectionError: ")
  224. self.bot.debug_print(str(e))
  225. #check for any HTTP errors and return False if there were any
  226. try:
  227. qdb.raise_for_status()
  228. except requests.exceptions.HTTPError, e:
  229. self.bot.debug_print('HTTPError: ')
  230. self.bot.debug_print(str(e))
  231. return "HTTPError encountered when submitting to QDB"
  232. try:
  233. q_url = qdb.json()
  234. self.add_recently_submitted(q_url['id'], qdb_submission)
  235. return "QDB submission successful! http://qdb.zero9f9.com/quote.php?id=" + str(q_url['id'])
  236. except (KeyError, UnicodeDecodeError):
  237. return "Error getting status of quote submission."
  238. return "That was probably successful since no errors came up, but no status available."
  239. def delete(self, user, post_id='', passcode=''):
  240. """A special function that allows certain users to delete posts"""
  241. #accessing hlmtre's qdb api
  242. url = 'http://qdb.zero9f9.com/api.php'
  243. payload = {'q':'delete', 'user':user, 'id':post_id, 'code':passcode}
  244. deletion = requests.get(url, params=payload)
  245. #check for any HTTP errors and return False if there were any
  246. try:
  247. deletion.raise_for_status()
  248. except requests.exceptions.HTTPError, e:
  249. self.bot.debug_print('HTTPError: ')
  250. self.bot.debug_print(str(e))
  251. return "HTTPError encountered when accessing QDB"
  252. try:
  253. del_status = deletion.json()
  254. if del_status['success'] == "true":
  255. for quote in self.bot.mem_store['qdb']['_recent']: # they're a list of dicts
  256. if int(post_id) in quote:
  257. self.bot.mem_store['qdb']['_recent'].remove(quote)
  258. return "QDB deletion succeeded."
  259. return "QDB deletion failed."
  260. except (KeyError, UnicodeDecodeError):
  261. return "Error getting status of quote deletion."
  262. def recently_submitted(self, submission):
  263. """Checks to see if the given submission is string is at least 75% similar to the strings
  264. in the list of recently submitted quotes.
  265. Returns the id of the quote if it was recently submitted. If not, returns -1.
  266. """
  267. #set up a difflib SequenceMatcher with the first string to test
  268. comparer = difflib.SequenceMatcher()
  269. comparer.set_seq1(submission)
  270. #if we find that it has 75% similarity or greater to a recent submission, return True
  271. try:
  272. for recent_quote in self.bot.mem_store['qdb']['_recent']:
  273. comparer.set_seq2(recent_quote.values()[0])
  274. if comparer.ratio() >= .75:
  275. return recent_quote.keys()[0]
  276. except TypeError:
  277. return -1
  278. except KeyError:
  279. return -1
  280. except IndexError:
  281. return -1
  282. return -1
  283. def add_recently_submitted(self, q_id, submission):
  284. """Takes a string, submission, and adds it to the list of recent submissions.
  285. Also we do length checking, only keep record of the previous MAX_HISTORY_SIZE quotes.
  286. """
  287. #first, see if we have reached the maximum history size. if so, remove last item
  288. if len(self.bot.mem_store['qdb']['_recent']) >= self.MAX_HISTORY_SIZE:
  289. self.bot.mem_store['qdb']['_recent'].pop()
  290. #inserting a dict with the qdb id of the submission and the submission content
  291. self.bot.mem_store['qdb']['_recent'].insert(0, {q_id:submission})
  292. def handle(self, event):
  293. #first check to see if there is a special deletion going on
  294. if event.msg.startswith(".qdbdelete") and event.is_pm:
  295. deletion = event.msg.split(' ', 2)
  296. try:
  297. #requires the format ".qdbdelete <post_id> <password>"
  298. self.say(event.user, self.delete(event.user, deletion[1], deletion[2]))
  299. except IndexError:
  300. self.say(event.user, "Not enough parameters provided for deletion.")
  301. return
  302. """
  303. See if we're going to generate a qdb submission, or just add the line to the buffer.
  304. .qdb is the standard, generous implementation selected after hours of testing and ideal for a significant number of situations where lines are repeated. Use specific search strings. the start_index of the submission will be the EARLIEST occurrence of the substring in the buffer.
  305. .qdbs is the strict implementation. The start_index will be the LATEST occurrence of the substring.
  306. """
  307. if event.msg.startswith(".qdb ") or event.msg.startswith(".qdbs "):
  308. #split the msg with '.qdb[s] ' stripped off beginning and divide into 1 or 2 search strings
  309. #e.g. ".qdb string1|string2" -> [".qdb", "string1|string2"]
  310. cmd_parts = event.msg.split(None,1)
  311. if len(cmd_parts) < 2:
  312. #do something here to handle '.qdb[s]'
  313. return
  314. #determine if using strict mode
  315. strict_mode = cmd_parts[0] == ".qdbs"
  316. #split the search parameter(s)
  317. #e.g. "string1|string2" -> ["string1", "string2"]
  318. string_token = cmd_parts[1].split('|', 1)
  319. start_msg = string_token[0].rstrip()
  320. #see if we only have a one line submission
  321. if len(string_token) == 1:
  322. #s is the string to submit
  323. s = self.get_qdb_submission(event.channel, start_msg)
  324. recent = self.recently_submitted(s)
  325. if recent > 0:
  326. q_url = "http://qdb.zero9f9.com/quote.php?id=" + str(recent)
  327. self.printer("PRIVMSG " + event.channel + " :QDB Error: A quote of >75% similarity has already been posted here: " + q_url + "\n")
  328. return
  329. if not s:
  330. self.printer("PRIVMSG " + event.channel + ' :QDB Error: Could not find requested string.\n')
  331. return
  332. #Print the link to the newly submitted quote
  333. self.printer("PRIVMSG " + event.channel + ' :' + self.submit(s) + '\n')
  334. return
  335. #We should only get here if there are two items in string_token
  336. end_msg = string_token[1].lstrip()
  337. s = self.get_qdb_submission(event.channel, start_msg, end_msg, strict_mode)
  338. recent = self.recently_submitted(s)
  339. if recent > 0:
  340. q_url = "http://qdb.zero9f9.com/quote.php?id=" + str(recent)
  341. self.printer("PRIVMSG " + event.channel + " :QDB Error: A quote of >75% similarity has already been posted here: " + q_url + "\n")
  342. return
  343. #if there's nothing found for the submission, then we alert the channel and gtfo
  344. if not s:
  345. self.printer("PRIVMSG " + event.channel + ' :QDB Error: Could not find requested quotes or parameters were not specific enough.\n')
  346. return
  347. #print the link to the new submission
  348. self.printer("PRIVMSG " + event.channel + ' :' + self.submit(s) + '\n')
  349. return
  350. self.add_buffer(event)