12345678910111213141516171819202122232425262728293031323334353637383940 |
- import requests
- import json
- import time
- from colorama import Fore, Back, Style
def getThreadList(response):
    ''' Collect the "no" value of every thread found in response.

    response is iterated item by item; each item's "threads" entry is
    iterated in turn and the "no" value of every element is gathered,
    in encounter order, into a single flat list.
    '''
    numbers = []
    for page in response:
        for entry in page["threads"]:
            numbers.append(entry["no"])
    return numbers
-
def getThreadObjects(thread):
    ''' Return the posts of a thread as a new list.

    The result is a shallow copy of thread["posts"]: the list object is
    fresh, the post objects inside it are the same ones.
    '''
    return list(thread["posts"])
def getComments(thread):
    ''' Return the "com" value of every post in a thread that has one.

    Posts are obtained through getThreadObjects(thread); posts without a
    "com" key are skipped, and the order of the remaining ones is kept.
    '''
    return [entry["com"] for entry in getThreadObjects(thread) if "com" in entry]
def cleantrash(trash):
    ''' Clean up a string by applying a fixed list of textual substitutions.

    The rules turn "<br>" and ". " into line breaks, drop a handful of
    HTML tag fragments, and decode the entities &gt;, &#039;, &quot; and
    &amp;. They are applied one after another in the order listed below,
    so every rule sees the output of the rules before it.

    trash -- the string to clean
    Returns the cleaned string; an empty string is returned unchanged.
    '''
    # (old, new) pairs; the order is significant and mirrors the original
    # sequence of replace() calls.
    replacements = (
        ("<br>", "\n"),
        (". ", ".\n"),
        ("&gt;", ">"),
        ('<span class="quote">', ""),
        ("<wbr>", ""),
        ('<a href="', ""),
        ("</a>", ""),
        ("</span>", ""),
        ('class="quotelink"', ""),
        # NOTE(review): the apostrophe entity is assumed to be spelled
        # "&#039;" in the input -- confirm against real data.
        ("&#039;", "'"),
        ("&quot;", '"'),
        # "&amp;" stays last so that text such as "&amp;gt;" is decoded
        # exactly once (to "&gt;") instead of twice (to ">").
        ("&amp;", "&"),
    )
    for old, new in replacements:
        trash = trash.replace(old, new)
    return trash
|