darmor
/
mommy-ng


			
							1234567891011121314151617181920212223242526272829303132333435
							from monitor import Monitor
from threadinfo import ThreadInfo
from api import ChanAPI
from utils import getThreadList
from utils import getThreadObjects
from utils import getComments
from bs4 import BeautifulSoup
import pickle
from sys import argv
import sys
import re
'''
Download all 4chan posts and parse them for links
'''


# Pattern for http(s) URLs that have a "/" after the host part, e.g.
# "https://example.com/page". A raw string is used so the regex escapes
# (\/ \. \+ \?) are not interpreted as (invalid) Python string escapes.
# Compiled once at module level because it is reused for every comment.
prog = re.compile(
    r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\/[-a-zA-Z0-9@:%._\+~#=\/\?]*"
)


def extract_links(comment):
    """Return every URL found in one raw comment string.

    The literal markup "<wbr>" is removed (so URLs split by it are rejoined)
    and "<br>" is turned into a newline before searching.

    Args:
        comment: comment text as a string.

    Returns:
        A list of matched URL strings in order of appearance; empty if the
        comment contains no match for ``prog``.
    """
    text = comment.replace("<wbr>", "").replace("<br>", "\n")
    # finditer scans the whole comment; re.match would only ever find a
    # link sitting at the very first character of the text.
    return [found.group(0) for found in prog.finditer(text)]


def main(args=None):
    """Print every link found in the comments of all threads on a board.

    Args:
        args: command-line arguments without the program name; defaults to
            ``argv[1:]``. The first element is used as the board name.

    Exits with a usage message when no board name is supplied.
    """
    args = argv[1:] if args is None else args
    if not args:
        sys.exit("usage: <script> BOARD")

    board = args[0]
    fchan = ChanAPI(board)
    catalog = fchan.getCatalog()

    for thread in getThreadList(catalog):
        threadData = fchan.getThread(thread)
        for comment in getComments(threadData):
            for link in extract_links(comment):
                print(link)


if __name__ == "__main__":
    main()