# microblog.py
  1. import sys, os, traceback
  2. import dateutil.parser
  3. from time import strftime, localtime
  4. # returns html-formatted string
  5. def make_buttons(btn_dict, msg_id):
  6. buttons = "<div class=\"buttons\">"
  7. fmt = "<a href=\"%s\">[%s]</a>"
  8. for key in btn_dict:
  9. url = btn_dict[key]
  10. if url[-1] == '=':
  11. # then interpret it as a query string
  12. url += str(msg_id)
  13. buttons += fmt % (url,key)
  14. buttons += "</div>"
  15. return buttons
  16. # apply div classes for use with .css
  17. def make_post(num, timestamp, conf, msg):
  18. fmt = conf["format"]
  19. if "buttons" in conf:
  20. b = make_buttons(conf["buttons"], num)
  21. else:
  22. b = ""
  23. return fmt.format(
  24. __timestamp__=timestamp, __num__=num, __msg__=msg, __btn__=b)
  25. def make_gallery(indices, w, conf=None):
  26. tag = []
  27. if indices == []:
  28. return tag
  29. template = '''
  30. <div class=\"panel\">
  31. <a href=\"%s\"><img src=\"%s\" class=\"embed\"></a>
  32. </div>
  33. '''
  34. tag.append("<div class=\"gallery\">")
  35. for index in reversed(indices):
  36. image = w.pop(index)
  37. is_path = image[0] == '.' or image[0] == '/'
  38. if conf and not is_path:
  39. thumb = "%s/%s" % (conf["path_to_thumb"], image)
  40. full = "%s/%s" % (conf["path_to_fullsize"], image)
  41. tag.append(template % (full,thumb))
  42. continue
  43. elif not conf and not is_path:
  44. msg = ("Warning: no path defined for image %s!" % image)
  45. print(msg,file=sys.stderr)
  46. else:
  47. pass
  48. tag.append(template % (image, image))
  49. tag.append("</div>")
  50. return tag
# Apply basic HTML formatting; the only div class emitted here is "gallery".
  52. from html import escape
  53. def markup(message, config):
  54. def is_image(s, image_formats):
  55. l = s.rsplit('.', maxsplit=1)
  56. if len(l) < 2:
  57. return False
  58. # Python 3.10.5
  59. # example result that had to be filtered:
  60. # string: started.
  61. # result: ['started', '']
  62. if l[1] == str(''):
  63. return False
  64. #print(s, l, file=sys.stderr)
  65. if l[1] in image_formats:
  66. return True
  67. return False
  68. result = 0
  69. tagged = ""
  70. # support multiple images (gallery style)
  71. tags = [] # list of strings
  72. output = []
  73. gallery = []
  74. ptags = config["tag_paragraphs"]
  75. sep = ""
  76. if "line_separator" in config:
  77. sep = config["line_separator"]
  78. for line in message:
  79. images = [] # list of integers
  80. words = line.split()
  81. for i in range(len(words)):
  82. word = words[i]
  83. # don't help people click http
  84. if word.find("src=") == 0 or word.find("href=") == 0:
  85. continue
  86. elif word.find("https://") != -1:
  87. w = escape(word)
  88. new_word = ("<a href=\"%s\">%s</a>") % (w, w)
  89. words[i] = new_word
  90. elif word.find("#") != -1 and len(word) > 1:
  91. # split by unicode blank character if present
  92. # allows tagging such as #fanfic|tion
  93. w = word.split(chr(8206))
  94. # w[0] is the portion closest to the #
  95. tags.append(w[0])
  96. new_word = "<span class=\"hashtag\">%s</span>" % (w[0])
  97. if len(w) > 1:
  98. new_word += w[1]
  99. words[i] = new_word
  100. elif is_image(word, config["accepted_images"]):
  101. images.append(i)
  102. if len(images) > 0:
  103. # function invokes pop() which modifies list 'words'
  104. gc = config["gallery"] if "gallery" in config else None
  105. gallery = make_gallery(images, words, gc)
  106. if ptags and len(words) > 0:
  107. words.insert(0,"<p>")
  108. words.append("</p>")
  109. output.append(" ".join(words))
  110. # avoid paragraph with an image gallery
  111. if len(gallery) > 0:
  112. output.append("".join(gallery))
  113. gallery = []
  114. return sep.join(output), tags
  115. class Post:
  116. def __init__(self, ts, msg):
  117. self.timestamp = ts.strip() # string
  118. self.message = msg # list
  119. # format used for sorting
  120. def get_epoch_time(self):
  121. t = dateutil.parser.parse(self.timestamp)
  122. return int(t.timestamp())
  123. # format used for display
  124. def get_short_time(self):
  125. t = dateutil.parser.parse(self.timestamp)
  126. return t.strftime("%y %b %d")
  127. def parse_txt(filename):
  128. content = []
  129. with open(filename, 'r') as f:
  130. content = f.readlines()
  131. posts = [] # list of posts - same order as file
  132. message = [] # list of lines
  133. # {-1 = init;; 0 = timestamp is next, 1 = message is next}
  134. state = -1
  135. timestamp = ""
  136. for line in content:
  137. if state == -1:
  138. state = 0
  139. continue
  140. elif state == 0:
  141. timestamp = line
  142. state = 1
  143. elif state == 1:
  144. if len(line) > 1:
  145. message.append(line)
  146. else:
  147. p = Post(timestamp, message)
  148. posts.append(p)
  149. # reset
  150. message = []
  151. state = 0
  152. return posts
  153. def get_posts(posts, config):
  154. taginfos = []
  155. tagcloud = dict() # (tag, count)
  156. tagged = dict() # (tag, index of message)
  157. total = len(posts)
  158. count = total
  159. index = count # - 1
  160. timeline = []
  161. btns = None
  162. for post in posts:
  163. markedup, tags = markup(post.message, config)
  164. count -= 1
  165. index -= 1
  166. timeline.append(
  167. make_post(count, post.get_short_time(), config, markedup)
  168. )
  169. for tag in tags:
  170. if tagcloud.get(tag) == None:
  171. tagcloud[tag] = 0
  172. tagcloud[tag] += 1
  173. if tagged.get(tag) == None:
  174. tagged[tag] = []
  175. tagged[tag].append(index)
  176. return timeline, tagcloud, tagged
  177. def make_tagcloud(d, rell):
  178. sorted_d = {k: v for k,
  179. v in sorted(d.items(),
  180. key=lambda item: -item[1])}
  181. output = []
  182. fmt = "<span class=\"hashtag\"><a href=\"%s\">%s(%i)</a></span>"
  183. #fmt = "<span class=\"hashtag\">%s(%i)</span>"
  184. for key in d.keys():
  185. link = rell % key[1:]
  186. output.append(fmt % (link, key, d[key]))
  187. return output
  188. class Paginator:
  189. def __init__(self, post_count, ppp, loc=None):
  190. if post_count <= 0:
  191. raise Exception
  192. if not loc:
  193. loc = "pages"
  194. if loc and not os.path.exists(loc):
  195. os.mkdir(loc)
  196. self.TOTAL_POSTS = post_count
  197. self.PPP = ppp
  198. self.TOTAL_PAGES = int(post_count/self.PPP)
  199. self.SUBDIR = loc
  200. self.FILENAME = "%i.html"
  201. self.written = []
  202. def toc(self, current_page=None, path=None): #style 1
  203. if self.TOTAL_PAGES < 1:
  204. return "[no pages]"
  205. if path == None:
  206. path = self.SUBDIR
  207. # For page 'n' do not create an anchor tag
  208. fmt = "<a href=\"%s\">[%i]</a>" #(filename, page number)
  209. anchors = []
  210. for i in reversed(range(self.TOTAL_PAGES)):
  211. if i != current_page:
  212. x = path + "/" + (self.FILENAME % i)
  213. anchors.append(fmt % (x, i))
  214. else:
  215. anchors.append("<b>[%i]</b>" % i)
  216. return "\n".join(anchors)
  217. # makes one page
  218. def singlepage(self, template, tagcloud, timeline_, i=None, p=None):
  219. tc = "\n".join(tagcloud)
  220. tl = "\n\n".join(timeline_)
  221. toc = self.toc(i, p)
  222. return template.format(
  223. postcount=self.TOTAL_POSTS, tags=tc, pages=toc, timeline=tl
  224. )
  225. def paginate(self, template, tagcloud, timeline, is_tagline=False):
  226. outfile = "%s/%s" % (self.SUBDIR, self.FILENAME)
  227. timeline.reverse() # reorder from oldest to newest
  228. start = 0
  229. for i in range(start, self.TOTAL_PAGES):
  230. fn = outfile % i
  231. with open(fn, 'w') as f:
  232. self.written.append(fn)
  233. prev = self.PPP * i
  234. curr = self.PPP * (i+1)
  235. sliced = timeline[prev:curr]
  236. sliced.reverse()
  237. f.write(self.singlepage(template, tagcloud, sliced, i, "."))
  238. return
  239. import argparse
  240. if __name__ == "__main__":
  241. def sort(filename):
  242. def export(new_content, new_filename):
  243. with open(new_filename, 'w') as f:
  244. print(file=f)
  245. for post in new_content:
  246. print(post.timestamp, file=f)
  247. print("".join(post.message), file=f)
  248. return
  249. posts = parse_txt(filename)
  250. posts.sort(key=lambda e: e.get_epoch_time())
  251. outfile = ("%s.sorted" % filename)
  252. print("Sorted text written to ", outfile)
  253. export(reversed(posts), outfile)
  254. def get_args():
  255. p = argparse.ArgumentParser()
  256. p.add_argument("template", help="an html template file")
  257. p.add_argument("content", help="text file for microblog content")
  258. p.add_argument("--sort", \
  259. help="sorts content from oldest to newest"
  260. " (this is a separate operation from page generation)", \
  261. action="store_true")
  262. p.add_argument("--skip-fetch", \
  263. help="skips fetching profile data from remote sources;"
  264. " has no effect if webring is not enabled",\
  265. action="store_true")
  266. args = p.parse_args()
  267. if args.sort:
  268. sort(args.content)
  269. exit()
  270. return args.template, args.content, args.skip_fetch
  271. # assume relative path
  272. def demote_css(template, css_list, level=1):
  273. prepend = ""
  274. if level == 1:
  275. prepend = '.'
  276. else:
  277. for i in range(level):
  278. prepend = ("../%s" % prepend)
  279. tpl = template
  280. for css in css_list:
  281. tpl = tpl.replace(css, ("%s%s" % (prepend, css) ))
  282. return tpl
  283. def writepage(template, timeline, tagcloud, config, subdir = None):
  284. count = len(timeline)
  285. html = ""
  286. with open(template,'r') as f:
  287. html = f.read()
  288. try:
  289. p = config["postsperpage"]
  290. pagectrl = Paginator(count, p, subdir)
  291. except ZeroDivisionError as e:
  292. print("error: ",e, ". check 'postsperpage' in config", file=sys.stderr)
  293. exit()
  294. except Exception as e:
  295. print("error: ",e, ("(number of posts = %i)" % count), file=sys.stderr)
  296. exit()
  297. latest = timeline[:pagectrl.PPP]
  298. link_from_top = "./tags/%s/latest.html"
  299. link_from_subdir = "../tags/%s/latest.html"
  300. link_from_tagdir = "../%s/latest.html"
  301. cloud = ""
  302. level = 1
  303. is_tagline = False
  304. if subdir == None: # if top level page
  305. cloud = make_tagcloud(tagcloud, link_from_top)
  306. print(pagectrl.singlepage(html, cloud, latest))
  307. cloud = make_tagcloud(tagcloud, link_from_subdir)
  308. else:
  309. if subdir != "webring": # timelines per tag
  310. is_tagline = True
  311. level += 1
  312. cloud = make_tagcloud(tagcloud, link_from_tagdir)
  313. else:
  314. cloud = make_tagcloud(tagcloud, link_from_subdir)
  315. demoted = demote_css(html, config["relative_css"], level)
  316. filename = "%s/latest.html" % subdir
  317. with open(filename, 'w') as f: # landing page for tag
  318. pagectrl.written.append(filename)
  319. page = pagectrl.singlepage(demoted, cloud, latest, p=".")
  320. f.write(page)
  321. pagectrl.paginate(
  322. demote_css(html, config["relative_css"], level),
  323. cloud, timeline, is_tagline)
  324. return pagectrl.written
  325. import toml
  326. def load_settings():
  327. s = dict()
  328. filename = "settings.toml"
  329. if os.path.exists(filename):
  330. with open(filename, 'r') as f:
  331. s = toml.loads(f.read())
  332. else:
  333. s = None
  334. return s
  335. import json
  336. def export_profile(post_count, last_update, config):
  337. if "profile" not in config:
  338. return
  339. p = config["profile"]
  340. p["post-count"] = post_count
  341. p["last-updated"] = last_update
  342. if "username" not in p or "url" not in p:
  343. print("Warning: no profile exported", file=sys.stderr)
  344. return
  345. with open(config["file_output"], 'w') as f:
  346. print(json.dumps(p), file=f)
  347. def get_webring(f_cfg):
  348. import pycurl
  349. from io import BytesIO
  350. def get_proxy():
  351. proxy = ""
  352. if "http_proxy" in os.environ:
  353. proxy = os.environ["http_proxy"]
  354. elif "https_proxy" in os.environ:
  355. proxy = os.environ["https_proxy"]
  356. host = proxy[proxy.rfind('/') + 1: proxy.rfind(':')]
  357. port = proxy[proxy.rfind(':') + 1:]
  358. foo = proxy.find("socks://") >= 0 or proxy.find("socks5h://")
  359. return host, int(port), foo
  360. def fetch(url_list):
  361. curl = pycurl.Curl()
  362. if "http_proxy" in os.environ or "https_proxy" in os.environ:
  363. hostname, port_no, is_socks = get_proxy()
  364. curl.setopt(pycurl.PROXY, hostname)
  365. curl.setopt(pycurl.PROXYPORT, port_no)
  366. if is_socks:
  367. curl.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5_HOSTNAME)
  368. datum = []
  369. meta = []
  370. for url in url_list:
  371. buf = BytesIO()
  372. curl.setopt(curl.WRITEDATA, buf)
  373. curl.setopt(pycurl.URL, url)
  374. try:
  375. curl.perform()
  376. datum.append(buf)
  377. meta.append(curl.getinfo(curl.CONTENT_TYPE))
  378. except pycurl.error as e:
  379. print(e,": ", url, file=sys.stderr)
  380. # print(buf.getvalue(),"\n\t", curl.getinfo(curl.CONTENT_TYPE), file=sys.stderr)
  381. curl.close()
  382. assert(len(datum) == len(meta))
  383. return datum, meta
  384. def to_json(curl_outs):
  385. json_objs = []
  386. for buf in curl_outs:
  387. try:
  388. json_objs.append(json.loads(buf.getvalue()))
  389. except Exception as e:
  390. print(e)
  391. return json_objs
  392. def render(profiles, template):
  393. rendered = []
  394. SHORT_BIO_LIMIT = 150
  395. for profile in profiles:
  396. try:
  397. epoch_timestamp = profile["last-updated"]
  398. if not isinstance(epoch_timestamp, int):
  399. epoch_timestamp = 0
  400. post_count = profile["post-count"]
  401. if not isinstance(post_count, int):
  402. post_count = 0
  403. self_desc = profile["short-bio"]
  404. if len(profile["short-bio"]) >= SHORT_BIO_LIMIT:
  405. self_desc = profile["short-bio"][:SHORT_BIO_LIMIT] + "..."
  406. foo = template.format(
  407. __avatar__=escape(profile["avatar"]),
  408. __handle__=escape(profile["username"]),
  409. __url__=escape(profile["url"]),
  410. __post_count__ = post_count,
  411. __shortbio__= escape(self_desc),
  412. __lastupdated__= strftime(
  413. "%Y %b %d", localtime(epoch_timestamp)) )
  414. rendered.append(foo)
  415. except KeyError as e:
  416. print("remote profile is missing key: ", e, file=sys.stderr)
  417. print("\tsource: ", profile, file=sys.stderr)
  418. return rendered
  419. def get_avatars(profiles, save_path, img_src):
  420. import hashlib
  421. imgs, info = fetch([p["avatar"] for p in profiles])
  422. length = len(imgs)
  423. if length != len(profiles) or length == 0:
  424. print("error in retrieving images", file=sys.stderr)
  425. return
  426. for i in range(0,length):
  427. content_type = info[i].split('/')
  428. ext = content_type.pop()
  429. if content_type.pop() != "image":
  430. print("\tskip: not an image", file=sys.stderr)
  431. continue
  432. data = imgs[i].getvalue()
  433. h = hashlib.sha1(data).hexdigest()
  434. filename = "%s.%s" % (h, ext)
  435. path = "%s/%s" % (save_path, filename)
  436. profiles[i]["avatar"] = "%s/%s" % (img_src, filename)
  437. if not os.path.isfile(path):
  438. with open(path, "wb") as f:
  439. f.write(data)
  440. j, m = fetch(f_cfg["list"])
  441. list_of_json_objs = to_json(j)
  442. if list_of_json_objs == []:
  443. print("no remote profiles loaded", file=sys.stderr)
  444. return []
  445. if f_cfg["internal-avatars"]["enabled"]:
  446. a = f_cfg["internal-avatars"]["local_path_to_avatars"]
  447. b = f_cfg["internal-avatars"]["path_to_avatars"]
  448. get_avatars(list_of_json_objs, a, b)
  449. try:
  450. list_of_json_objs.sort(key=lambda e: e["last-updated"], reverse=True)
  451. except KeyError: pass
  452. return render(list_of_json_objs, f_cfg["format"])
  453. def main():
  454. tpl, content, skip_fetch = get_args()
  455. cfg = load_settings()
  456. if cfg == None:
  457. print("exit: no settings.toml found.", file=sys.stderr)
  458. return
  459. if "post" not in cfg:
  460. print("exit: table 'post' absent in settings.toml", file=sys.stderr)
  461. return
  462. if "page" not in cfg:
  463. print("exit: table 'page' absent in settings.toml", file=sys.stderr)
  464. return
  465. p = parse_txt(content)
  466. tl, tc, tg = get_posts(p, cfg["post"])
  467. if tl == []:
  468. return
  469. # main timeline
  470. updated = []
  471. updated += writepage(tpl, tl, tc, cfg["page"])
  472. # timeline per tag
  473. if tc != dict() and tg != dict():
  474. if not os.path.exists("tags"):
  475. os.mkdir("tags")
  476. for key in tg.keys():
  477. tagline = []
  478. for index in tg[key]:
  479. tagline.append(tl[index])
  480. # [1:] means to omit hashtag from dir name
  481. updated += writepage(
  482. tpl, tagline, tc, cfg["page"], \
  483. subdir="tags/%s" % key[1:] \
  484. )
  485. if "webring" in cfg:
  486. if cfg["webring"]["enabled"] == True:
  487. export_profile(
  488. len(p), p[0].get_epoch_time(), cfg["webring"] )
  489. if not skip_fetch:
  490. fellows = get_webring(cfg["webring"]["following"] )
  491. if fellows != []:
  492. updated += writepage(
  493. tpl, fellows, tc, cfg["page"], subdir="webring")
  494. with open("updatedfiles.txt", 'w') as f:
  495. for filename in updated:
  496. print(filename, file=f) # sys.stderr)
  497. if "latestpage" in cfg:
  498. print(cfg["latestpage"], file=f)
  499. if "latestpages" in cfg:
  500. for page in cfg["latestpages"]:
  501. print(page, file=f)
  502. try:
  503. main()
  504. except KeyError as e:
  505. traceback.print_exc()
  506. print("\n\tA key may be missing from your settings file.", file=sys.stderr)
  507. except dateutil.parser._parser.ParserError:
  508. traceback.print_exc()
  509. print("\n\tFailed to interpret a date from string..",
  510. "\n\tYour file of posts may be malformed.",
  511. "\n\tCheck if your file starts with a line break.", file=sys.stderr)
  512. except toml.decoder.TomlDecodeError:
  513. traceback.print_exc()
  514. print("\n\tYour configuration file is malformed.")
  515. except FileNotFoundError as e:
  516. traceback.print_exc()
  517. print("\n\tA potential cause is attempting to save a file to a folder that does not exist.")