lps_gen.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650
  1. # -*- coding: utf-8 -*-
  2. #
  3. # SPDX-License-Identifier: CC0-1.0
  4. #
  5. # This file is part of lpschedule-generator.
  6. #
  7. import json
  8. import re
  9. import sys
  10. import pkg_resources as pkgr
  11. import pytz
  12. from argparse import ArgumentParser
  13. from collections import OrderedDict
  14. from datetime import datetime
  15. from os import path
  16. from bs4 import BeautifulSoup
  17. from icalendar import Calendar, Event, vCalAddress, vText, vDatetime
  18. from jinja2 import Environment, FileSystemLoader
  19. from jinja2.exceptions import TemplateNotFound
  20. from mistune import Renderer, Markdown
  21. from pytz import timezone
  22. from unidecode import unidecode
  23. from lpschedule_generator._version import __version__
  24. # Python dictionary that will contain the lp schedule.
  25. lps_dict = OrderedDict()
  26. # Python dictionary that will contain the lp speakers.
  27. lpspeakers_dict = OrderedDict()
  28. def read_file(filename):
  29. """Read file and return it as a string.
  30. :param str filename: Absolute pathname of the file.
  31. """
  32. content = ""
  33. try:
  34. with open(filename, "r") as f:
  35. for line in f:
  36. content = content + line
  37. except IOError:
  38. print("Error: unable to open {}".format(filename))
  39. return content
  40. def write_file(filename, filecontent):
  41. """Write `filecontent` to `filename`.
  42. :param str filename:
  43. Absolute pathname of the file.
  44. :param str filecontent:
  45. Data to write to `filename`.
  46. """
  47. file_ = None
  48. try:
  49. file_ = open(filename, "w")
  50. file_.write(filecontent)
  51. file_.close()
  52. except IOError:
  53. print("Error creating and writing content to {}".format(filename))
  54. exit(1)
  55. def json_write(filename, obj):
  56. """Serialize `obj` to JSON formatted `str` to `filename`.
  57. `filename` is written relative to the current working directory.
  58. """
  59. write_file(filename, json.dumps(obj, ensure_ascii=False, indent=4))
  60. def json_read(filename):
  61. """Deserialize JSON from `filename` into Python object.
  62. """
  63. if not path.isfile(filename):
  64. return False
  65. return json.loads(read_file(filename), object_pairs_hook=OrderedDict)
  66. def template_read(name):
  67. """Return template as `str`.
  68. """
  69. p = "lpschedule_generator"
  70. r = "data/{}.jinja2".format(name)
  71. t = None
  72. try:
  73. t = pkgr.resource_string(p, r).decode("utf-8")
  74. except Exception as e:
  75. print(e, file=sys.stderr)
  76. return t
  77. class LPiCal(object):
  78. """Used for producing iCal for LP schedule.
  79. """
  80. def __init__(self, lps_dict, lp_year):
  81. self.lps_dict = lps_dict
  82. self.lp_year = str(lp_year)
  83. # Matches strings like '09:45 - 10:30: Lorem ipsum dolor sit.'
  84. self.timeslot_re = re.compile(r"(\d+:\d+).+?(\d+:\d+)\s*[:-]?\s*(.+\b)?")
  85. # Matches strings like 'Saturday, March 19'
  86. self.month_day_re = re.compile(r"\w+,\s*([a-zA-Z]+)\s*(\d+)")
  87. self.cal = Calendar()
  88. self.cal.add("prodid", "-//lpschedule generator//mxm.dk//")
  89. self.cal.add("version", "2.0")
  90. self.cal.add("x-wr-calname", "LibrePlanet {}".format(self.lp_year))
  91. # RFC 2445 requires DTSTAMP to be in UTC. DTSTAMP is used in
  92. # VEVENT (Event object, see `add_event` method).
  93. self.dtstamp = vDatetime(datetime.now(pytz.utc))
  94. # used to generate uid for ical.
  95. self.ucounter = 0
  96. def gen_uid(self):
  97. """Returns an unique id.
  98. Used for Event object.
  99. """
  100. self.ucounter = self.ucounter + 1
  101. return "{}@LP{}@libreplanet.org".format(str(self.ucounter), self.lp_year)
  102. def get_timeslot(self, s):
  103. """Get start and end time for a timeslot.
  104. """
  105. timeslot = self.timeslot_re.search(s)
  106. if not timeslot:
  107. return None, None, None
  108. t_start = timeslot.group(1)
  109. t_end = timeslot.group(2)
  110. name = timeslot.group(3) or ""
  111. return t_start, t_end, name
  112. def get_month_day(self, s):
  113. """Get month and day.
  114. """
  115. month_day = self.month_day_re.search(s)
  116. if (not month_day) or (len(month_day.groups()) < 2):
  117. return None, None
  118. month = month_day.group(1)
  119. day = month_day.group(2)
  120. return month, day
  121. def mk_datetime(self, month, day, time):
  122. """Returns datetime object (EST).
  123. """
  124. # Day %d
  125. # Month %B
  126. # Year %Y
  127. # Hour %H (24-hr)
  128. # Minute %M (zero padded)
  129. # Second %S (zero padded)
  130. datetime_fmt = "%d %B %Y %H:%M:%S"
  131. eastern = timezone("US/Eastern")
  132. hour = time.split(":")[0]
  133. minute = time.split(":")[1]
  134. datetime_str = "{} {} {} {}:{}:{}".format(
  135. day, month, self.lp_year, hour.zfill(2), minute.zfill(2), "00"
  136. )
  137. dt_object = datetime.strptime(datetime_str, datetime_fmt)
  138. return vDatetime(eastern.localize(dt_object))
  139. def mk_attendee(self, speaker):
  140. """Make Attendee to be added to an Event object.
  141. See `add_event` method.
  142. """
  143. # Get rid of HTML (<a> element, etc) in `speaker`
  144. speaker = BeautifulSoup(speaker, "html.parser").get_text()
  145. attendee = vCalAddress("invalid:nomail")
  146. attendee.params["cn"] = vText(speaker)
  147. attendee.params["ROLE"] = vText("REQ-PARTICIPANT")
  148. attendee.params["CUTYPE"] = vText("INDIVIDUAL")
  149. return attendee
  150. def add_event(self, month, day, t_start, t_end, t_name, session, session_info):
  151. """Adds event to calendar.
  152. """
  153. event = Event()
  154. event["uid"] = self.gen_uid()
  155. event["dtstamp"] = self.dtstamp
  156. event["class"] = vText("PUBLIC")
  157. event["status"] = vText("CONFIRMED")
  158. event["method"] = vText("PUBLISH")
  159. if session == "st-from-ts":
  160. event["summary"] = t_name
  161. else:
  162. event["summary"] = session
  163. event["location"] = vText(session_info["room"])
  164. # Get rid of HTML in 'desc'
  165. desc = BeautifulSoup(
  166. " ".join(session_info["desc"]).replace("\n", " "), "html.parser"
  167. ).get_text()
  168. event["description"] = desc
  169. # Add speakers
  170. for speaker in session_info["speakers"]:
  171. event.add("attendee", self.mk_attendee(speaker), encode=0)
  172. dt_start = self.mk_datetime(month, day, t_start)
  173. dt_end = self.mk_datetime(month, day, t_end)
  174. event["dtstart"] = dt_start
  175. event["dtend"] = dt_end
  176. # Add to calendar
  177. self.cal.add_component(event)
  178. return event
  179. def gen_ical(self):
  180. """Parse LP schedule dict and generate iCal Calendar object.
  181. """
  182. for day_str, timeslots in self.lps_dict.items():
  183. month, day = self.get_month_day(day_str)
  184. if not month:
  185. # month, day not specified; cannot generate ical for
  186. # this day
  187. continue
  188. for timeslot_str, sessions in timeslots.items():
  189. t_start, t_end, t_name = self.get_timeslot(timeslot_str)
  190. if not t_start:
  191. # timeslot not specified; cannot generate ical for
  192. # this timeslot
  193. continue
  194. for session, session_info in sessions.items():
  195. self.add_event(
  196. month, day, t_start, t_end, t_name, session, session_info
  197. )
  198. return self.cal.to_ical().decode("utf-8")
  199. def to_ical(self):
  200. """Writes iCal to disk.
  201. """
  202. filename = "lp{}-schedule.ics".format(self.lp_year)
  203. write_file(filename, self.gen_ical())
  204. return filename
  205. class LPSRenderer(Renderer):
  206. """Helps convert Markdown version of LP schedule to a dictionary.
  207. """
  208. def __init__(self, **kwargs):
  209. super(LPSRenderer, self).__init__(**kwargs)
  210. self.last_day = None
  211. self.last_time_slot = None
  212. self.last_session = None
  213. # Denotes the no. of the paragraph under a session; this
  214. # information will be helpful in identifying the "speaker",
  215. # "room" and session "description".
  216. self.no_paragraph = None
  217. # Contains a list of speakers' names which are marked up for
  218. # auto-linking[1], but don't have an id to link to.
  219. #
  220. # [1]: Markup for auto-linking speakers is [John Hacker]().
  221. self.speakers_noids = []
  222. # If it is 'False', then the 'speaker.ids' file was not found;
  223. # otherwise it is an OrderedDict containing the mapping of
  224. # speakers and their corresponding id.
  225. self.speakers_ids = json_read("speakers.ids")
  226. def get_uid(self, speaker):
  227. """Generate unique id for `speaker`.
  228. Returns unique id for `speaker` if it exists; `False` otherwise.
  229. """
  230. if not self.speakers_ids:
  231. # There is no speakers_ids OrderedDict available.
  232. return False
  233. speaker = str(speaker)
  234. if speaker in self.speakers_ids.keys():
  235. return self.speakers_ids[speaker]
  236. else:
  237. # speaker not found in speakers_ids OrderedDict.
  238. return False
  239. def _check_session_title_exists(self):
  240. """Checks if :py:attr:`.last_session` is set.
  241. If :py:attr:`.last_session` is not set and first paragraph is
  242. encountered, then it is assumed that the current timeslot is in
  243. the following format::
  244. ### 9:00 - 10:45: Opening Keynote - Beyond unfree...
  245. [Cory Doctorow][doctorow]
  246. Room 32-123
  247. Software has eaten the world...
  248. This method is meant to be called from the
  249. :py:method:`.paragraph` method.
  250. """
  251. if not self.last_session and self.no_paragraph == 0:
  252. # Current timeslot has only one session and there
  253. # no session title.
  254. #
  255. # st-from-ts -> session title from time slot.
  256. lps_dict[self.last_day][self.last_time_slot]["st-from-ts"] = OrderedDict()
  257. self.last_session = "st-from-ts"
  258. def _process_video(self, text):
  259. """Process the video text.
  260. If it's a link, just extract the link and return it.
  261. This method is meant to be called from the
  262. :py:method:`.paragraph` method.
  263. """
  264. soup = BeautifulSoup(text, "html.parser")
  265. links = soup.find_all("a")
  266. if len(links) == 0:
  267. # no links found, so
  268. return text
  269. # link(s) found, return the first link's href.
  270. return links[0]["href"]
  271. def link(self, link, title, text):
  272. # Here, we catch speaker names that have to be autolinked and
  273. # autolink them if there is an id available for the speaker.
  274. if not link:
  275. # We found a speaker that has to be autolinked.
  276. # Here, `text` is the speaker' name.
  277. id_ = self.get_uid(text)
  278. if id_:
  279. link = "speakers.html#{}".format(id_)
  280. else:
  281. # Oh no, there is no id for this speaker.
  282. self.speakers_noids.append(text)
  283. # Don't linkify this speaker; they don't have an id.
  284. return text
  285. return super(LPSRenderer, self).link(link, title, text)
  286. def header(self, text, level, raw=None):
  287. global lps_dict
  288. if level == 2:
  289. # Add new day.
  290. lps_dict[text] = OrderedDict()
  291. self.last_day = text
  292. elif level == 3:
  293. # Add new timeslot
  294. lps_dict[self.last_day][text] = OrderedDict()
  295. self.last_time_slot = text
  296. # New timeslot, reset paragraphs processed and
  297. # last session.
  298. self.no_paragraph = 0
  299. self.last_session = None
  300. elif level == 4:
  301. # Add new session
  302. lps_dict[self.last_day][self.last_time_slot][text] = OrderedDict()
  303. self.last_session = text
  304. # We found a new session; set no of paragraphs processed
  305. # to 0.
  306. self.no_paragraph = 0
  307. return super(LPSRenderer, self).header(text, level, raw)
  308. def paragraph(self, text):
  309. global lps_dict
  310. self._check_session_title_exists()
  311. p = super(LPSRenderer, self).paragraph(text)
  312. if self.no_paragraph == 0:
  313. # Speaker
  314. speakers = text.split(", ")
  315. lps_dict[self.last_day][self.last_time_slot][self.last_session][
  316. "speakers"
  317. ] = speakers
  318. self.no_paragraph = self.no_paragraph + 1
  319. elif self.no_paragraph == 1:
  320. # Room
  321. lps_dict[self.last_day][self.last_time_slot][self.last_session][
  322. "room"
  323. ] = text
  324. self.no_paragraph = self.no_paragraph + 1
  325. elif self.no_paragraph == 2:
  326. lps_dict[self.last_day][self.last_time_slot][self.last_session][
  327. "video"
  328. ] = self._process_video(text)
  329. # Initialize description
  330. lps_dict[self.last_day][self.last_time_slot][self.last_session]["desc"] = []
  331. self.no_paragraph = self.no_paragraph + 1
  332. elif self.no_paragraph > 1:
  333. lps_dict[self.last_day][self.last_time_slot][self.last_session][
  334. "desc"
  335. ].append(text)
  336. return p
  337. class LPSpeakersRenderer(Renderer):
  338. """Helps convert Markdown version of LP speakers to a dictionary.
  339. """
  340. def __init__(self, **kwargs):
  341. super(LPSpeakersRenderer, self).__init__(**kwargs)
  342. global lpspeakers_dict
  343. lpspeakers_dict = OrderedDict()
  344. lpspeakers_dict["keynote-speakers"] = []
  345. lpspeakers_dict["speakers"] = []
  346. # Type of present speaker being processed; can either be
  347. # 'keynote-speakers' or 'speakers'.
  348. self.speaker_type = None
  349. # Maintain a dict of speakers and their IDs.
  350. self.speakers_ids = OrderedDict()
  351. def mk_uid(self, speaker_block):
  352. """Returns a unique id.
  353. """
  354. # 'John HÖcker, Onion Project' -> 'John HÖcker'
  355. speaker = str(speaker_block.split(", ")[0])
  356. # 'John HÖcker' -> 'John Hacker'
  357. ascii_speaker = unidecode(speaker)
  358. # 'John Hacker' -> 'hacker'
  359. id_ = ascii_speaker.split()[-1].lower()
  360. if id_ not in self.speakers_ids.values():
  361. self.speakers_ids[speaker] = id_
  362. return id_
  363. else:
  364. # 'John Hacker' -> 'john_hacker'
  365. id_ = "_".join([s.lower() for s in ascii_speaker.split()])
  366. self.speakers_ids[speaker] = id_
  367. return id_
  368. def header(self, text, level, raw=None):
  369. global lpspeakers_dict
  370. if level == 1:
  371. self.speaker_type = "keynote-speakers"
  372. lpspeakers_dict[self.speaker_type].append(OrderedDict())
  373. lpspeakers_dict[self.speaker_type][-1]["speaker"] = text
  374. lpspeakers_dict[self.speaker_type][-1]["id"] = self.mk_uid(text)
  375. lpspeakers_dict[self.speaker_type][-1]["bio"] = []
  376. elif level == 2:
  377. self.speaker_type = "speakers"
  378. lpspeakers_dict[self.speaker_type].append(OrderedDict())
  379. lpspeakers_dict[self.speaker_type][-1]["speaker"] = text.split(", ")[0]
  380. lpspeakers_dict[self.speaker_type][-1]["id"] = self.mk_uid(text)
  381. lpspeakers_dict[self.speaker_type][-1]["bio"] = []
  382. return super(LPSpeakersRenderer, self).header(text, level, raw)
  383. def image(self, src, title, text):
  384. global lpspeakers_dict
  385. lpspeakers_dict[self.speaker_type][-1]["img_url"] = src
  386. lpspeakers_dict[self.speaker_type][-1]["img_alt"] = text
  387. return super(LPSpeakersRenderer, self).image(src, title, text)
  388. def paragraph(self, text):
  389. global lpspeakers_dict
  390. p = super(LPSpeakersRenderer, self).paragraph(text)
  391. if text.startswith("<img"):
  392. # ignore
  393. return p
  394. lpspeakers_dict[self.speaker_type][-1]["bio"].append(text)
  395. return p
  396. class LPSMarkdown(Markdown):
  397. """Converts MD LP schedule to a dictionary.
  398. Returns the Markdown version of LP schedule as a dictionary.
  399. """
  400. def __init__(self, inline=None, block=None, **kwargs):
  401. """
  402. Initialize with LPSRenderer as the renderer.
  403. """
  404. self.sessions_renderer = LPSRenderer()
  405. super(LPSMarkdown, self).__init__(
  406. renderer=self.sessions_renderer, inline=None, block=None, **kwargs
  407. )
  408. def parse(self, text):
  409. global lps_dict
  410. lps_dict = OrderedDict()
  411. html = super(LPSMarkdown, self).parse(text)
  412. # Write list of speakers with no ids to `speakers.noids`.
  413. json_write("speakers.noids", self.sessions_renderer.speakers_noids)
  414. return lps_dict
  415. class LPSpeakersMarkdown(Markdown):
  416. """Converts MD LP speakers to a dictionary.
  417. Returns the Markdown version of LP speakers as a dictionary.
  418. """
  419. def __init__(self, inline=None, block=None, **kwargs):
  420. """
  421. Initialize with LPSpeakersRenderer as the renderer.
  422. """
  423. self.speakers_renderer = LPSpeakersRenderer()
  424. super(LPSpeakersMarkdown, self).__init__(
  425. renderer=self.speakers_renderer, inline=None, block=None, **kwargs
  426. )
  427. def parse(self, text):
  428. global lpspeakers_dict
  429. html = super(LPSpeakersMarkdown, self).parse(text)
  430. # Write mapping of speakers and their ids to `speakers.ids`.
  431. json_write("speakers.ids", self.speakers_renderer.speakers_ids)
  432. return lpspeakers_dict
  433. def RenderHTML(lp_dict, template_name):
  434. """Renders LP schedule/speakers in HTML from a python dictionary.
  435. Returns the HTML as a string.
  436. """
  437. template_content = template_read(template_name)
  438. if not template_content:
  439. exit("Unable to read {} template".format(template_name))
  440. template = Environment(trim_blocks=True, lstrip_blocks=True).from_string(
  441. template_content
  442. )
  443. lp_html = template.render(lp_dict=lp_dict)
  444. return str(BeautifulSoup(lp_html, "html.parser")).strip()
  445. def main():
  446. parser = ArgumentParser()
  447. group = parser.add_mutually_exclusive_group()
  448. group.add_argument(
  449. "-sc", "--schedule", action="store_true", help="Generate LP schedule"
  450. )
  451. group.add_argument(
  452. "-sp", "--speakers", action="store_true", help="Generate LP speakers"
  453. )
  454. parser.add_argument(
  455. "--ical", type=int, help="Specify LP year as argument; " + "generates iCal"
  456. )
  457. parser.add_argument(
  458. "--version",
  459. action="version",
  460. version="lpschedule-generator version {}".format(__version__),
  461. help="Show version number and exit.",
  462. )
  463. parser.add_argument("lp_md", help="Path to the LP markdown.")
  464. args = parser.parse_args()
  465. lp_md_content = read_file(path.abspath(args.lp_md))
  466. if lp_md_content:
  467. template_name = ""
  468. if args.schedule:
  469. markdown = LPSMarkdown()
  470. template_name = "schedule"
  471. elif args.speakers:
  472. markdown = LPSpeakersMarkdown()
  473. template_name = "speakers"
  474. else:
  475. parser.error("No action requested, add -s or -sp switch")
  476. lp_dict = markdown(lp_md_content)
  477. lp_html = RenderHTML(lp_dict, template_name)
  478. if args.ical and args.schedule:
  479. LPiCal(lp_dict, args.ical).to_ical()
  480. else:
  481. exit("Unable to read LP markdown")
  482. if lp_html:
  483. # stdout lps html
  484. print(lp_html)
  485. else:
  486. print("Error generating LP HTML.")
  487. if __name__ == "__main__":
  488. main()