lps_gen.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681
  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2018 lpschedule-generator contributors. See CONTRIBUTORS.
  4. #
  5. # This file is part of lpschedule-generator.
  6. #
  7. # lpschedule-generator is free software: you can redistribute it
  8. # and/or modify it under the terms of the GNU General Public License
  9. # as published by the Free Software Foundation, either version 3 of
  10. # the License, or (at your option) any later version.
  11. #
  12. # lpschedule-generator is distributed in the hope that it will be
  13. # useful, but WITHOUT ANY WARRANTY; without even the implied
  14. # warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  15. # See the GNU General Public License for more details.
  16. #
  17. # You should have received a copy of the GNU General Public License
  18. # along with lpschedule-generator (see COPYING). If not, see
  19. # <http://www.gnu.org/licenses/>.
  20. import json
  21. import re
  22. import sys
  23. import pytz
  24. from argparse import ArgumentParser
  25. from collections import OrderedDict
  26. from datetime import datetime
  27. from os import path
  28. from bs4 import BeautifulSoup
  29. from icalendar import Calendar, Event, vCalAddress, vText, vDatetime
  30. from jinja2 import Environment, FileSystemLoader
  31. from jinja2.exceptions import TemplateNotFound
  32. from mistune import Renderer, Markdown
  33. from pytz import timezone
  34. from unidecode import unidecode
  35. from lpschedule_generator._version import __version__
# Python 2 only: re-import `sys` so that `setdefaultencoding` is
# reachable again, then make UTF-8 the default codec for implicit
# str <-> unicode conversions done in this module.
reload(sys)
sys.setdefaultencoding('utf-8')
# Module-level dictionary that will contain the LP schedule; it is
# filled in by `LPSRenderer` and rebound by `LPSMarkdown.parse`.
lps_dict = OrderedDict()
# Module-level dictionary that will contain the LP speakers; it is
# rebound by `LPSpeakersRenderer.__init__` and filled in by that renderer.
lpspeakers_dict = OrderedDict()
  43. def read_file(filename):
  44. """Read file and return it as a string.
  45. :param str filename: Absolute pathname of the file.
  46. """
  47. content = ''
  48. try:
  49. with open(filename, 'rb') as f:
  50. for line in f:
  51. content = content + line
  52. except IOError:
  53. print('Error: unable to open {}'.format(filename))
  54. return content
  55. def write_file(filename, filecontent):
  56. """Write `filecontent` to `filename`.
  57. :param str filename:
  58. Absolute pathname of the file.
  59. :param str filecontent:
  60. Data to write to `filename`.
  61. """
  62. file_ = None
  63. try:
  64. file_ = open(filename, 'wb')
  65. file_.write(filecontent)
  66. file_.close()
  67. except IOError:
  68. print('Error creating and writing content to {}'.format(filename))
  69. exit(1)
  70. def json_write(filename, obj):
  71. """Serialize `obj` to JSON formatted `str` to `filename`.
  72. `filename` is written relative to the current working directory.
  73. """
  74. write_file(filename, json.dumps(obj, ensure_ascii=False, indent=4))
  75. def json_read(filename):
  76. """Deserialize JSON from `filename` into Python object.
  77. """
  78. if not path.isfile(filename):
  79. return False
  80. return json.loads(read_file(filename),
  81. object_pairs_hook=OrderedDict)
  82. class LPiCal(object):
  83. """Used for producing iCal for LP schedule.
  84. """
  85. def __init__(self, lps_dict, lp_year):
  86. self.lps_dict = lps_dict
  87. self.lp_year = str(lp_year)
  88. # Matches strings like '09:45 - 10:30: Lorem ipsum dolor sit.'
  89. self.timeslot_re = re.compile(r'(\d+:\d+).+?(\d+:\d+)'
  90. r'\s*[:-]?\s*(.+\b)?')
  91. # Matches strings like 'Saturday, March 19'
  92. self.month_day_re = re.compile(r'\w+,\s*([a-zA-Z]+)\s*(\d+)')
  93. self.cal = Calendar()
  94. self.cal.add('prodid', '-//lpschedule generator//mxm.dk//')
  95. self.cal.add('version', '2.0')
  96. self.cal.add('x-wr-calname', 'LibrePlanet %s' % self.lp_year)
  97. # RFC 2445 requires DTSTAMP to be in UTC. DTSTAMP is used in
  98. # VEVENT (Event object, see `add_event` method).
  99. self.dtstamp = vDatetime(datetime.now(pytz.utc))
  100. # used to generate uid for ical.
  101. self.ucounter = 0
  102. def gen_uid(self):
  103. """Returns an unique id.
  104. Used for Event object.
  105. """
  106. self.ucounter = self.ucounter + 1
  107. return '%s@LP%s@libreplanet.org' % (str(self.ucounter),
  108. self.lp_year)
  109. def get_timeslot(self, s):
  110. """Get start and end time for a timeslot.
  111. """
  112. timeslot = self.timeslot_re.search(s)
  113. if not timeslot:
  114. return None, None, None
  115. t_start = timeslot.group(1)
  116. t_end = timeslot.group(2)
  117. name = timeslot.group(3) or ''
  118. return t_start, t_end, name
  119. def get_month_day(self, s):
  120. """Get month and day.
  121. """
  122. month_day = self.month_day_re.search(s)
  123. if (not month_day) or (len(month_day.groups()) < 2):
  124. return None, None
  125. month = month_day.group(1)
  126. day = month_day.group(2)
  127. return month, day
  128. def mk_datetime(self, month, day, time):
  129. """Returns datetime object (EST).
  130. """
  131. # Day %d
  132. # Month %B
  133. # Year %Y
  134. # Hour %H (24-hr)
  135. # Minute %M (zero padded)
  136. # Second %S (zero padded)
  137. datetime_fmt = '%d %B %Y %H:%M:%S'
  138. eastern = timezone('US/Eastern')
  139. hour = time.split(':')[0]
  140. minute = time.split(':')[1]
  141. datetime_str = '%s %s %s %s:%s:%s' % (day, month, self.lp_year,
  142. hour.zfill(2),
  143. minute.zfill(2),
  144. '00')
  145. dt_object = datetime.strptime(datetime_str, datetime_fmt)
  146. return vDatetime(eastern.localize(dt_object))
  147. def mk_attendee(self, speaker):
  148. """Make Attendee to be added to an Event object.
  149. See `add_event` method.
  150. """
  151. # Get rid of HTML (<a> element, etc) in `speaker`
  152. speaker = BeautifulSoup(speaker, 'html.parser').get_text()
  153. attendee = vCalAddress('invalid:nomail')
  154. attendee.params['cn'] = vText(speaker)
  155. attendee.params['ROLE'] = vText('REQ-PARTICIPANT')
  156. attendee.params['CUTYPE'] = vText('INDIVIDUAL')
  157. return attendee
  158. def add_event(self, month, day, t_start, t_end, t_name, session,
  159. session_info):
  160. """Adds event to calendar.
  161. """
  162. event = Event()
  163. event['uid'] = self.gen_uid()
  164. event['dtstamp'] = self.dtstamp
  165. event['class'] = vText('PUBLIC')
  166. event['status'] = vText('CONFIRMED')
  167. event['method'] = vText('PUBLISH')
  168. if session == 'st-from-ts':
  169. event['summary'] = t_name
  170. else:
  171. event['summary'] = session
  172. event['location'] = vText(session_info['room'])
  173. # Get rid of HTML in 'desc'
  174. desc = BeautifulSoup(' '.join(
  175. session_info['desc']).replace(
  176. '\n', ' '), 'html.parser').get_text()
  177. event['description'] = desc
  178. # Add speakers
  179. for speaker in session_info['speakers']:
  180. event.add('attendee', self.mk_attendee(speaker), encode=0)
  181. dt_start = self.mk_datetime(month, day, t_start)
  182. dt_end = self.mk_datetime(month, day, t_end)
  183. event['dtstart'] = dt_start
  184. event['dtend'] = dt_end
  185. # Add to calendar
  186. self.cal.add_component(event)
  187. return event
  188. def gen_ical(self):
  189. """Parse LP schedule dict and generate iCal Calendar object.
  190. """
  191. for day_str, timeslots in self.lps_dict.items():
  192. month, day = self.get_month_day(day_str)
  193. if not month:
  194. # month, day not specified; cannot generate ical for
  195. # this day
  196. continue
  197. for timeslot_str, sessions in timeslots.items():
  198. t_start, t_end, t_name = self.get_timeslot(timeslot_str)
  199. if not t_start:
  200. # timeslot not specified; cannot generate ical for
  201. # this timeslot
  202. continue
  203. for session, session_info in sessions.items():
  204. self.add_event(month, day, t_start, t_end, t_name,
  205. session, session_info)
  206. return self.cal.to_ical()
  207. def to_ical(self):
  208. """Writes iCal to disk.
  209. """
  210. filename = 'lp%s-schedule.ics' % self.lp_year
  211. write_file(filename, self.gen_ical())
  212. return filename
  213. class LPSRenderer(Renderer):
  214. """Helps convert Markdown version of LP schedule to a dictionary.
  215. """
  216. def __init__(self, **kwargs):
  217. super(LPSRenderer, self).__init__(**kwargs)
  218. self.last_day = None
  219. self.last_time_slot = None
  220. self.last_session = None
  221. # Denotes the no. of the paragraph under a session; this
  222. # information will be helpful in identifying the "speaker",
  223. # "room" and session "description".
  224. self.no_paragraph = None
  225. # Contains a list of speakers' names which are marked up for
  226. # auto-linking[1], but don't have an id to link to.
  227. #
  228. # [1]: Markup for auto-linking speakers is [John Hacker]().
  229. self.speakers_noids = []
  230. # If it is 'False', then the 'speaker.ids' file was not found;
  231. # otherwise it is an OrderedDict containing the mapping of
  232. # speakers and their corresponding id.
  233. self.speakers_ids = json_read('speakers.ids')
  234. def get_uid(self, speaker):
  235. """Generate unique id for `speaker`.
  236. Returns unique id for `speaker` if it exists; `False` otherwise.
  237. """
  238. if not self.speakers_ids:
  239. # There is no speakers_ids OrderedDict available.
  240. return False
  241. speaker = unicode(speaker)
  242. if speaker in self.speakers_ids.keys():
  243. return self.speakers_ids[speaker]
  244. else:
  245. # speaker not found in speakers_ids OrderedDict.
  246. return False
  247. def _check_session_title_exists(self):
  248. """Checks if :py:attr:`.last_session` is set.
  249. If :py:attr:`.last_session` is not set and first paragraph is
  250. encountered, then it is assumed that the current timeslot is in
  251. the following format::
  252. ### 9:00 - 10:45: Opening Keynote - Beyond unfree...
  253. [Cory Doctorow][doctorow]
  254. Room 32-123
  255. Software has eaten the world...
  256. This method is meant to be called from the
  257. :py:method:`.paragraph` method.
  258. """
  259. if not self.last_session and self.no_paragraph == 0:
  260. # Current timeslot has only one session and there
  261. # no session title.
  262. #
  263. # st-from-ts -> session title from time slot.
  264. lps_dict[self.last_day][self.last_time_slot][
  265. 'st-from-ts'] = OrderedDict()
  266. self.last_session = 'st-from-ts'
  267. def _process_video(self, text):
  268. """Process the video text.
  269. If it's a link, just extract the link and return it.
  270. This method is meant to be called from the
  271. :py:method:`.paragraph` method.
  272. """
  273. soup = BeautifulSoup(text, 'html.parser')
  274. links = soup.find_all('a')
  275. if len(links) == 0:
  276. # no links found, so
  277. return text
  278. # link(s) found, return the first link's href.
  279. return links[0]['href']
  280. def link(self, link, title, text):
  281. # Here, we catch speaker names that have to be autolinked and
  282. # autolink them if there is an id available for the speaker.
  283. if not link:
  284. # We found a speaker that has to be autolinked.
  285. # Here, `text` is the speaker' name.
  286. id_ = self.get_uid(text)
  287. if id_:
  288. link = 'speakers.html#%s' % id_
  289. else:
  290. # Oh no, there is no id for this speaker.
  291. self.speakers_noids.append(text)
  292. # Don't linkify this speaker; they don't have an id.
  293. return text
  294. return super(LPSRenderer, self).link(link, title, text)
  295. def header(self, text, level, raw=None):
  296. global lps_dict
  297. if level == 2:
  298. # Add new day.
  299. lps_dict[text] = OrderedDict()
  300. self.last_day = text
  301. elif level == 3:
  302. # Add new timeslot
  303. lps_dict[self.last_day][text] = OrderedDict()
  304. self.last_time_slot = text
  305. # New timeslot, reset paragraphs processed and
  306. # last session.
  307. self.no_paragraph = 0
  308. self.last_session = None
  309. elif level == 4:
  310. # Add new session
  311. lps_dict[self.last_day][self.last_time_slot][
  312. text] = OrderedDict()
  313. self.last_session = text
  314. # We found a new session; set no of paragraphs processed
  315. # to 0.
  316. self.no_paragraph = 0
  317. return super(LPSRenderer, self).header(text, level, raw)
  318. def paragraph(self, text):
  319. global lps_dict
  320. self._check_session_title_exists()
  321. p = super(LPSRenderer, self).paragraph(text)
  322. if self.no_paragraph == 0:
  323. # Speaker
  324. speakers = text.split(', ')
  325. lps_dict[self.last_day][self.last_time_slot][
  326. self.last_session]['speakers'] = speakers
  327. self.no_paragraph = self.no_paragraph + 1
  328. elif self.no_paragraph == 1:
  329. # Room
  330. lps_dict[self.last_day][self.last_time_slot][
  331. self.last_session]['room'] = text
  332. self.no_paragraph = self.no_paragraph + 1
  333. elif self.no_paragraph == 2:
  334. lps_dict[self.last_day][self.last_time_slot][
  335. self.last_session]['video'] = self._process_video(text)
  336. # Initialize description
  337. lps_dict[self.last_day][self.last_time_slot][
  338. self.last_session]['desc'] = []
  339. self.no_paragraph = self.no_paragraph + 1
  340. elif self.no_paragraph > 1:
  341. lps_dict[self.last_day][self.last_time_slot][
  342. self.last_session]['desc'].append(text)
  343. return p
  344. class LPSpeakersRenderer(Renderer):
  345. """Helps convert Markdown version of LP speakers to a dictionary.
  346. """
  347. def __init__(self, **kwargs):
  348. super(LPSpeakersRenderer, self).__init__(**kwargs)
  349. global lpspeakers_dict
  350. lpspeakers_dict = OrderedDict()
  351. lpspeakers_dict['keynote-speakers'] = []
  352. lpspeakers_dict['speakers'] = []
  353. # Type of present speaker being processed; can either be
  354. # 'keynote-speakers' or 'speakers'.
  355. self.speaker_type = None
  356. # Maintain a dict of speakers and their IDs.
  357. self.speakers_ids = OrderedDict()
  358. def mk_uid(self, speaker_block):
  359. """Returns a unique id.
  360. """
  361. # 'John HÖcker, Onion Project' -> 'John HÖcker'
  362. speaker = unicode(speaker_block.split(', ')[0])
  363. # 'John HÖcker' -> 'John Hacker'
  364. ascii_speaker = unidecode(speaker)
  365. # 'John Hacker' -> 'hacker'
  366. id_ = ascii_speaker.split()[-1].lower()
  367. if id_ not in self.speakers_ids.values():
  368. self.speakers_ids[speaker]= id_
  369. return id_
  370. else:
  371. # 'John Hacker' -> 'john_hacker'
  372. id_ = '_'.join([s.lower() for s in ascii_speaker.split()])
  373. self.speakers_ids[speaker] = id_
  374. return id_
  375. def header(self, text, level, raw=None):
  376. global lpspeakers_dict
  377. if level == 1:
  378. self.speaker_type = 'keynote-speakers'
  379. lpspeakers_dict[self.speaker_type].append(OrderedDict())
  380. lpspeakers_dict[self.speaker_type][-1]['speaker'] = text
  381. lpspeakers_dict[self.speaker_type][-1][
  382. 'id'] = self.mk_uid(text)
  383. lpspeakers_dict[self.speaker_type][-1][
  384. 'bio'] = []
  385. elif level == 2:
  386. self.speaker_type = 'speakers'
  387. lpspeakers_dict[self.speaker_type].append(OrderedDict())
  388. lpspeakers_dict[self.speaker_type][
  389. -1]['speaker'] = text.split(', ')[0]
  390. lpspeakers_dict[self.speaker_type][
  391. -1]['id'] = self.mk_uid(text)
  392. lpspeakers_dict[self.speaker_type][
  393. -1]['bio'] = []
  394. return super(LPSpeakersRenderer, self).header(text, level, raw)
  395. def image(self, src, title, text):
  396. global lpspeakers_dict
  397. lpspeakers_dict[self.speaker_type][-1]['img_url'] = src
  398. lpspeakers_dict[self.speaker_type][-1]['img_alt'] = text
  399. return super(LPSpeakersRenderer, self).image(src, title, text)
  400. def paragraph(self, text):
  401. global lpspeakers_dict
  402. p = super(LPSpeakersRenderer, self).paragraph(text)
  403. if text.startswith('<img'):
  404. # ignore
  405. return p
  406. lpspeakers_dict[self.speaker_type][-1]['bio'].append(text)
  407. return p
  408. class LPSMarkdown(Markdown):
  409. """Converts MD LP schedule to a dictionary.
  410. Returns the Markdown version of LP schedule as a dictionary.
  411. """
  412. def __init__(self, inline=None, block=None, **kwargs):
  413. """
  414. Initialize with LPSRenderer as the renderer.
  415. """
  416. self.sessions_renderer = LPSRenderer()
  417. super(LPSMarkdown, self).__init__(
  418. renderer=self.sessions_renderer,
  419. inline=None, block=None,
  420. **kwargs)
  421. def parse(self, text):
  422. global lps_dict
  423. lps_dict = OrderedDict()
  424. html = super(LPSMarkdown, self).parse(text)
  425. # Write list of speakers with no ids to `speakers.noids`.
  426. json_write('speakers.noids',
  427. self.sessions_renderer.speakers_noids)
  428. return lps_dict
  429. class LPSpeakersMarkdown(Markdown):
  430. """Converts MD LP speakers to a dictionary.
  431. Returns the Markdown version of LP speakers as a dictionary.
  432. """
  433. def __init__(self, inline=None, block=None, **kwargs):
  434. """
  435. Initialize with LPSpeakersRenderer as the renderer.
  436. """
  437. self.speakers_renderer = LPSpeakersRenderer()
  438. super(LPSpeakersMarkdown, self).__init__(
  439. renderer=self.speakers_renderer,
  440. inline=None, block=None,
  441. **kwargs)
  442. def parse(self, text):
  443. global lpspeakers_dict
  444. html = super(LPSpeakersMarkdown, self).parse(text)
  445. # Write mapping of speakers and their ids to `speakers.ids`.
  446. json_write('speakers.ids', self.speakers_renderer.speakers_ids)
  447. return lpspeakers_dict
  448. def RenderHTML(lp_dict, template):
  449. """Renders LP schedule/speakers in HTML from a python dictionary.
  450. Returns the HTML as a string.
  451. """
  452. env = Environment(loader=FileSystemLoader(path.dirname(template)),
  453. trim_blocks=True, lstrip_blocks=True)
  454. template_name = path.basename(template)
  455. template = None
  456. try:
  457. template = env.get_template(template_name)
  458. except TemplateNotFound as e:
  459. print('Template {} not found.'.format(template_name))
  460. exit(1)
  461. lp_html = template.render(lp_dict=lp_dict)
  462. return str(BeautifulSoup(lp_html, 'html.parser')).strip()
  463. def main():
  464. parser = ArgumentParser()
  465. group = parser.add_mutually_exclusive_group()
  466. group.add_argument("-s", "--schedule", action="store_true",
  467. help="Generate LP schedule")
  468. group.add_argument("-sp", "--speakers", action="store_true",
  469. help="Generate LP speakers")
  470. parser.add_argument("--ical", type=int,
  471. help="Specify LP year as argument; "
  472. + "generates iCal")
  473. parser.add_argument("--version", action="version",
  474. version='lpschedule-generator version %s'
  475. % __version__,
  476. help="Show version number and exit.")
  477. parser.add_argument("lp_t",
  478. help="Path to the LP template.")
  479. parser.add_argument("lp_md",
  480. help="Path to the LP markdown.")
  481. args = parser.parse_args()
  482. lp_template = args.lp_t
  483. lp_md_content = read_file(path.abspath(args.lp_md))
  484. if path.exists(lp_template) and lp_md_content:
  485. if args.schedule:
  486. markdown = LPSMarkdown()
  487. elif args.speakers:
  488. markdown = LPSpeakersMarkdown()
  489. else:
  490. parser.error('No action requested, add -s or -sp switch')
  491. lp_dict = markdown(lp_md_content)
  492. lp_html = RenderHTML(lp_dict, lp_template)
  493. if args.ical and args.schedule:
  494. LPiCal(lp_dict, args.ical).to_ical()
  495. else:
  496. exit(1)
  497. if lp_html:
  498. # stdout lps html
  499. print(lp_html)
  500. else:
  501. print('Error generating LP HTML.')
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()