document.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624
  1. # -*- coding: utf-8 -*-
  2. # manpage/document.py
  3. # Part of ‘manpage’, a Python library for making Unix manual documents.
  4. #
  5. # Copyright © 2016 Ben Finney <ben+python@benfinney.id.au>
  6. #
  7. # This is free software: see the grant of license at end of this file.
  8. """ Structure and markup of Unix manual page documents.
  9. The Unix manual system is structured into documents, each called a
  10. “manual page”. Manual pages each belong to a topical manual
  11. section. Each manual section is part of a manual. There is always
  12. one default manual, and there may be more on a system.
  13. See the documentation for manual pages (on GNU+Linux, the
  14. ‘man-pages(7)’ page; on BSD, the ‘manpages(5)’ page) for a detailed
  15. explanation of writing manual page documents.
  16. """
  17. import collections
  18. import datetime
  19. import functools
  20. import re
  21. import textwrap
  22. from types import SimpleNamespace
  23. MetaData = collections.namedtuple(
  24. 'MetaData', "name whatis manual section source")
  25. class Document:
  26. """ A specific document (a “man page”) in the manual.
  27. Data attributes:
  28. * `metadata`: The `MetaData` instance to specify this manual page:
  29. * `name`: The name of this document.
  30. * `whatis`: The succinct one-line description of this document.
  31. * `manual`: The title of the manual to which this document
  32. belongs.
  33. * `section`: The section code to which this document belongs.
  34. * `source`: The project that includes of the item documented
  35. in this document.
  36. * `date`: The creation date of this document, as a
  37. `datetime.date` instance.
  38. * `header`: The `DocumentHeader` instance of this document.
  39. """
  40. standard_section_titles = (
  41. "NAME",
  42. "SYNOPSIS",
  43. "DESCRIPTION",
  44. "SEE ALSO",
  45. )
  46. def __init__(self, metadata):
  47. self.metadata = metadata
  48. self._created_date = datetime.date.today()
  49. self.header = DocumentHeader(self)
  50. self.content_sections = collections.OrderedDict(
  51. (title, None)
  52. for title in self.standard_section_titles)
  53. @property
  54. def date(self):
  55. return self._created_date
  56. def as_markup(self, encoding="utf-8"):
  57. """ Get the complete document content with markup. """
  58. content = self.header.as_markup(encoding)
  59. content += "".join(
  60. "{empty}\n{section}".format(
  61. empty=GroffMarkup.control.empty,
  62. section=section.as_markup())
  63. for section in self.content_sections.values()
  64. if section is not None)
  65. editor_hints = GroffMarkup.editor_hints(encoding)
  66. content += "{empty}\n{hints}".format(
  67. empty=GroffMarkup.control.empty,
  68. hints=editor_hints)
  69. return content
  70. def insert_section(self, index, section):
  71. """ Insert the document section at the specified index.
  72. :param index: The index (integer) in the existing sequence
  73. at which to insert this section.
  74. :param section: The `DocumentSection` instance to insert.
  75. :return: ``None``.
  76. """
  77. ordered_titles = list(self.content_sections.keys())
  78. ordered_titles.insert(index, section.title)
  79. self.content_sections[section.title] = section
  80. mapping_type = type(self.content_sections)
  81. self.content_sections = mapping_type(
  82. (title, self.content_sections[title])
  83. for title in ordered_titles)
  84. TitleFields = collections.namedtuple(
  85. 'TitleFields', "title section date source manual")
  86. class DocumentHeader:
  87. """ The header of a “man page” document.
  88. Data attributes:
  89. * `document`: The document of which this is the header.
  90. """
  91. def __init__(self, document):
  92. self.document = document
  93. @property
  94. def metadata(self):
  95. return self.document.metadata
  96. def title_markup(self):
  97. """ Get the document title as Groff markup. """
  98. fields = TitleFields(
  99. title=GroffMarkup.escapetext(
  100. self.metadata.name.upper(),
  101. hyphen=GroffMarkup.glyph.minus),
  102. section=GroffMarkup.escapetext(self.metadata.section),
  103. date=GroffMarkup.escapetext(
  104. self.document.date.strftime("%Y-%m-%d"),
  105. hyphen=GroffMarkup.glyph.minus),
  106. source=None,
  107. manual=None,
  108. )
  109. if self.metadata.source is not None:
  110. fields = fields._replace(
  111. source=GroffMarkup.escapetext(self.metadata.source))
  112. if self.metadata.manual is not None:
  113. fields = fields._replace(
  114. manual=GroffMarkup.escapetext(self.metadata.manual))
  115. result = GroffMarkup.title_command(fields)
  116. return result
  117. def as_markup(self, encoding):
  118. """ Get the complete document header with markup. """
  119. content = self.title_markup()
  120. return content
  121. class DocumentSection:
  122. """ A titled section in a “man page” document.
  123. Data attributes:
  124. * `title`: The title of this section, as plain text.
  125. * `body`: The body of the section, as marked-up text.
  126. """
  127. def __init__(self, title, body=None):
  128. self.title = title
  129. self.body = body
  130. def as_markup(self):
  131. """ Get the complete document section with markup. """
  132. text = textwrap.dedent("""\
  133. {macro.section} {section.title}
  134. """).format(macro=GroffMarkup.macro, section=self)
  135. if self.body is not None:
  136. text += self.body
  137. if not text.endswith("\n"):
  138. text += "\n"
  139. return text
  140. class CommandDocument(Document):
  141. """ A specific document in the manual of commands.
  142. Commands are documented with particular conventions in the
  143. Unix manual system.
  144. Data attributes:
  145. * `metadata`: The `MetaData` instance to specify this manual page:
  146. * `name`: The command documented by this manual page.
  147. * `whatis`: Phrasal one-line summary for the command.
  148. * `manual`: If unspecified, the manual system will infer the
  149. default title for the section code.
  150. * `section`: Most commands should have their manual page in
  151. section “1” (User commands) or “8” (System management
  152. commands).
  153. """
  154. standard_section_titles = (
  155. "NAME",
  156. "SYNOPSIS",
  157. "DESCRIPTION",
  158. "OPTIONS",
  159. "EXIT STATUS",
  160. "ENVIRONMENT",
  161. "FILES",
  162. "CONFORMING TO",
  163. "NOTES",
  164. "BUGS",
  165. "EXAMPLE",
  166. "SEE ALSO",
  167. )
  168. def __init__(self, metadata):
  169. metadata_fields = metadata._asdict()
  170. if metadata_fields['section'] is None:
  171. metadata_fields['section'] = "1"
  172. metadata = MetaData(**metadata_fields)
  173. super().__init__(metadata)
  174. @functools.total_ordering
  175. class Reference:
  176. """ A reference to another document. """
  177. def as_markup(self):
  178. raise NotImplementedError
  179. @property
  180. def _comparison_tuple(self):
  181. """ Tuple of this object used for comparison operations. """
  182. raise NotImplementedError
  183. def __eq__(self, other):
  184. result = False
  185. if isinstance(other, type(self)):
  186. if self._comparison_tuple == other._comparison_tuple:
  187. result = True
  188. return result
  189. def __lt__(self, other):
  190. result = False
  191. if isinstance(other, type(self)):
  192. if self._comparison_tuple < other._comparison_tuple:
  193. result = True
  194. return result
  195. class DocumentReference(Reference):
  196. """ A reference to a “man page” document in the manual.
  197. Data attributes:
  198. * `name`: The name of the document.
  199. * `section`: The section in the manual.
  200. """
  201. spec_pattern = re.compile(r"(?P<name>.+)\((?P<section>\d[^)]*)\)")
  202. class ReferenceFormatError(ValueError):
  203. """ Raised when parsing a malformed man page reference. """
  204. def __init__(self, name, section):
  205. self.name = name
  206. self.section = section
  207. def __str__(self):
  208. text = "{self.name} ({self.section})".format(self=self)
  209. return text
  210. def __repr__(self):
  211. class_name = self.__class__.__name__
  212. class_args_text = "{self.name!r}, {self.section!r}".format(self=self)
  213. text = "{class_name}({args})".format(
  214. class_name=class_name, args=class_args_text)
  215. return text
  216. @property
  217. def _comparison_tuple(self):
  218. return (self.name, self.section)
  219. def __lt__(self, other):
  220. result = super().__lt__(other)
  221. if isinstance(other, ExternalReference):
  222. # Reference to any manual page compares earlier than externals.
  223. result = True
  224. return result
  225. @classmethod
  226. def from_text(cls, text):
  227. """ Parse `text` to generate an instance. """
  228. spec_match = cls.spec_pattern.match(text)
  229. if spec_match is None:
  230. raise cls.ReferenceFormatError(text)
  231. reference = cls(
  232. name=spec_match.group('name'),
  233. section=spec_match.group('section'))
  234. return reference
  235. def as_markup(self):
  236. """ Get the reference with document markup. """
  237. markup = textwrap.dedent("""\
  238. {macro.bold_roman} {ref.name} ({ref.section})
  239. """).format(macro=GroffMarkup.macro, ref=self)
  240. return markup
  241. class ExternalReference(Reference):
  242. """ A reference to an external document.
  243. Data attributes:
  244. * `title`: The title of the document.
  245. * `url`: The URL to the document.
  246. """
  247. def __init__(self, title, url=None):
  248. self.title = title
  249. self.url = url
  250. def __str__(self):
  251. text_template = "{self.title}"
  252. if self.url is not None:
  253. text_template = "{self.title} <URL:{self.url}>"
  254. text = text_template.format(self=self)
  255. return text
  256. def __repr__(self):
  257. class_name = self.__class__.__name__
  258. class_args_text = "{self.title!r}, {self.url!r}".format(self=self)
  259. text = "{class_name}({args})".format(
  260. class_name=class_name, args=class_args_text)
  261. return text
  262. @property
  263. def _comparison_tuple(self):
  264. return (self.title, self.url)
  265. def as_markup(self):
  266. """ Get the reference with document markup. """
  267. title_markup = GroffMarkup.escapetext(self.title)
  268. if self.url is None:
  269. url_markup = None
  270. markup_template = textwrap.dedent("""\
  271. {title}
  272. """)
  273. else:
  274. url_markup = GroffMarkup.escapetext(self.url)
  275. markup_template = textwrap.dedent("""\
  276. {macro.url_begin} {url}
  277. {title}
  278. {macro.url_end}
  279. """)
  280. markup = markup_template.format(
  281. macro=GroffMarkup.macro,
  282. title=title_markup, url=url_markup)
  283. return markup
  284. class GroffMarkup:
  285. """ Implementation of GNU troff markup. """
  286. control = SimpleNamespace(
  287. empty=".", comment=".\\\"",
  288. )
  289. glyph = SimpleNamespace(
  290. backslash="\\[rs]", hyphen="\\[hy]", minus="\\-",
  291. registered="\\*[R]", trademark="\\*[Tm]",
  292. dquote_left="\\[lq]", dquote_right="\\[rq]",
  293. )
  294. font = SimpleNamespace(
  295. previous="\\fP", roman="\\fR", bold="\\fB", italic="\\fI")
  296. size = SimpleNamespace(
  297. normal="\\s0", decrease="\\s-1", increase="\\s+1")
  298. macro = SimpleNamespace(
  299. line_break=".br",
  300. title=".TH", section=".SH", subsection=".SS",
  301. url_begin=".UR", url_end=".UE",
  302. roman=".R", roman_bold=".RB", roman_italic=".RI",
  303. bold=".B", bold_italic=".BI", bold_roman=".BR",
  304. italic=".I", italic_bold=".IB", italic_roman=".IR",
  305. )
  306. @classmethod
  307. def encoding_declaration(cls, encoding):
  308. """ Make an encoding declaration line for the document. """
  309. text = textwrap.dedent("""\
  310. {comment} -*- coding: {encoding} -*-
  311. """).format(
  312. comment=cls.control.comment,
  313. encoding=encoding)
  314. return text
  315. @classmethod
  316. def editor_hints(cls, encoding):
  317. """ Make a comment block of editor hints. """
  318. text = textwrap.dedent("""\
  319. {comment} Local variables:
  320. {comment} coding: {encoding}
  321. {comment} mode: {syntax}
  322. {comment} End:
  323. {comment} vim: fileencoding={encoding} filetype={syntax} :
  324. """).format(
  325. comment=cls.control.comment,
  326. encoding=encoding, syntax="nroff")
  327. return text
  328. @classmethod
  329. def escapetext(cls, text, hyphen=glyph.hyphen):
  330. """ Replace special glyphs in `text` with appropriate markup.
  331. :param text: The raw input text.
  332. :param hyphen: The glyph to substitute for a raw hyphen.
  333. """
  334. result = text
  335. result = result.replace("\\", cls.glyph.backslash)
  336. result = result.replace("-", hyphen)
  337. return result
  338. @classmethod
  339. def title_command(cls, fields):
  340. """ Make the document title command.
  341. :param fields: An instance of `TitleFields` specifying the
  342. fields of the title command.
  343. :return: The generated title command line.
  344. """
  345. fields_markup = " ".join(
  346. '"' + field + '"'
  347. for field in (
  348. getattr(fields, name) for name in TitleFields._fields)
  349. if field is not None)
  350. result = textwrap.dedent("""\
  351. {macro.title} {fields}
  352. """).format(macro=cls.macro, fields=fields_markup)
  353. return result
  354. class ManPageMaker:
  355. """ Maker for a manual page document.
  356. Data attributes:
  357. * `metadata`: A `MetaData` instance specifying the document
  358. metadata for the manual page document.
  359. * `seealso`: A collection of `Reference` instances. If not
  360. ``None``, this is used to populate the “SEE ALSO” section of
  361. the document.
  362. """
  363. document_class = Document
  364. def __init__(self, metadata):
  365. self.metadata = metadata
  366. self.seealso = None
  367. def make_manpage(self):
  368. """ Make a manual page document from the known metadata. """
  369. manpage = self.document_class(self.metadata)
  370. manpage.content_sections.update({
  371. "NAME": self.make_name_section(),
  372. "SYNOPSIS": self.make_synopsis_section(),
  373. "DESCRIPTION": self.make_description_section(),
  374. "SEE ALSO": self.make_seealso_section(),
  375. })
  376. return manpage
  377. def make_name_section(self):
  378. """ Make the “NAME” section of the document. """
  379. section = DocumentSection("NAME")
  380. name_markup = GroffMarkup.escapetext(
  381. self.metadata.name, hyphen=GroffMarkup.glyph.minus)
  382. whatis_markup = GroffMarkup.escapetext(self.metadata.whatis)
  383. summary_markup = "{name} {dash} {whatis}".format(
  384. name=name_markup,
  385. dash=GroffMarkup.glyph.minus,
  386. whatis=whatis_markup)
  387. section.body = textwrap.dedent("""\
  388. {summary}
  389. """).format(summary=summary_markup)
  390. return section
  391. def make_synopsis_section(self, text=None):
  392. """ Make the “SYNOPSIS” section of the document. """
  393. section = None
  394. if text is not None:
  395. section = DocumentSection("SYNOPSIS")
  396. text_markup = text.rstrip()
  397. section.body = textwrap.dedent("""\
  398. {synopsis}
  399. """).format(synopsis=text_markup)
  400. return section
  401. def make_description_section(self, text=None):
  402. """ Make the “DESCRIPTION” section of the document. """
  403. section = None
  404. if text is not None:
  405. section = DocumentSection("DESCRIPTION")
  406. description_markup = GroffMarkup.escapetext(text.rstrip())
  407. section.body = textwrap.dedent("""\
  408. {description}
  409. """).format(description=description_markup)
  410. return section
  411. def make_seealso_section(self, references=None):
  412. """ Make the “SEE ALSO” section of the document. """
  413. section = None
  414. if references is None:
  415. references = self.seealso
  416. if references:
  417. section = DocumentSection("SEE ALSO")
  418. references_sorted = sorted(references)
  419. seealso_items = [
  420. reference.as_markup().rstrip()
  421. for reference in references_sorted]
  422. for (index, item) in enumerate(seealso_items[:-1]):
  423. if item.endswith(GroffMarkup.macro.url_end):
  424. item += " ,"
  425. else:
  426. item += ","
  427. seealso_items[index] = item
  428. references_markup = "\n".join(seealso_items)
  429. section.body = textwrap.dedent("""\
  430. {references}
  431. """).format(references=references_markup)
  432. return section
  433. class Writer:
  434. """ An output file writer for a “man page” document. """
  435. def __init__(self, document, path, encoding="utf-8"):
  436. self.document = document
  437. self.path = path
  438. self.encoding = encoding
  439. def write(self):
  440. """ Emit the marked-up document to the output path. """
  441. with open(self.path, 'w', encoding=self.encoding) as outfile:
  442. content = self.document.as_markup(encoding=self.encoding)
  443. outfile.write(content)
  444. # This is free software: you may copy, modify, and/or distribute this work
  445. # under the terms of the GNU General Public License as published by the
  446. # Free Software Foundation; version 3 of that license or any later version.
  447. #
  448. # No warranty expressed or implied. See the file ‘LICENSE.GPL-3’ for details,
  449. # or view it online at <URL:https://www.gnu.org/licenses/gpl-3.0.html>.
  450. # Local variables:
  451. # coding: utf-8
  452. # mode: python
  453. # End:
  454. # vim: fileencoding=utf-8 filetype=python :