ansicat.py 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521
  1. #!/usr/bin/env python3
import argparse
import base64
import fnmatch
import html
import mimetypes
import os
import shutil
import subprocess
import sys
import textwrap
import time
import urllib
import urllib.parse

import netcache
import offthemes
from offutils import run,term_width,is_local,looks_like_base64, looks_like_url
from offutils import xdg
  18. try:
  19. from readability import Document
  20. _HAS_READABILITY = True
  21. except ModuleNotFoundError:
  22. _HAS_READABILITY = False
  23. try:
  24. from bs4 import BeautifulSoup
  25. from bs4 import Comment
  26. #if bs4 version >= 4.11, we need to silent some xml warnings
  27. import bs4
  28. version = bs4.__version__.split(".")
  29. recent = False
  30. if int(version[0]) > 4:
  31. recent = True
  32. elif int(version[0]) == 4:
  33. recent = int(version[1]) >= 11
  34. if recent:
  35. # As this is only for silencing some warnings, we fail
  36. # silently. We don’t really care
  37. try:
  38. from bs4 import XMLParsedAsHTMLWarning
  39. import warnings
  40. warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
  41. except:
  42. pass
  43. _HAS_SOUP = True
  44. except ModuleNotFoundError:
  45. _HAS_SOUP = False
  46. _DO_HTML = _HAS_SOUP #and _HAS_READABILITY
  47. if _DO_HTML and not _HAS_READABILITY:
  48. print("To improve your web experience (less cruft in webpages),")
  49. print("please install python3-readability or readability-lxml")
  50. try:
  51. import feedparser
  52. _DO_FEED = True
  53. except ModuleNotFoundError:
  54. _DO_FEED = False
  55. try:
  56. from PIL import Image
  57. _HAS_PIL = True
  58. except ModuleNotFoundError:
  59. _HAS_PIL = False
  60. _HAS_TIMG = shutil.which('timg')
  61. _HAS_CHAFA = shutil.which('chafa')
  62. _NEW_CHAFA = False
  63. _NEW_TIMG = False
  64. _RENDER_IMAGE = False
  65. # All this code to know if we render image inline or not
  66. if _HAS_CHAFA:
  67. # starting with 1.10, chafa can return only one frame
  68. # which allows us to drop dependancy for PIL
  69. output = run("chafa --version")
  70. # output is "Chafa version M.m.p"
  71. # check for m < 1.10
  72. try:
  73. chafa_major, chafa_minor, _ = output.split("\n")[0].split(" ")[-1].split(".")
  74. if int(chafa_major) >= 1 and int(chafa_minor) >= 10:
  75. _NEW_CHAFA = True
  76. except:
  77. pass
  78. if _NEW_CHAFA :
  79. _RENDER_IMAGE = True
  80. if _HAS_TIMG :
  81. try:
  82. output = run("timg --version")
  83. except subprocess.CalledProcessError:
  84. output = False
  85. # We don’t deal with timg before 1.3.2 (looping options)
  86. if output and output[5:10] > "1.3.2":
  87. _NEW_TIMG = True
  88. _RENDER_IMAGE = True
  89. elif _HAS_CHAFA and _HAS_PIL:
  90. _RENDER_IMAGE = True
  91. if not _RENDER_IMAGE:
  92. print("To render images inline, you need either chafa or timg.")
  93. if not _NEW_CHAFA and not _NEW_TIMG:
  94. print("Before Chafa 1.10, you also need python-pil")
  95. #return ANSI text that can be show by less
  96. def inline_image(img_file,width):
  97. #We don’t even try displaying pictures that are not there
  98. if not os.path.exists(img_file):
  99. return ""
  100. #Chafa is faster than timg inline. Let use that one by default
  101. #But we keep a list of "inlines" in case chafa fails
  102. inlines = []
  103. ansi_img = ""
  104. #We avoid errors by not trying to render non-image files
  105. if shutil.which("file"):
  106. mime = run("file -b --mime-type %s", parameter=img_file).strip()
  107. if not "image" in mime:
  108. return ansi_img
  109. if _HAS_CHAFA:
  110. if _HAS_PIL and not _NEW_CHAFA:
  111. # this code is a hack to remove frames from animated gif
  112. img_obj = Image.open(img_file)
  113. if hasattr(img_obj,"n_frames") and img_obj.n_frames > 1:
  114. # we remove all frames but the first one
  115. img_obj.save(img_file,format="gif",save_all=False)
  116. inlines.append("chafa --bg white -s %s -f symbols")
  117. elif _NEW_CHAFA:
  118. inlines.append("chafa --bg white -t 1 -s %s -f symbols --animate=off")
  119. if _NEW_TIMG:
  120. inlines.append("timg --frames=1 -p q -g %sx1000")
  121. image_success = False
  122. while not image_success and len(inlines)>0:
  123. cmd = inlines.pop(0)%width + " %s"
  124. try:
  125. ansi_img = run(cmd, parameter=img_file)
  126. image_success = True
  127. except Exception as err:
  128. ansi_img = "***IMAGE ERROR***\n%s…\n…%s" %(str(err)[:50],str(err)[-50:])
  129. return ansi_img
  130. def terminal_image(img_file):
  131. #Render by timg is better than old chafa.
  132. # it is also centered
  133. cmds = []
  134. if _NEW_CHAFA:
  135. cmds.append("chafa -C on -d 0 --bg white -t 1 -w 1")
  136. elif _HAS_CHAFA:
  137. cmds.append("chafa -d 0 --bg white -t 1 -w 1")
  138. if _NEW_TIMG:
  139. cmds.append("timg --loops=1 -C")
  140. image_success = False
  141. while not image_success and len(cmds) > 0:
  142. cmd = cmds.pop(0) + " %s"
  143. try:
  144. run(cmd, parameter=img_file, direct_output=True)
  145. image_success = True
  146. except Exception as err:
  147. print(err)
  148. # First, we define the different content->text renderers, outside of the rest
  149. # (They could later be factorized in other files or replaced)
  150. class AbstractRenderer():
  151. def __init__(self,content,url,center=True):
  152. self.url = url
  153. self.body = str(content)
  154. #there’s one rendered text and one links table per mode
  155. self.rendered_text = {}
  156. self.links = {}
  157. self.images = {}
  158. self.title = None
  159. self.validity = True
  160. self.temp_files = {}
  161. self.center = center
  162. self.last_mode = "readable"
  163. self.theme = offthemes.default
  164. def display(self,mode=None,directdisplay=False):
  165. wtitle = self.get_formatted_title()
  166. if mode == "source":
  167. body = self.body
  168. else:
  169. body = wtitle + "\n" + self.get_body(mode=mode)
  170. if directdisplay:
  171. print(body)
  172. return True
  173. else:
  174. return body
  175. def has_direct_display(self):
  176. return False
  177. def set_theme(self,theme):
  178. if theme:
  179. self.theme.update(theme)
  180. def get_theme(self):
  181. return self.theme
  182. #This class hold an internal representation of the HTML text
  183. class representation:
  184. def __init__(self,width,title=None,center=True,theme={}):
  185. self.title=title
  186. self.center = center
  187. self.final_text = ""
  188. self.opened = []
  189. self.width = width
  190. self.last_line = ""
  191. self.last_line_colors = {}
  192. self.last_line_center = False
  193. self.new_paragraph = True
  194. self.i_indent = ""
  195. self.s_indent = ""
  196. self.r_indent = ""
  197. self.current_indent = ""
  198. self.disabled_indents = None
  199. # each color is an [open,close] pair code
  200. self.theme = theme
  201. self.colors = offthemes.colors
  202. def _insert(self,color,open=True):
  203. if open: o = 0
  204. else: o = 1
  205. pos = len(self.last_line)
  206. #we remember the position where to insert color codes
  207. if not pos in self.last_line_colors:
  208. self.last_line_colors[pos] = []
  209. #Two inverse code cancel each other
  210. if [color,int(not o)] in self.last_line_colors[pos]:
  211. self.last_line_colors[pos].remove([color,int(not o)])
  212. else:
  213. self.last_line_colors[pos].append([color,o])#+color+str(o))
  214. # Take self.last line and add ANSI codes to it before adding it to
  215. # self.final_text.
  216. def _endline(self):
  217. if len(self.last_line.strip()) > 0:
  218. for c in self.opened:
  219. self._insert(c,open=False)
  220. nextline = ""
  221. added_char = 0
  222. #we insert the color code at the saved positions
  223. while len (self.last_line_colors) > 0:
  224. pos,colors = self.last_line_colors.popitem()
  225. #popitem itterates LIFO.
  226. #So we go, backward, to the pos (starting at the end of last_line)
  227. nextline = self.last_line[pos:] + nextline
  228. ansicol = "\x1b["
  229. for c,o in colors:
  230. ansicol += self.colors[c][o] + ";"
  231. ansicol = ansicol[:-1]+"m"
  232. nextline = ansicol + nextline
  233. added_char += len(ansicol)
  234. self.last_line = self.last_line[:pos]
  235. nextline = self.last_line + nextline
  236. if self.last_line_center:
  237. #we have to care about the ansi char while centering
  238. width = term_width() + added_char
  239. nextline = nextline.strip().center(width)
  240. self.last_line_center = False
  241. else:
  242. #should we lstrip the nextline in the addition ?
  243. nextline = self.current_indent + nextline.lstrip() + self.r_indent
  244. self.current_indent = self.s_indent
  245. self.final_text += nextline
  246. self.last_line = ""
  247. self.final_text += "\n"
  248. for c in self.opened:
  249. self._insert(c,open=True)
  250. else:
  251. self.last_line = ""
  252. def center_line(self):
  253. self.last_line_center = True
  254. def open_theme(self,element):
  255. if element in self.theme:
  256. colors = self.theme[element]
  257. for c in colors:
  258. self.open_color(c)
  259. return True
  260. else:
  261. return False
  262. def close_theme(self,element):
  263. if element in self.theme:
  264. colors = self.theme[element]
  265. for c in colors:
  266. self.close_color(c)
  267. def open_color(self,color):
  268. if color in self.colors and color not in self.opened:
  269. self._insert(color,open=True)
  270. self.opened.append(color)
  271. def close_color(self,color):
  272. if color in self.colors and color in self.opened:
  273. self._insert(color,open=False)
  274. self.opened.remove(color)
  275. def close_all(self):
  276. if len(self.colors) > 0:
  277. self.last_line += "\x1b[0m"
  278. self.opened.clear()
  279. def startindent(self,indent,sub=None,reverse=None):
  280. self._endline()
  281. self.i_indent = indent
  282. self.current_indent = indent
  283. if sub:
  284. self.s_indent = sub
  285. else:
  286. self.s_indent = indent
  287. if reverse:
  288. self.r_indent = reverse
  289. else:
  290. self.r_indent = ""
  291. def endindent(self):
  292. self._endline()
  293. self.i_indent = ""
  294. self.s_indent = ""
  295. self.r_indent = ""
  296. self.current_indent = ""
  297. def _disable_indents(self):
  298. self.disabled_indents = []
  299. self.disabled_indents.append(self.current_indent)
  300. self.disabled_indents.append(self.i_indent)
  301. self.disabled_indents.append(self.s_indent)
  302. self.disabled_indents.append(self.r_indent)
  303. self.endindent()
  304. def _enable_indents(self):
  305. if self.disabled_indents:
  306. self.current_indent = self.disabled_indents[0]
  307. self.i_indent = self.disabled_indents[1]
  308. self.s_indent = self.disabled_indents[2]
  309. self.r_indent = self.disabled_indents[3]
  310. self.disabled_indents = None
  311. def newline(self):
  312. self._endline()
  313. #A new paragraph implies 2 newlines (1 blank line between paragraphs)
  314. #But it is only used if didn’t already started one to avoid plenty
  315. #of blank lines. force=True allows to bypass that limit.
  316. #new_paragraph becomes false as soon as text is entered into it
  317. def newparagraph(self,force=False):
  318. if force or not self.new_paragraph:
  319. self._endline()
  320. self.final_text += "\n"
  321. self.new_paragraph = True
  322. def add_space(self):
  323. if len(self.last_line) > 0 and self.last_line[-1] != " ":
  324. self.last_line += " "
  325. def _title_first(self,intext=None):
  326. if self.title:
  327. if not self.title == intext:
  328. self._disable_indents()
  329. self.open_theme("title")
  330. self.add_text(self.title)
  331. self.close_all()
  332. self.newparagraph()
  333. self._enable_indents()
  334. self.title = None
  335. # Beware, blocks are not wrapped nor indented and left untouched!
  336. # They are mostly useful for pictures and preformatted text.
  337. def add_block(self,intext,theme=None):
  338. # If necessary, we add the title before a block
  339. self._title_first()
  340. # we don’t want to indent blocks
  341. self._endline()
  342. self._disable_indents()
  343. #we have to apply the theme for every line in the intext
  344. #applying theme to preformatted is controversial as it could change it
  345. if theme:
  346. block = ""
  347. lines = intext.split("\n")
  348. for l in lines:
  349. self.open_theme(theme)
  350. self.last_line += self.current_indent + l
  351. self.close_theme(theme)
  352. self._endline()
  353. self.last_line += "\n"
  354. #one thing is sure : we need to keep unthemed blocks for images!
  355. else:
  356. self.final_text += self.current_indent + intext
  357. self.new_paragraph = False
  358. self._endline()
  359. self._enable_indents()
  360. def add_text(self,intext):
  361. self._title_first(intext=intext)
  362. lines = []
  363. last = (self.last_line + intext)
  364. self.last_line = ""
  365. # With the following, we basically cancel adding only spaces
  366. # on an empty line
  367. if len(last.strip()) > 0:
  368. self.new_paragraph = False
  369. else:
  370. last = last.strip()
  371. if len(last) > self.width:
  372. width = self.width - len(self.current_indent) - len(self.r_indent)
  373. spaces_left = len(last) - len(last.lstrip())
  374. spaces_right = len(last) - len(last.rstrip())
  375. lines = textwrap.wrap(last,width,drop_whitespace=True)
  376. self.last_line += spaces_left*" "
  377. while len(lines) > 1:
  378. l = lines.pop(0)
  379. self.last_line += l
  380. self._endline()
  381. if len(lines) == 1:
  382. li = lines[0]
  383. self.last_line += li + spaces_right*" "
  384. else:
  385. self.last_line = last
  386. def get_final(self):
  387. self.close_all()
  388. self._endline()
  389. #if no content, we still add the title
  390. self._title_first()
  391. lines = self.final_text.splitlines()
  392. lines2 = []
  393. termspace = shutil.get_terminal_size()[0]
  394. #Following code instert blanck spaces to center the content
  395. if self.center and termspace > term_width():
  396. margin = int((termspace - term_width())//2)
  397. else:
  398. margin = 0
  399. for l in lines :
  400. lines2.append(margin*" "+l)
  401. return "\n".join(lines2)
  402. def get_subscribe_links(self):
  403. return [[self.url,self.get_mime(),self.get_title()]]
  404. def is_valid(self):
  405. return self.validity
  406. def set_mode(self,mode):
  407. self.last_mode = mode
  408. def get_mode(self):
  409. return self.last_mode
  410. def get_link(self,nb):
  411. links = self.get_links()
  412. if len(links) < nb:
  413. print("Index too high! No link %s for %s" %(nb,self.url))
  414. return None
  415. else:
  416. return links[nb-1]
  417. #get_title is about the "content title", so the title in the page itself
  418. def get_title(self):
  419. return "Abstract title"
  420. def get_page_title(self):
  421. title = self.get_title()
  422. if not title or len(title) == 0:
  423. title = self.get_url_title()
  424. else:
  425. title += " (%s)" %self.get_url_title()
  426. return title
  427. def get_formatted_title(self):
  428. title = self.get_url_title()
  429. nbr = len(self.get_links())
  430. if is_local(self.url):
  431. title += " (%s items)"%nbr
  432. str_last = "local file"
  433. else:
  434. str_last = "last accessed on %s"\
  435. %time.ctime(netcache.cache_last_modified(self.url))
  436. title += " (%s links)"%nbr
  437. return self._window_title(title,info=str_last)
  438. #this function is about creating a title derived from the URL
  439. def get_url_title(self):
  440. #small intelligence to try to find a good name for a capsule
  441. #we try to find eithe ~username or /users/username
  442. #else we fallback to hostname
  443. if not self.url: return ""
  444. if is_local(self.url):
  445. splitpath = self.url.split("/")
  446. filename = splitpath[-1]
  447. return filename
  448. path = self.url
  449. parsed = urllib.parse.urlparse(self.url)
  450. red_title = parsed.hostname
  451. if "user" in path:
  452. i = 0
  453. splitted = path.split("/")
  454. while i < (len(splitted)-1):
  455. if splitted[i].startswith("user"):
  456. red_title = splitted[i+1]
  457. i += 1
  458. if "~" in path:
  459. for pp in path.split("/"):
  460. if pp.startswith("~"):
  461. red_title = pp[1:]
  462. return red_title
  463. # This function return a list of URL which should be downloaded
  464. # before displaying the page (images in HTML pages, typically)
  465. def get_images(self,mode=None):
  466. if not mode: mode = self.last_mode
  467. if not mode in self.images:
  468. self.get_body(mode=mode)
  469. # we also invalidate the body that was done without images
  470. self.rendered_text.pop(mode)
  471. if mode in self.images:
  472. return self.images[mode]
  473. else:
  474. return []
  475. #This function will give gemtext to the gemtext renderer
  476. def prepare(self,body,mode=None):
  477. return [[body,None]]
  478. def _build_body_and_links(self,mode,width=None):
  479. if not width:
  480. width = term_width()
  481. prepared_bodies = self.prepare(self.body,mode=mode)
  482. self.rendered_text[mode] = ""
  483. self.links[mode] = []
  484. for b in prepared_bodies:
  485. results = None
  486. size = len(self.links[mode])
  487. if b[1] in _FORMAT_RENDERERS:
  488. r = _FORMAT_RENDERERS[b[1]](b[0],self.url,center=self.center)
  489. results = r.render(b[0],width=width,mode=mode,startlinks=size)
  490. else:
  491. results = self.render(b[0],width=width,mode=mode,startlinks=size)
  492. if results:
  493. self.rendered_text[mode] += results[0] + "\n"
  494. #we should absolutize all URLs here
  495. for l in results[1]:
  496. ll = l.split()[0]
  497. try:
  498. abs_l = urllib.parse.urljoin(self.url,ll)
  499. self.links[mode].append(abs_l)
  500. except Exception as err:
  501. print("Urljoin Error: Could not make an URL out of %s and %s"%(self.url,ll))
  502. for l in self.get_subscribe_links()[1:]:
  503. self.links[mode].append(l[0])
  504. def get_body(self,width=None,mode=None):
  505. if not mode: mode = self.last_mode
  506. if mode not in self.rendered_text:
  507. self._build_body_and_links(mode,width)
  508. return self.rendered_text[mode]
  509. def get_links(self,mode=None):
  510. if not mode: mode = self.last_mode
  511. if mode not in self.links :
  512. self._build_body_and_links(mode)
  513. return self.links[mode]
  514. def _window_title(self,title,info=None):
  515. title_r = self.representation(term_width(),theme=self.theme)
  516. title_r.open_theme("window_title")
  517. title_r.add_text(title)
  518. title_r.close_theme("window_title")
  519. if info:
  520. title_r.open_theme("window_subtitle")
  521. title_r.add_text(" (%s)"%info)
  522. title_r.close_theme("window_subtitle")
  523. return title_r.get_final()
  524. # An instance of AbstractRenderer should have a self.render(body,width,mode) method.
  525. # 3 modes are used : readable (by default), full and links_only (the fastest, when
  526. # rendered content is not used, only the links are needed)
  527. # The prepare() function is called before the rendering. It is useful if
  528. # your renderer output in a format suitable for another existing renderer (such as gemtext)
  529. # The prepare() function output a list of tuple. Each tuple is [output text, format] where
  530. # format should be in _FORMAT_RENDERERS. If None, current renderer is used
  531. class PlaintextRenderer(AbstractRenderer):
  532. def get_mime(self):
  533. return "text/plain"
  534. def get_title(self):
  535. if self.title:
  536. return self.title
  537. elif self.body:
  538. lines = self.body.splitlines()
  539. if len(lines) > 0:
  540. # If not title found, we take the first 50 char
  541. # of the first line
  542. title_line = lines[0].strip()
  543. if len(title_line) > 50:
  544. title_line = title_line[:49] + "…"
  545. self.title = title_line
  546. return self.title
  547. else:
  548. self.title = "Empty Page"
  549. return self.title
  550. else:
  551. return "(unknown)"
  552. def render(self,gemtext, width=None,mode=None,startlinks=0):
  553. return gemtext, []
  554. # Gemtext Rendering Engine
  555. class GemtextRenderer(AbstractRenderer):
  556. def get_mime(self):
  557. return "text/gemini"
  558. def get_title(self):
  559. if self.title:
  560. return self.title
  561. elif self.body:
  562. lines = self.body.splitlines()
  563. for line in lines:
  564. if line.startswith("#"):
  565. self.title = line.strip("#").strip()
  566. return self.title
  567. if len(lines) > 0:
  568. # If not title found, we take the first 50 char
  569. # of the first line
  570. title_line = lines[0].strip()
  571. if len(title_line) > 50:
  572. title_line = title_line[:49] + "…"
  573. self.title = title_line
  574. return self.title
  575. else:
  576. self.title = "Empty Page"
  577. return self.title
  578. else:
  579. return "(unknown)"
  580. #render_gemtext
  581. def render(self,gemtext, width=None,mode=None,startlinks=0):
  582. if not width:
  583. width = term_width()
  584. r = self.representation(width,theme=self.theme)
  585. links = []
  586. hidden_links = []
  587. preformatted = False
  588. def format_link(url,index,name=None):
  589. if "://" in url:
  590. protocol,adress = url.split("://",maxsplit=1)
  591. protocol = " %s" %protocol
  592. else:
  593. adress = url
  594. protocol = ""
  595. if "gemini" in protocol or "list" in protocol:
  596. protocol = ""
  597. if not name:
  598. name = adress
  599. line = "[%d%s] %s" % (index, protocol, name)
  600. return line
  601. for line in gemtext.splitlines():
  602. r.newline()
  603. if line.startswith("```"):
  604. preformatted = not preformatted
  605. if preformatted:
  606. r.open_theme("preformatted")
  607. else:
  608. r.close_theme("preformatted")
  609. elif preformatted:
  610. # infinite line to not wrap preformated
  611. r.add_block(line+"\n",theme="preformatted")
  612. elif len(line.strip()) == 0:
  613. r.newparagraph(force=True)
  614. elif line.startswith("=>"):
  615. strippedline = line[2:].strip()
  616. if strippedline:
  617. links.append(strippedline)
  618. splitted = strippedline.split(maxsplit=1)
  619. url = splitted[0]
  620. name = None
  621. if len(splitted) > 1:
  622. name = splitted[1]
  623. link = format_link(url,len(links)+startlinks,name=name)
  624. # If the link point to a page that has been cached less than
  625. # 600 seconds after this page, we consider it as a new_link
  626. current_modif = netcache.cache_last_modified(self.url)
  627. link_modif = netcache.cache_last_modified(url)
  628. if current_modif and link_modif and current_modif - link_modif < 600 and\
  629. r.open_theme("new_link"):
  630. theme = "new_link"
  631. elif r.open_theme("oneline_link"):
  632. theme = "oneline_link"
  633. else:
  634. theme = "link"
  635. r.open_theme("link")
  636. startpos = link.find("] ") + 2
  637. r.startindent("",sub=startpos*" ")
  638. r.add_text(link)
  639. r.close_theme(theme)
  640. r.endindent()
  641. elif line.startswith("* "):
  642. line = line[1:].lstrip("\t ")
  643. r.startindent("• ",sub=" ")
  644. r.add_text(line)
  645. r.endindent()
  646. elif line.startswith(">"):
  647. line = line[1:].lstrip("\t ")
  648. r.startindent("> ")
  649. r.open_theme("blockquote")
  650. r.add_text(line)
  651. r.close_theme("blockquote")
  652. r.endindent()
  653. elif line.startswith("###"):
  654. line = line[3:].lstrip("\t ")
  655. if r.open_theme("subsubtitle"):
  656. theme = "subsubtitle"
  657. else:
  658. r.open_theme("subtitle")
  659. theme = "subtitle"
  660. r.add_text(line)
  661. r.close_theme(theme)
  662. elif line.startswith("##"):
  663. line = line[2:].lstrip("\t ")
  664. r.open_theme("subtitle")
  665. r.add_text(line)
  666. r.close_theme("subtitle")
  667. elif line.startswith("#"):
  668. line = line[1:].lstrip("\t ")
  669. if not self.title:
  670. self.title = line
  671. r.open_theme("title")
  672. r.add_text(line)
  673. r.close_theme("title")
  674. else:
  675. if "://" in line:
  676. words = line.split()
  677. for w in words:
  678. if "://" in w and looks_like_url(w):
  679. hidden_links.append(w)
  680. r.add_text(line.rstrip())
  681. links += hidden_links
  682. return r.get_final(), links
  683. class EmptyRenderer(GemtextRenderer):
  684. def get_mime(self):
  685. return "text/empty"
  686. def prepare(self,body,mode=None):
  687. text= "(empty file)"
  688. return [[text, "GemtextRenderer"]]
  689. class GopherRenderer(AbstractRenderer):
  690. def get_mime(self):
  691. return "text/gopher"
  692. def get_title(self):
  693. if not self.title:
  694. self.title = ""
  695. if self.body:
  696. firstline = self.body.splitlines()[0]
  697. firstline = firstline.split("\t")[0]
  698. if firstline.startswith("i"):
  699. firstline = firstline[1:]
  700. self.title = firstline
  701. return self.title
  702. #menu_or_text
  703. def render(self,body,width=None,mode=None,startlinks=0):
  704. if not width:
  705. width = term_width()
  706. try:
  707. render,links = self._render_goph(body,width=width,mode=mode,startlinks=startlinks)
  708. except Exception as err:
  709. print("Error rendering Gopher ",err)
  710. r = self.representation(width,theme=self.theme)
  711. r.add_block(body)
  712. render = r.get_final()
  713. links = []
  714. return render,links
  715. def _render_goph(self,body,width=None,mode=None,startlinks=0):
  716. if not width:
  717. width = term_width()
  718. # This was copied straight from Agena (then later adapted)
  719. links = []
  720. r = self.representation(width,theme=self.theme)
  721. for line in self.body.split("\n"):
  722. r.newline()
  723. if line.startswith("i"):
  724. towrap = line[1:].split("\t")[0]
  725. if len(towrap.strip()) > 0:
  726. r.add_text(towrap)
  727. else:
  728. r.newparagraph()
  729. elif not line.strip() in [".",""]:
  730. parts = line.split("\t")
  731. parts[-1] = parts[-1].strip()
  732. if parts[-1] == "+":
  733. parts = parts[:-1]
  734. if len(parts) == 4:
  735. name,path,host,port = parts
  736. itemtype = name[0]
  737. name = name[1:]
  738. if port == "70":
  739. port = ""
  740. else:
  741. port = ":%s"%port
  742. if itemtype == "h" and path.startswith("URL:"):
  743. url = path[4:]
  744. else:
  745. url = "gopher://%s%s/%s%s" %(host,port,itemtype,path)
  746. url = url.replace(" ","%20")
  747. linkline = url + " " + name
  748. links.append(linkline)
  749. number = len(links) + startlinks
  750. towrap = "[%s] "%str(number)+ name
  751. r.add_text(towrap)
  752. else:
  753. r.add_text(line)
  754. return r.get_final(),links
  class FolderRenderer(GemtextRenderer):
      """Renderer producing a gemtext index of the lists in ``<datadir>/lists``."""
      #it was initialized with:
      #self.renderer = FolderRenderer("",self.get_cache_path(),datadir=xdg("data"))
      def __init__(self,content,url,center=True,datadir=None):
          GemtextRenderer.__init__(self,content,url,center)
          self.datadir = datadir

      def get_mime(self):
          return "Directory"

      def prepare(self,body,mode=None):
          """Build the gemtext index of the user's lists.

          Returns ``[[body, None]]``.  When no list is found, the ``body``
          argument is returned unchanged.
          """
          def get_first_line(l):
              # Return the first line of list `l` if it is a "#" line, else None
              path = os.path.join(listdir,l+".gmi")
              with open(path) as f:
                  first_line = f.readline().strip()
              if first_line.startswith("#"):
                  return first_line
              return None

          def write_list(l):
              # One gemtext link line per list, with its number of links
              entries = []
              for li in l:
                  path = "list:///%s"%li
                  r = renderer_from_file(netcache.get_cache_path(path))
                  # No renderer (e.g. nothing cached) counts as an empty list
                  size = len(r.get_links()) if r else 0
                  entries.append("=> %s %s (%s items)\n" %(str(path),li,size))
              return "".join(entries)

          self.title = "My lists"
          lists = []
          listdir = None
          # datadir is optional in the constructor: without it nothing is listed
          if self.datadir:
              listdir = os.path.join(self.datadir,"lists")
              if os.path.isdir(listdir):
                  for l in os.listdir(listdir):
                      # Only .gmi files are lists; drop the extension for the name
                      if l.endswith(".gmi"):
                          lists.append(l[:-4])
          if lists:
              body = ""
              my_lists = []
              system_lists = []
              subscriptions = []
              frozen = []
              lists.sort()
              for l in lists:
                  if l in ["history","to_fetch","archives","tour"]:
                      system_lists.append(l)
                  else:
                      # The "#" first line carries the list's tag
                      first_line = get_first_line(l)
                      if first_line and "#subscribed" in first_line:
                          subscriptions.append(l)
                      elif first_line and "#frozen" in first_line:
                          frozen.append(l)
                      else:
                          my_lists.append(l)
              if my_lists:
                  body += "\n## Bookmarks Lists (updated during sync)\n"
                  body += write_list(my_lists)
              if subscriptions:
                  body += "\n## Subscriptions (new links in those are added to tour)\n"
                  body += write_list(subscriptions)
              if frozen:
                  body += "\n## Frozen (fetched but never updated)\n"
                  body += write_list(frozen)
              if system_lists:
                  body += "\n## System Lists\n"
                  body += write_list(system_lists)
          return [[body,None]]
  class FeedRenderer(GemtextRenderer):
      """Renderer turning an RSS/Atom feed into gemtext via feedparser."""
      def get_mime(self):
          return "application/rss+xml"

      def is_valid(self):
          """Return True only for a well-formed feed with at least one entry."""
          if not _DO_FEED:
              return False
          try:
              parsed = feedparser.parse(self.body)
          except Exception:
              parsed = False
          if not parsed:
              return False
          elif parsed.bozo:
              return False
          else:
              #If no content, then fallback to HTML
              return len(parsed.entries) > 0

      def get_title(self):
          if not self.title:
              self.get_body()
          return self.title

      def prepare(self,content,mode=None,width=None):
          """Convert the feed into a list of ``[text, mime]`` blocks.

          In "full" mode every entry becomes its own block followed by its
          summary (as text/html) and a separator; otherwise all entries are
          gathered in a single gemtext block of links.  ``self.validity``
          is cleared when the feed cannot be used.
          """
          if not mode: mode = self.last_mode
          if not width:
              width = term_width()
          self.title = "RSS/Atom feed"
          toreturn = []
          page = ""
          if not _DO_FEED:
              page += "Please install python-feedparser to handle RSS/Atom feeds\n"
              self.validity = False
              # Same shape as every other return: a list of [text, mime]
              return [[page,None]]
          parsed = feedparser.parse(content)
          if parsed.bozo:
              page += "Invalid RSS feed\n\n"
              page += str(parsed.bozo_exception)
              self.validity = False
              # Hand the message back instead of silently dropping it
              return [[page,None]]
          if "title" in parsed.feed:
              t = parsed.feed.title
          else:
              t = "Unknown"
          self.title = "%s (XML feed)" %t
          title = "# %s"%self.title
          page += title + "\n"
          if "updated" in parsed.feed:
              page += "Last updated on %s\n\n" %parsed.feed.updated
          if "subtitle" in parsed.feed:
              page += parsed.feed.subtitle + "\n"
          if "link" in parsed.feed:
              page += "=> %s\n" %parsed.feed.link
          page += "\n## Entries\n"
          toreturn.append([page,None])
          if len(parsed.entries) < 1:
              self.validity = False
          postslist = ""
          for entry in parsed.entries:
              if "link" in entry:
                  line = "=> %s " %entry.link
              elif "links" in entry and len(entry.links) > 0:
                  # First link that actually carries an href, if any
                  link = None
                  for candidate in entry.links:
                      link = candidate.get("href")
                      if link:
                          break
                  if link:
                      line = "=> %s "%link
                  else:
                      line = "* "
              else:
                  line = "* "
              if "published" in entry:
                  #sometimes fails so protect it
                  try:
                      pub_date = time.strftime("%Y-%m-%d",entry.published_parsed)
                      line += pub_date + " : "
                  except Exception:
                      # Best effort: an unparsable date is simply left out
                      pass
              if "title" in entry:
                  line += "%s" %(entry.title)
              if "author" in entry:
                  line += " (by %s)"%entry.author
              if mode == "full":
                  toreturn.append([line,None])
                  if "summary" in entry:
                      toreturn.append([entry.summary,"text/html"])
                  toreturn.append(["------------",None])
              else:
                  postslist += line + "\n"
          #If each post is appended to toreturn, a \n is inserted
          #between each item of the list. I don't like it. Hence this hack
          if mode != "full":
              toreturn.append([postslist,None])
          return toreturn
  class ImageRenderer(AbstractRenderer):
      """Renderer for picture files; has no links and a fixed title."""
      def get_mime(self):
          return "image/*"

      def is_valid(self):
          return True if _RENDER_IMAGE else False

      def get_links(self,mode=None):
          return []

      def get_title(self):
          return "Picture file"

      def render(self,img,width=None,mode=None,startlinks=0):
          """Return ``(ansi_text, [])`` for the image, padded on the left.

          Nothing is rendered in the links-only modes.
          """
          #with inline, we use symbols to be rendered with less.
          #else we use the best possible renderer.
          if mode in ("full_links_only","links_only"):
              return "", []
          if width:
              # Left margin centering a `width`-wide image in the terminal
              spaces = int((term_width() - width)//2)
          else:
              width = term_width()
              spaces = 0
          ansi_img = inline_image(img,width)
          #Now centering the image
          margin = spaces*" "
          new_img = "".join(margin + row + "\n" for row in ansi_img.splitlines())
          return new_img, []

      def has_direct_display(self):
          return _RENDER_IMAGE

      def display(self,mode=None,directdisplay=False):
          """Return title + body as text, or draw directly and return True."""
          wtitle = self.get_formatted_title()
          if directdisplay:
              print(self._window_title(wtitle))
              terminal_image(self.body)
              return True
          return wtitle + "\n" + self.get_body(mode=mode)
  class HtmlRenderer(AbstractRenderer):
      """Renderer walking a BeautifulSoup tree to produce ANSI text and links."""
      def get_mime(self):
          return "text/html"

      def is_valid(self):
          if not _DO_HTML:
              print("HTML document detected. Please install python-bs4 and python-readability.")
          return _DO_HTML and self.validity

      def get_subscribe_links(self):
          """Return ``[url, mime, title]`` entries: the page itself, then every
          ``<link rel="alternate">`` whose type mentions rss, atom or feed."""
          subs = [[self.url,self.get_mime(),self.get_title()]]
          soup = BeautifulSoup(self.body, 'html.parser')
          links = soup.find_all("link",rel="alternate",recursive=True)
          for l in links:
              ty = l.get("type")
              if ty and ("rss" in ty or "atom" in ty or "feed" in ty):
                  # some rss links are relative: make them absolute
                  sublink = urllib.parse.urljoin(self.url, l.get("href"))
                  subs.append([sublink,ty,l.get("title")])
          return subs

      def get_title(self):
          """Return the cached title, computing it from the body if needed."""
          if self.title:
              return self.title
          elif self.body:
              if _HAS_READABILITY:
                  try:
                      readable = Document(self.body)
                      self.title = readable.short_title()
                      return self.title
                  except Exception:
                      # Best effort: fall through to the <title> tag below
                      pass
              soup = BeautifulSoup(self.body,"html.parser")
              if soup.title:
                  self.title = str(soup.title.string)
              else:
                  self.title = ""
              return self.title
          else:
              return ""

      # Our own HTML engine (crazy, isn't it?)
      # Return [rendered_body, list_of_links]
      # mode is either links_only, readable or full
      def render(self,body,mode=None,width=None,add_title=True,startlinks=0):
          """Render ``body`` (HTML source) and return ``(text, links)``.

          Each link is the string ``"<url> <label>"``; the number shown in
          the text is its 1-based position plus ``startlinks``.
          """
          if not mode: mode = self.last_mode
          if not width:
              width = term_width()
          if not _DO_HTML:
              print("HTML document detected. Please install python-bs4 and python-readability.")
              # NOTE(review): this path returns None while every other path
              # returns a (text, links) tuple -- confirm callers cope.
              return
          # This method recursively parse the HTML
          r = self.representation(width,title=self.get_title(),center=self.center,theme=self.theme)
          links = []
          # You know how bad html is when you realize that space is sometimes meaningful, sometimes not.
          # CR are not meaningful. Except that, sometimes, they should be interpreted as spaces.
          # HTML is real crap. At least the one people are generating.

          def render_image(src,width=40,mode=None):
              # Return the ANSI rendering of the image at `src`, or "" when
              # images are disabled, not wanted in this mode, or unsupported.
              ansi_img = ""
              imgurl,imgdata = looks_like_base64(src,self.url)
              if _RENDER_IMAGE and mode not in ["full_links_only","links_only"] and imgurl:
                  try:
                      #translate the URL into its cache path
                      img = netcache.get_cache_path(imgurl)
                      if imgdata:
                          # Inline base64 data: write it into the cache first
                          os.makedirs(os.path.dirname(img), exist_ok=True)
                          with open(img,"wb") as cached:
                              cached.write(base64.b64decode(imgdata))
                      if netcache.is_cache_valid(img):
                          renderer = ImageRenderer(img,imgurl)
                          # Images are at most 40 wide, less if the terminal is smaller
                          size = 40 if width > 40 else width
                          ansi_img = "\n" + renderer.get_body(width=size,mode="inline")
                  except Exception as err:
                      #we sometimes encounter really bad formatted files or URL
                      ansi_img = textwrap.fill("[BAD IMG] %s - %s"%(err,src),width) + "\n"
              return ansi_img

          def sanitize_string(string):
              # Normalise whitespace, decode HTML entities and keep at most
              # one leading and one trailing space.
              #never start with a "\n"
              #string = string.lstrip("\n")
              string = string.replace("\r","").replace("\n", " ").replace("\t"," ")
              endspace = string.endswith(" ") or string.endswith("\xa0")
              startspace = string.startswith(" ") or string.startswith("\xa0")
              toreturn = string.strip()
              # Collapse runs of spaces; the test must look for a *double*
              # space, otherwise the loop can never end
              while "  " in toreturn:
                  toreturn = toreturn.replace("  "," ")
              toreturn = html.unescape(toreturn)
              if endspace and not toreturn.endswith(" ") and not toreturn.endswith("\xa0"):
                  toreturn += " "
              if startspace and not toreturn.startswith(" ") and not toreturn.startswith("\xa0"):
                  toreturn = " " + toreturn
              return toreturn

          def recursive_render(element,indent="",preformatted=False):
              # Emit `element` and its children into `r`, collecting links.
              # NOTE(review): the indent strings below are reproduced from the
              # reviewed copy, where runs of spaces may have been collapsed --
              # verify their widths.
              if element.name in ["blockquote", "dd"]:
                  r.newparagraph()
                  r.startindent(" ",reverse=" ")
                  for child in element.children:
                      r.open_theme("blockquote")
                      recursive_render(child,indent="\t")
                      r.close_theme("blockquote")
                  r.endindent()
              elif element.name in ["div","p","dt"]:
                  r.newparagraph()
                  for child in element.children:
                      recursive_render(child,indent=indent)
                  r.newparagraph()
              elif element.name in ["span"]:
                  r.add_space()
                  for child in element.children:
                      recursive_render(child,indent=indent)
                  r.add_space()
              elif element.name in ["h1","h2","h3","h4","h5","h6"]:
                  if element.name in ["h1"]:
                      r.open_theme("title")
                  elif element.name in ["h2","h3"]:
                      r.open_theme("subtitle")
                  elif element.name in ["h4","h5","h6"]:
                      # Fall back on "subtitle" when "subsubtitle" cannot be opened
                      if not r.open_theme("subsubtitle"):
                          r.open_theme("subtitle")
                  r.newparagraph()
                  for child in element.children:
                      recursive_render(child)
                  r.close_all()
                  r.newparagraph()
              elif element.name in ["code","tt"]:
                  for child in element.children:
                      recursive_render(child,indent=indent,preformatted=True)
              elif element.name in ["pre"]:
                  r.newparagraph()
                  r.add_block(element.text,theme="preformatted")
                  r.newparagraph(force=True)
              elif element.name in ["li"]:
                  r.startindent(" • ",sub=" ")
                  for child in element.children:
                      recursive_render(child,indent=indent)
                  r.endindent()
              elif element.name in ["tr"]:
                  r.startindent("|",reverse="|")
                  for child in element.children:
                      recursive_render(child,indent=indent)
                  r.endindent()
              elif element.name in ["td","th"]:
                  r.add_text("| ")
                  for child in element.children:
                      recursive_render(child)
                  r.add_text(" |")
              # italics
              elif element.name in ["em","i"]:
                  r.open_color("italic")
                  for child in element.children:
                      recursive_render(child,indent=indent,preformatted=preformatted)
                  r.close_color("italic")
              #bold
              elif element.name in ["b","strong"]:
                  r.open_color("bold")
                  for child in element.children:
                      recursive_render(child,indent=indent,preformatted=preformatted)
                  r.close_color("bold")
              elif element.name == "a":
                  link = element.get('href')
                  # support for images nested in links
                  if link:
                      text = ""
                      imgtext = ""
                      #we display images first in a link
                      for child in element.children:
                          if child.name == "img":
                              recursive_render(child)
                              imgtext = "[IMG LINK %s]"
                      links.append(link+" "+text)
                      link_id = str(len(links)+startlinks)
                      r.open_theme("link")
                      for child in element.children:
                          if child.name != "img":
                              recursive_render(child,preformatted=preformatted)
                      if imgtext != "":
                          r.center_line()
                          r.add_text(imgtext%link_id)
                      else:
                          r.add_text(" [%s]"%link_id)
                      r.close_theme("link")
                  else:
                      #No real link found
                      for child in element.children:
                          recursive_render(child,preformatted=preformatted)
              elif element.name == "img":
                  src = element.get("src")
                  text = ""
                  ansi_img = render_image(src,width=width,mode=mode)
                  alt = element.get("alt")
                  if alt:
                      alt = sanitize_string(alt)
                      text += "[IMG] %s"%alt
                  else:
                      text += "[IMG]"
                  if src:
                      if not mode in self.images:
                          self.images[mode] = []
                      abs_url,data = looks_like_base64(src,self.url)
                      #if abs_url is None, it means we don't support
                      #the image (such as svg+xml). So we hide it.
                      if abs_url:
                          links.append(abs_url+" "+text)
                          self.images[mode].append(abs_url)
                          link_id = " [%s]"%(len(links)+startlinks)
                          r.add_block(ansi_img)
                          r.open_theme("image_link")
                          r.center_line()
                          r.add_text(text + link_id)
                          r.close_theme("image_link")
                          r.newline()
              elif element.name == "video":
                  poster = element.get("poster")
                  src = element.get("src")
                  # Without a src attribute, use the first <source> child that has one
                  for child in element.children:
                      if not src:
                          if child.name == "source":
                              src = child.get("src")
                  text = ""
                  if poster:
                      ansi_img = render_image(poster,width=width,mode=mode)
                  alt = element.get("alt")
                  if alt:
                      alt = sanitize_string(alt)
                      text += "[VIDEO] %s"%alt
                  else:
                      text += "[VIDEO]"
                  if poster:
                      if not mode in self.images:
                          self.images[mode] = []
                      poster_url,d = looks_like_base64(poster,self.url)
                      if poster_url:
                          # Only resolve the video URL when there is a source at all
                          vid_url = looks_like_base64(src,self.url)[0] if src else None
                          self.images[mode].append(poster_url)
                          r.add_block(ansi_img)
                          r.open_theme("image_link")
                          r.center_line()
                          if vid_url:
                              links.append(vid_url+" "+text)
                              link_id = " [%s]"%(len(links)+startlinks)
                              r.add_text(text + link_id)
                          else:
                              r.add_text(text)
                          r.close_theme("image_link")
                          r.newline()
                  elif src:
                      vid_url,d = looks_like_base64(src,self.url)
                      # No usable URL: keep the label but do not register a link
                      if vid_url:
                          links.append(vid_url+" "+text)
                          link_id = " [%s]"%(len(links)+startlinks)
                      else:
                          link_id = ""
                      r.open_theme("image_link")
                      r.center_line()
                      r.add_text(text + link_id)
                      r.close_theme("image_link")
                      r.newline()
              elif element.name == "br":
                  r.newline()
              elif element.name not in ["script","style","template"] and not isinstance(element,Comment):
                  if element.string:
                      if preformatted :
                          r.open_theme("preformatted")
                          r.add_text(element.string)
                          r.close_theme("preformatted")
                      else:
                          s = sanitize_string(element.string)
                          if len(s.strip()) > 0:
                              r.add_text(s)
                  else:
                      for child in element.children:
                          recursive_render(child,indent=indent)

          # the real render_html heart
          if mode in ["full","full_links_only"]:
              summary = body
          elif _HAS_READABILITY:
              try:
                  readable = Document(body)
                  summary = readable.summary()
              except Exception:
                  # readability could not digest the page: render it whole
                  summary = body
          else:
              summary = body
          soup = BeautifulSoup(summary, 'html.parser')
          #soup = BeautifulSoup(summary, 'html5lib')
          if soup :
              if soup.body :
                  recursive_render(soup.body)
              else:
                  recursive_render(soup)
          return r.get_final(),links
  # Mapping of mimetypes to renderer classes.
  # (content whose mimetype is text/* but is not listed here is rendered as Gemtext)
  # Keys are matched against the detected mimetype with fnmatch, so a key
  # may be a pattern such as "image/*".
  _FORMAT_RENDERERS = {
      "text/gemini": GemtextRenderer,
      "text/html" : HtmlRenderer,
      "text/xml" : FeedRenderer,
      "text/plain" : PlaintextRenderer,
      "application/xml" : FeedRenderer,
      "application/rss+xml" : FeedRenderer,
      "application/atom+xml" : FeedRenderer,
      "text/gopher": GopherRenderer,
      "image/*": ImageRenderer,
      "application/javascript": HtmlRenderer,
      "application/json": HtmlRenderer,
      "text/empty": EmptyRenderer,
  }
  def get_mime(path,url=None):
      """Guess the mimetype of the file at ``path``.

      path -- local path of the content (or a ``mailto:`` string)
      url  -- original URL; a ``gopher://`` URL decides the type by itself

      Returns a mimetype string, one of the pseudo-types "text/empty",
      "mailto", "Local Folder" and "binary", or None when ``path`` is empty
      or nothing could be guessed.
      """
      #Beware, this one is really a shady ad-hoc function
      if not path:
          return None
      #If the file is empty, simply returns it
      if os.path.exists(path) and os.stat(path).st_size == 0:
          return "text/empty"
      # Second opinion from mimetypes; only the `file` branch fills it in
      mime2 = None
      if url and url.startswith("gopher://"):
          #special case for gopher
          #code copy/pasted from netcache
          parsed = urllib.parse.urlparse(url)
          # The item type is the first character after the leading "/"
          if len(parsed.path) >= 2:
              itemtype = parsed.path[1]
          else:
              itemtype = "1"
          if itemtype == "0":
              mime = "text/gemini"
          elif itemtype == "h":
              mime = "text/html"
          elif itemtype in ("9","g","I","s",";"):
              mime = "binary"
          else:
              # "1" and every unknown type are treated as a gophermap
              mime = "text/gopher"
      elif path.startswith("mailto:"):
          mime = "mailto"
      elif os.path.isdir(path):
          mime = "Local Folder"
      elif path.endswith(".gmi"):
          mime = "text/gemini"
      elif path.endswith("gophermap"):
          mime = "text/gopher"
      elif shutil.which("file") :
          mime = run("file -b --mime-type %s", parameter=path).strip()
          mime2,encoding = mimetypes.guess_type(path,strict=False)
          #If we hesitate between html and xml, take the xml one
          #because the FeedRenderer falls back to HtmlRenderer
          if mime2 and mime != mime2 and "html" in mime and "xml" in mime2:
              mime = "text/xml"
          # If it's a xml file, consider it as such, regardless of what file thinks
          elif path.endswith(".xml"):
              mime = "text/xml"
          # If it doesn't end with .svg, it is probably an xml, not a SVG file
          elif "svg" in mime and not path.endswith(".svg"):
              mime = "text/xml"
          #Some xml/html document are considered as octet-stream
          if mime == "application/octet-stream":
              mime = "text/xml"
      else:
          mime,encoding = mimetypes.guess_type(path,strict=False)
      if not mime:
          # Nothing could be guessed: say why when `file` is missing and
          # hand back the empty result instead of crashing on it below
          if not shutil.which("file") :
              print("Cannot guess the mime type of the file. Please install \"file\".")
          return mime
      if mime.startswith("text") and mime not in _FORMAT_RENDERERS:
          if mime2 and mime2 in _FORMAT_RENDERERS:
              mime = mime2
          else:
              #by default, we consider it's gemini except for html
              mime = "text/gemini"
      #file doesn't recognise gemtext. It should be the default renderer.
      #the only case where it doesn't make sense is if the file is .txt
      if mime == "text/plain" and not path.endswith(".txt"):
          mime = "text/gemini"
      return mime
  def renderer_from_file(path,url=None,theme=None):
      """Build the renderer suited to the file at ``path``.

      path  -- local file to render
      url   -- original URL of the content; defaults to ``path``
      theme -- optional theme handed over to the renderer

      Returns the renderer, or None when ``path`` is empty, the file does
      not exist, its type cannot be determined or no renderer accepts it.
      """
      if not path:
          return None
      mime = get_mime(path,url=url)
      if not url:
          url = path
      # Without a type or without a file there is nothing to render
      if not mime or not os.path.exists(path):
          return None
      if mime.startswith("text/") or mime in _FORMAT_RENDERERS:
          with open(path,errors="ignore") as f:
              content = f.read()
      else:
          # Other types get the path itself instead of the file's text
          content = path
      return set_renderer(content,url,mime,theme=theme)
  def set_renderer(content,url,mime,theme=None):
      """Instantiate the renderer registered for ``mime``.

      "Local Folder" always yields a FolderRenderer.  Otherwise the first
      key of _FORMAT_RENDERERS matching ``mime`` (fnmatch) is used.  A text
      renderer that declares itself invalid is replaced by the HTML one;
      any other invalid renderer yields None.
      """
      if mime == "Local Folder":
          folder = FolderRenderer("",url,datadir=xdg("data"))
          if theme:
              folder.set_theme(theme)
          return folder
      matching = [pattern for pattern in _FORMAT_RENDERERS if fnmatch.fnmatch(mime, pattern)]
      if not matching:
          return None
      chosen = matching[0]
      renderer = _FORMAT_RENDERERS[chosen](content,url)
      if not renderer.is_valid():
          if chosen.startswith("text"):
              # We double check if the renderer is correct.
              # If not, we fallback to html
              # (this is currently only for XHTML, often being
              # mislabelled as xml thus RSS feeds)
              renderer = _FORMAT_RENDERERS["text/html"](content,url)
          else:
              renderer = None
      if renderer and theme:
          renderer.set_theme(theme)
      return renderer
  def render(input,path=None,format="auto",mime=None,url=None,mode=None):
      """Pick a renderer for ``input`` and display it on the terminal.

      input  -- the content to render
      path   -- path of the file the content comes from, if any
      format -- explicit renderer name; any other value means autodetection
      mime   -- mimetype used for autodetection when given
      url    -- sequence whose first item is the original URL, or None
      mode   -- rendering mode passed on to the renderer

      Prints "Could not render ..." when no renderer can be found.
      """
      # The command line collects --url as a list: keep its first item
      url = url[0] if url else ""
      by_name = {
          "gemtext": GemtextRenderer,
          "html": HtmlRenderer,
          "feed": FeedRenderer,
          "gopher": GopherRenderer,
          "image": ImageRenderer,
          "plaintext": PlaintextRenderer,
          "text": PlaintextRenderer,
      }
      if format == "folder":
          # Same construction as set_renderer: the folder view needs a datadir
          r = FolderRenderer(input,url,datadir=xdg("data"))
      elif format in by_name:
          r = by_name[format](input,url)
      elif not mime and path:
          r = renderer_from_file(path,url)
      elif mime:
          r = set_renderer(input,url,mime)
      else:
          # Neither a mimetype nor a file to inspect: nothing to go on
          r = None
      if r:
          r.display(directdisplay=True,mode=mode)
      else:
          print("Could not render %s"%input)
  def main():
      """Command-line entry point: parse the arguments and render each input.

      Files named on the command line are rendered one after the other.
      Without files, the content is read from standard input, in which
      case a format or a mimetype must be given.
      """
      descri = (
          "ansicat is a terminal rendering tool that will render multiple formats (HTML, "
          "Gemtext, RSS, Gophermap, Image) into ANSI text and colors.\n"
          "When used on a file, ansicat will try to autodetect the format. When used with "
          "standard input, the format must be manually specified.\n"
          "If the content contains links, the original URL of the content can be specified "
          "in order to correctly modify relatives links."
      )
      parser = argparse.ArgumentParser(prog="ansicat",description=descri)
      parser.add_argument("--format", choices=["auto","gemtext","html","feed","gopher","image","folder","text","plaintext"],
                          help="Renderer to use. Available: auto, gemtext, html, feed, gopher, image, folder, plaintext")
      parser.add_argument("--mime", help="Mime of the content to parse")
      ## The argument needs to be a path to a file. If none, then stdin is used which allows
      ## to pipe text directly into ansirenderer
      parser.add_argument("--url",metavar="URL", nargs="*",
                          help="Original URL of the content")
      parser.add_argument("--mode", metavar="MODE",
                          help="Which mode should be used to render: normal (default), full or source. "
                               "With HTML, the normal mode try to extract the article.")
      parser.add_argument("content",metavar="INPUT", nargs="*", type=argparse.FileType("r"),
                          default=sys.stdin, help="Path to the text to render (default to stdin)")
      args = parser.parse_args()
      options = dict(format=args.format,url=args.url,mime=args.mime,mode=args.mode)
      if isinstance(args.content,list):
          # Files were named: render them whether or not stdin is a terminal
          for f in args.content:
              path = os.path.abspath(f.name)
              try:
                  try:
                      content = f.read()
                  except UnicodeDecodeError:
                      # Not text: hand over the file object, the path does the work
                      content = f
                  render(content,path=path,**options)
              finally:
                  # Release the handle argparse opened (but leave stdin alone)
                  if f is not sys.stdin:
                      f.close()
      elif sys.stdin.isatty():
          # Interactive and no file given: nothing to read from
          print("Ansicat needs at least one file as an argument")
      elif not args.format and not args.mime:
          #we are in stdin
          print("Format or mime should be specified when running with stdin")
      else:
          render(args.content.read(),path=None,**options)

  if __name__ == '__main__':
      main()