offpunk.py 167 KB


  1. #!/usr/bin/env python3
  2. # Offpunk Offline Gemini client
  3. # Derived from AV-98 by Solderpunk,
  4. # (C) 2021, 2022 Ploum <offpunk@ploum.eu>
  5. # (C) 2019, 2020 Solderpunk <solderpunk@sdf.org>
  6. # With contributions from:
  7. # - danceka <hannu.hartikainen@gmail.com>
  8. # - <jprjr@tilde.club>
  9. # - <vee@vnsf.xyz>
  10. # - Klaus Alexander Seistrup <klaus@seistrup.dk>
  11. # - govynnus <govynnus@sdf.org>
  12. # - Björn Wärmedal <bjorn.warmedal@gmail.com>
  13. # - <jake@rmgr.dev>
# Version string of this Offpunk release.
_VERSION = "1.5"
# NOTE(review): a ``global`` statement at module level has no effect; BETA is
# already a module-level name.
global BETA
# BETA starts out disabled; where it is read or switched on is not visible in
# this part of the file.
BETA = False
  17. import argparse
  18. import cmd
  19. import cgi
  20. import codecs
  21. import datetime
  22. import fnmatch
  23. import getpass
  24. import glob
  25. import hashlib
  26. import io
  27. import mimetypes
  28. import os
  29. import os.path
  30. import filecmp
  31. import random
  32. import shlex
  33. import shutil
  34. import socket
  35. import sqlite3
  36. import ssl
  37. from ssl import CertificateError
  38. import sys
  39. import tempfile
  40. import time
  41. import urllib.parse
  42. import uuid
  43. import webbrowser
  44. import html
  45. import subprocess
  46. def run(cmd,direct_output=False):
  47. if not direct_output:
  48. result = subprocess.check_output(cmd,shell=True)
  49. return result.decode()
  50. else:
  51. subprocess.run(cmd,shell=True)
  52. try:
  53. import setproctitle
  54. setproctitle.setproctitle("offpunk")
  55. _HAS_SETPROCTITLE = True
  56. except ModuleNotFoundError:
  57. _HAS_SETPROCTITLE = False
  58. import textwrap
  59. global TERM_WIDTH
  60. TERM_WIDTH = 80
  61. def term_width():
  62. width = TERM_WIDTH
  63. cur = shutil.get_terminal_size()[0]
  64. if cur < width:
  65. width = cur
  66. return width
  67. try:
  68. from PIL import Image
  69. _HAS_PIL = True
  70. except ModuleNotFoundError:
  71. _HAS_PIL = False
  72. _HAS_TIMG = shutil.which('timg')
  73. _HAS_CHAFA = shutil.which('chafa')
  74. _NEW_CHAFA = False
  75. # All this code to know if we render image inline or not
  76. if _HAS_CHAFA:
  77. # starting with 1.10, chafa can return only one frame
  78. # which allows us to drop dependancy for PIL
  79. output = run("chafa --version")
  80. # with chafa < 1.10, --version was returned to stderr instead of stdout.
  81. if output != '':
  82. _NEW_CHAFA = True
  83. if _NEW_CHAFA :
  84. _RENDER_IMAGE = True
  85. elif _HAS_TIMG :
  86. _RENDER_IMAGE = True
  87. elif _HAS_CHAFA and _HAS_PIL:
  88. _RENDER_IMAGE = True
  89. else:
  90. _RENDER_IMAGE = False
  91. print("To render images inline, you need either chafa or timg.")
  92. if not _NEW_CHAFA and not _HAS_TIMG:
  93. print("Before Chafa 1.10, you also need python-pil")
  94. #return ANSI text that can be show by less
  95. def inline_image(img_file,width):
  96. #Chafa is faster than timg inline. Let use that one by default
  97. inline = None
  98. ansi_img = ""
  99. #We avoid errors by not trying to render non-image files
  100. if shutil.which("file"):
  101. mime = run("file -b --mime-type \"%s\""%img_file).strip()
  102. if not "image" in mime:
  103. return ansi_img
  104. if _HAS_CHAFA:
  105. if _HAS_PIL and not _NEW_CHAFA:
  106. # this code is a hack to remove frames from animated gif
  107. img_obj = Image.open(img_file)
  108. if hasattr(img_obj,"n_frames") and img_obj.n_frames > 1:
  109. # we remove all frames but the first one
  110. img_obj.save(img_file,format="gif",save_all=False)
  111. inline = "chafa --bg white -s %s -f symbols"
  112. elif _NEW_CHAFA:
  113. inline = "chafa --bg white -t 1 -s %s -f symbols --animate=off"
  114. if not inline and _HAS_TIMG:
  115. inline = "timg --frames=1 -p q -g %sx1000"
  116. if inline:
  117. cmd = inline%width+ " \"%s\""%img_file
  118. try:
  119. ansi_img = run(cmd)
  120. except Exception as err:
  121. ansi_img = "***image failed : %s***\n" %err
  122. return ansi_img
  123. def terminal_image(img_file):
  124. #Render by timg is better than old chafa.
  125. # it is also centered
  126. cmd = None
  127. if _HAS_TIMG:
  128. cmd = "timg --loops=1 -C"
  129. elif _HAS_CHAFA:
  130. cmd = "chafa -d 0 --bg white -t 1 -w 1"
  131. if cmd:
  132. cmd = cmd + " \"%s\""%img_file
  133. run(cmd,direct_output=True)
  134. _HAS_XSEL = shutil.which('xsel')
  135. _HAS_XDGOPEN = shutil.which('xdg-open')
  136. try:
  137. from cryptography import x509
  138. from cryptography.hazmat.backends import default_backend
  139. _HAS_CRYPTOGRAPHY = True
  140. _BACKEND = default_backend()
  141. except ModuleNotFoundError:
  142. _HAS_CRYPTOGRAPHY = False
  143. try:
  144. import requests
  145. _DO_HTTP = True
  146. except ModuleNotFoundError:
  147. _DO_HTTP = False
  148. try:
  149. from readability import Document
  150. _HAS_READABILITY = True
  151. except ModuleNotFoundError:
  152. _HAS_READABILITY = False
  153. try:
  154. from bs4 import BeautifulSoup
  155. from bs4 import Comment
  156. _HAS_SOUP = True
  157. except ModuleNotFoundError:
  158. _HAS_SOUP = False
  159. _DO_HTML = _HAS_SOUP #and _HAS_READABILITY
  160. if _DO_HTML and not _HAS_READABILITY:
  161. print("To improve your web experience (less cruft in webpages),")
  162. print("please install python3-reability or readability-lxml")
  163. try:
  164. import feedparser
  165. _DO_FEED = True
  166. except ModuleNotFoundError:
  167. _DO_FEED = False
  168. ## Config directories
  169. ## We implement our own python-xdg to avoid conflict with existing libraries.
  170. _home = os.path.expanduser('~')
  171. data_home = os.environ.get('XDG_DATA_HOME') or \
  172. os.path.join(_home,'.local','share')
  173. config_home = os.environ.get('XDG_CONFIG_HOME') or \
  174. os.path.join(_home,'.config')
  175. cache_home = os.environ.get('XDG_CACHE_HOME') or\
  176. os.path.join(_home,'.cache')
  177. _CACHE_PATH = os.path.join(cache_home,"offpunk/")
  178. _CONFIG_DIR = os.path.join(config_home,"offpunk/")
  179. _DATA_DIR = os.path.join(data_home,"offpunk/")
  180. _old_config = os.path.expanduser("~/.offpunk/")
  181. ## Look for pre-existing config directory, if any
  182. if os.path.exists(_old_config):
  183. _CONFIG_DIR = _old_config
  184. #if no XDG .local/share and not XDG .config, we use the old config
  185. if not os.path.exists(data_home) and os.path.exists(_old_config):
  186. _DATA_DIR = _CONFIG_DIR
  187. _MAX_REDIRECTS = 5
  188. _MAX_CACHE_SIZE = 10
  189. _MAX_CACHE_AGE_SECS = 180
  190. _GREP = "grep --color=auto"
  191. less_version = 0
  192. if not shutil.which("less"):
  193. print("Please install the pager \"less\" to run Offpunk.")
  194. print("If you wish to use another pager, send your request to offpunk@ploum.eu.")
  195. print("(I’m really curious to hear about people not having \"less\" on their system.)")
  196. sys.exit()
  197. output = run("less --version")
  198. # We get less Version (which is the only integer on the first line)
  199. words = output.split("\n")[0].split()
  200. less_version = 0
  201. for w in words:
  202. if w.isdigit():
  203. less_version = int(w)
  204. # restoring position only works for version of less > 572
  205. if less_version >= 572:
  206. _LESS_RESTORE_POSITION = True
  207. else:
  208. _LESS_RESTORE_POSITION = False
  209. #_DEFAULT_LESS = "less -EXFRfM -PMurl\ lines\ \%lt-\%lb/\%L\ \%Pb\%$ %s"
  210. # -E : quit when reaching end of file (to behave like "cat")
  211. # -F : quit if content fits the screen (behave like "cat")
  212. # -X : does not clear the screen
  213. # -R : interpret ANSI colors correctly
  214. # -f : suppress warning for some contents
  215. # -M : long prompt (to have info about where you are in the file)
  216. # -W : hilite the new first line after a page skip (space)
  217. # -i : ignore case in search
  218. # -S : do not wrap long lines. Wrapping is done by offpunk, longlines
  219. # are there on purpose (surch in asciiart)
  220. #--incsearch : incremental search starting rev581
  221. if less_version >= 581:
  222. less_base = "less --incsearch --save-marks -~ -XRfMWiS"
  223. elif less_version >= 572:
  224. less_base = "less --save-marks -XRfMWiS"
  225. else:
  226. less_base = "less -XRfMWiS"
  227. _DEFAULT_LESS = less_base + " \"+''\" %s"
  228. _DEFAULT_CAT = less_base + " -EF %s"
  229. def less_cmd(file, histfile=None,cat=False,grep=None):
  230. file = "\"%s\""%file
  231. if histfile:
  232. prefix = "LESSHISTFILE=%s "%histfile
  233. else:
  234. prefix = ""
  235. if cat:
  236. cmd_str = prefix + _DEFAULT_CAT % file
  237. elif grep:
  238. grep_cmd = _GREP
  239. #case insensitive for lowercase search
  240. if grep.islower():
  241. grep_cmd += " -i"
  242. cmd_str = prefix + _DEFAULT_CAT % file + "|" + grep_cmd + " %s"%grep
  243. else:
  244. cmd_str = prefix + _DEFAULT_LESS % file
  245. run(cmd_str,direct_output=True)
# Command abbreviations
# Maps each short alias (key) to the full command name (value).  Several
# aliases may point to the same command (e.g. "l", "less" and "v" -> "view").
_ABBREVS = {
    "a": "add",
    "b": "back",
    "bb": "blackbox",
    "bm": "bookmarks",
    "book": "bookmarks",
    "cp": "copy",
    "f": "forward",
    "g": "go",
    "h": "history",
    "hist": "history",
    "l": "view",
    "less": "view",
    "man": "help",
    "mv": "move",
    "n": "next",
    "off": "offline",
    "on": "online",
    "p": "previous",
    "prev": "previous",
    "q": "quit",
    "r": "reload",
    "s": "save",
    "se": "search",
    "/": "search",
    "t": "tour",
    "u": "up",
    "v": "view",
}
# Maps a MIME type (or a wildcard pattern such as "image/*") to a command
# template in which "%s" is a placeholder.
# NOTE(review): presumably "%s" receives the path of the file to open; the
# code consuming this table is not visible here.
_MIME_HANDLERS = {
    "application/pdf": "zathura %s",
    "audio/mpeg": "mpg123 %s",
    "audio/ogg": "ogg123 %s",
    "image/*": "feh -. %s",
    #"text/html": "lynx -dump -force_html %s",
}
  283. # monkey-patch Gemini support in urllib.parse
  284. # see https://github.com/python/cpython/blob/master/Lib/urllib/parse.py
  285. urllib.parse.uses_relative.append("gemini")
  286. urllib.parse.uses_netloc.append("gemini")
  287. urllib.parse.uses_relative.append("spartan")
  288. urllib.parse.uses_netloc.append("spartan")
  289. #An IPV6 URL should be put between []
  290. #We try to detect them has location with more than 2 ":"
  291. def fix_ipv6_url(url):
  292. if not url or url.startswith("mailto"):
  293. return url
  294. if "://" in url:
  295. schema, schemaless = url.split("://",maxsplit=1)
  296. else:
  297. schema, schemaless = None, url
  298. if "/" in schemaless:
  299. netloc, rest = schemaless.split("/",1)
  300. if netloc.count(":") > 2 and "[" not in netloc and "]" not in netloc:
  301. schemaless = "[" + netloc + "]" + "/" + rest
  302. elif schemaless.count(":") > 2:
  303. schemaless = "[" + schemaless + "]/"
  304. if schema:
  305. return schema + "://" + schemaless
  306. return schemaless
# Default port of each protocol, keyed by URL scheme.
# This table is also used as the list of supported protocols.
standard_ports = {
    "gemini" : 1965,
    "gopher" : 70,
    "http" : 80,
    "https" : 443,
    "spartan": 300,
}
  315. # First, we define the different content->text renderers, outside of the rest
  316. # (They could later be factorized in other files or replaced)
  317. class AbstractRenderer():
  318. def __init__(self,content,url,center=True):
  319. self.url = url
  320. self.body = content
  321. #there’s one rendered text and one links table per mode
  322. self.rendered_text = {}
  323. self.links = {}
  324. self.images = {}
  325. self.title = None
  326. self.validity = True
  327. self.temp_file = {}
  328. self.less_histfile = {}
  329. self.center = center
  330. #This class hold an internal representation of the HTML text
  331. class representation:
  332. def __init__(self,width,title=None,center=True):
  333. self.title=title
  334. self.center = center
  335. self.final_text = ""
  336. self.opened = []
  337. self.width = width
  338. self.last_line = ""
  339. self.last_line_colors = {}
  340. self.last_line_center = False
  341. self.new_paragraph = True
  342. self.i_indent = ""
  343. self.s_indent = ""
  344. self.r_indent = ""
  345. self.current_indent = ""
  346. self.disabled_indents = None
  347. # each color is an [open,close] pair code
  348. self.colors = {
  349. "bold" : ["1","22"],
  350. "faint" : ["2","22"],
  351. "italic" : ["3","23"],
  352. "underline": ["4","24"],
  353. "red" : ["31","39"],
  354. "yellow" : ["33","39"],
  355. "blue" : ["34","39"],
  356. }
  357. def _insert(self,color,open=True):
  358. if open: o = 0
  359. else: o = 1
  360. pos = len(self.last_line)
  361. #we remember the position where to insert color codes
  362. if not pos in self.last_line_colors:
  363. self.last_line_colors[pos] = []
  364. #Two inverse code cancel each other
  365. if [color,int(not o)] in self.last_line_colors[pos]:
  366. self.last_line_colors[pos].remove([color,int(not o)])
  367. else:
  368. self.last_line_colors[pos].append([color,o])#+color+str(o))
  369. # Take self.last line and add ANSI codes to it before adding it to
  370. # self.final_text.
  371. def _endline(self):
  372. if len(self.last_line.strip()) > 0:
  373. for c in self.opened:
  374. self._insert(c,open=False)
  375. nextline = ""
  376. added_char = 0
  377. #we insert the color code at the saved positions
  378. while len (self.last_line_colors) > 0:
  379. pos,colors = self.last_line_colors.popitem()
  380. #popitem itterates LIFO.
  381. #So we go, backward, to the pos (starting at the end of last_line)
  382. nextline = self.last_line[pos:] + nextline
  383. ansicol = "\x1b["
  384. for c,o in colors:
  385. ansicol += self.colors[c][o] + ";"
  386. ansicol = ansicol[:-1]+"m"
  387. nextline = ansicol + nextline
  388. added_char += len(ansicol)
  389. self.last_line = self.last_line[:pos]
  390. nextline = self.last_line + nextline
  391. if self.last_line_center:
  392. #we have to care about the ansi char while centering
  393. width = term_width() + added_char
  394. nextline = nextline.strip().center(width)
  395. self.last_line_center = False
  396. else:
  397. #should we lstrip the nextline in the addition ?
  398. nextline = self.current_indent + nextline.lstrip() + self.r_indent
  399. self.current_indent = self.s_indent
  400. self.final_text += nextline
  401. self.last_line = ""
  402. self.final_text += "\n"
  403. for c in self.opened:
  404. self._insert(c,open=True)
  405. else:
  406. self.last_line = ""
  407. def center_line(self):
  408. self.last_line_center = True
  409. def open_color(self,color):
  410. if color in self.colors and color not in self.opened:
  411. self._insert(color,open=True)
  412. self.opened.append(color)
  413. def close_color(self,color):
  414. if color in self.colors and color in self.opened:
  415. self._insert(color,open=False)
  416. self.opened.remove(color)
  417. def close_all(self):
  418. if len(self.colors) > 0:
  419. self.last_line += "\x1b[0m"
  420. self.opened.clear()
  421. def startindent(self,indent,sub=None,reverse=None):
  422. self._endline()
  423. self.i_indent = indent
  424. self.current_indent = indent
  425. if sub:
  426. self.s_indent = sub
  427. else:
  428. self.s_indent = indent
  429. if reverse:
  430. self.r_indent = reverse
  431. else:
  432. self.r_indent = ""
  433. def endindent(self):
  434. self._endline()
  435. self.i_indent = ""
  436. self.s_indent = ""
  437. self.r_indent = ""
  438. self.current_indent = ""
  439. def _disable_indents(self):
  440. self.disabled_indents = []
  441. self.disabled_indents.append(self.current_indent)
  442. self.disabled_indents.append(self.i_indent)
  443. self.disabled_indents.append(self.s_indent)
  444. self.disabled_indents.append(self.r_indent)
  445. self.endindent()
  446. def _enable_indents(self):
  447. if self.disabled_indents:
  448. self.current_indent = self.disabled_indents[0]
  449. self.i_indent = self.disabled_indents[1]
  450. self.s_indent = self.disabled_indents[2]
  451. self.r_indent = self.disabled_indents[3]
  452. self.disabled_indents = None
  453. def newline(self):
  454. self._endline()
  455. #A new paragraph implies 2 newlines (1 blank line between paragraphs)
  456. #But it is only used if didn’t already started one to avoid plenty
  457. #of blank lines. force=True allows to bypass that limit.
  458. #new_paragraph becomes false as soon as text is entered into it
  459. def newparagraph(self,force=False):
  460. if force or not self.new_paragraph:
  461. self._endline()
  462. self.final_text += "\n"
  463. self.new_paragraph = True
  464. def add_space(self):
  465. if len(self.last_line) > 0 and self.last_line[-1] != " ":
  466. self.last_line += " "
  467. def _title_first(self,intext=None):
  468. if self.title:
  469. if not self.title == intext:
  470. self._disable_indents()
  471. self.open_color("blue")
  472. self.open_color("bold")
  473. self.open_color("underline")
  474. self.add_text(self.title)
  475. self.close_all()
  476. self.newparagraph()
  477. self._enable_indents()
  478. self.title = None
  479. # Beware, blocks are not wrapped nor indented and left untouched!
  480. # They are mostly useful for pictures and preformatted text.
  481. def add_block(self,intext):
  482. # If necessary, we add the title before a block
  483. self._title_first()
  484. # we don’t want to indent blocks
  485. self._endline()
  486. self._disable_indents()
  487. self.final_text += self.current_indent + intext
  488. self.new_paragraph = False
  489. self._endline()
  490. self._enable_indents()
  491. def add_text(self,intext):
  492. self._title_first(intext=intext)
  493. lines = []
  494. last = (self.last_line + intext)
  495. self.last_line = ""
  496. # With the following, we basically cancel adding only spaces
  497. # on an empty line
  498. if len(last.strip()) > 0:
  499. self.new_paragraph = False
  500. else:
  501. last = last.strip()
  502. if len(last) > self.width:
  503. width = self.width - len(self.current_indent) - len(self.r_indent)
  504. spaces_left = len(last) - len(last.lstrip())
  505. spaces_right = len(last) - len(last.rstrip())
  506. lines = textwrap.wrap(last,width,drop_whitespace=True)
  507. self.last_line += spaces_left*" "
  508. while len(lines) > 1:
  509. l = lines.pop(0)
  510. self.last_line += l
  511. self._endline()
  512. if len(lines) == 1:
  513. li = lines[0]
  514. self.last_line += li + spaces_right*" "
  515. else:
  516. self.last_line = last
  517. def get_final(self):
  518. self.close_all()
  519. self._endline()
  520. #if no content, we still add the title
  521. self._title_first()
  522. lines = self.final_text.splitlines()
  523. lines2 = []
  524. termspace = shutil.get_terminal_size()[0]
  525. #Following code instert blanck spaces to center the content
  526. if self.center and termspace > term_width():
  527. margin = int((termspace - term_width())//2)
  528. else:
  529. margin = 0
  530. for l in lines :
  531. lines2.append(margin*" "+l)
  532. return "\n".join(lines2)
  533. def get_subscribe_links(self):
  534. return [[self.url,self.get_mime(),self.get_title()]]
  535. def is_valid(self):
  536. return self.validity
  537. def get_links(self,mode="links_only"):
  538. if mode not in self.links :
  539. prepared_body = self.prepare(self.body,mode=mode)
  540. results = self.render(prepared_body,mode=mode)
  541. if results:
  542. self.links[mode] = results[1]
  543. return self.links[mode]
  544. def get_title(self):
  545. return "Abstract title"
  546. # This function return a list of URL which should be downloaded
  547. # before displaying the page (images in HTML pages, typically)
  548. def get_images(self,mode="readable"):
  549. if not mode in self.images:
  550. self.get_body(mode=mode)
  551. # we also invalidate the body that was done without images
  552. self.rendered_text.pop(mode)
  553. if mode in self.images:
  554. return self.images[mode]
  555. else:
  556. return []
  557. #This function will give gemtext to the gemtext renderer
  558. def prepare(self,body,mode=None):
  559. return body
  560. def get_body(self,width=None,mode="readable"):
  561. if not width:
  562. width = term_width()
  563. if mode not in self.rendered_text:
  564. prepared_body = self.prepare(self.body,mode=mode)
  565. result = self.render(prepared_body,width=width,mode=mode)
  566. if result:
  567. self.rendered_text[mode] = result[0]
  568. self.links[mode] = result[1]
  569. return self.rendered_text[mode]
  570. def _window_title(self,title,info=None):
  571. title_r = self.representation(term_width())
  572. title_r.open_color("red")
  573. title_r.open_color("bold")
  574. title_r.add_text(title)
  575. title_r.close_color("bold")
  576. if info:
  577. title_r.add_text(" (%s)"%info)
  578. title_r.close_color("red")
  579. return title_r.get_final()
  580. def display(self,mode="readable",window_title="",window_info=None,grep=None):
  581. wtitle = self._window_title(window_title,info=window_info)
  582. body = wtitle + "\n" + self.get_body(mode=mode)
  583. if not body:
  584. return False
  585. # We actually put the body in a tmpfile before giving it to less
  586. if mode not in self.temp_file:
  587. tmpf = tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False)
  588. self.temp_file[mode] = tmpf.name
  589. tmpf.write(body)
  590. tmpf.close()
  591. if mode not in self.less_histfile:
  592. firsttime = True
  593. tmpf = tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False)
  594. self.less_histfile[mode] = tmpf.name
  595. else:
  596. firsttime = False
  597. less_cmd(self.temp_file[mode], histfile=self.less_histfile[mode],cat=firsttime,grep=grep)
  598. return True
  599. def get_temp_file(self,mode="readable"):
  600. if mode in self.temp_file:
  601. return self.temp_file[mode]
  602. else:
  603. return None
# A subclass of AbstractRenderer must provide a self.render(body,width,mode) method.
# Three modes are used: readable (the default), full and links_only (the fastest:
# the rendered content is not used, only the links are needed).
# The prepare() function is called before the rendering. It is useful when a
# renderer outputs a format suitable for another existing renderer (such as gemtext).
  609. # Gemtext Rendering Engine
  610. class GemtextRenderer(AbstractRenderer):
  611. def get_mime(self):
  612. return "text/gemini"
  613. def get_title(self):
  614. if self.title:
  615. return self.title
  616. elif self.body:
  617. lines = self.body.splitlines()
  618. for line in lines:
  619. if line.startswith("#"):
  620. self.title = line.strip("#").strip()
  621. return self.title
  622. if len(lines) > 0:
  623. # If not title found, we take the first 50 char
  624. # of the first line
  625. title_line = lines[0].strip()
  626. if len(title_line) > 50:
  627. title_line = title_line[:49] + "…"
  628. self.title = title_line
  629. return self.title
  630. else:
  631. self.title = "Empty Page"
  632. return self.title
  633. else:
  634. return "Unknown Gopher Page"
  635. #render_gemtext
  636. def render(self,gemtext, width=None,mode=None):
  637. if not width:
  638. width = term_width()
  639. r = self.representation(width)
  640. links = []
  641. preformatted = False
  642. def format_link(url,index,name=None):
  643. if "://" in url:
  644. protocol,adress = url.split("://",maxsplit=1)
  645. protocol = " %s" %protocol
  646. else:
  647. adress = url
  648. protocol = ""
  649. if "gemini" in protocol or "list" in protocol:
  650. protocol = ""
  651. if not name:
  652. name = adress
  653. line = "[%d%s] %s" % (index, protocol, name)
  654. return line
  655. for line in gemtext.splitlines():
  656. r.newline()
  657. if line.startswith("```"):
  658. preformatted = not preformatted
  659. elif preformatted:
  660. # infinite line to not wrap preformated
  661. r.add_block(line+"\n")
  662. elif len(line.strip()) == 0:
  663. r.newparagraph(force=True)
  664. elif line.startswith("=>"):
  665. strippedline = line[2:].strip()
  666. if strippedline:
  667. links.append(strippedline)
  668. splitted = strippedline.split(maxsplit=1)
  669. url = splitted[0]
  670. name = None
  671. if len(splitted) > 1:
  672. name = splitted[1]
  673. link = format_link(url,len(links),name=name)
  674. #r.open_color("blue")
  675. #r.open_color("faint")
  676. #r.open_color("underline")
  677. startpos = link.find("] ") + 2
  678. r.startindent("",sub=startpos*" ")
  679. r.add_text(link)
  680. r.endindent()
  681. #r.close_all()
  682. elif line.startswith("* "):
  683. line = line[1:].lstrip("\t ")
  684. r.startindent("• ",sub=" ")
  685. r.add_text(line)
  686. r.endindent()
  687. elif line.startswith(">"):
  688. line = line[1:].lstrip("\t ")
  689. r.startindent("> ")
  690. r.add_text(line)
  691. r.endindent()
  692. elif line.startswith("###"):
  693. line = line[3:].lstrip("\t ")
  694. r.open_color("blue")
  695. r.add_text(line)
  696. r.close_color("blue")
  697. elif line.startswith("##"):
  698. line = line[2:].lstrip("\t ")
  699. r.open_color("blue")
  700. r.add_text(line)
  701. r.close_color("blue")
  702. elif line.startswith("#"):
  703. line = line[1:].lstrip("\t ")
  704. if not self.title:
  705. self.title = line
  706. r.open_color("bold")
  707. r.open_color("blue")
  708. r.open_color("underline")
  709. r.add_text(line)
  710. r.close_color("underline")
  711. r.close_color("bold")
  712. r.close_color("blue")
  713. else:
  714. r.add_text(line.rstrip())
  715. return r.get_final(), links
  716. class GopherRenderer(AbstractRenderer):
  717. def get_mime(self):
  718. return "text/gopher"
  719. def get_title(self):
  720. if not self.title:
  721. self.title = ""
  722. if self.body:
  723. firstline = self.body.splitlines()[0]
  724. firstline = firstline.split("\t")[0]
  725. if firstline.startswith("i"):
  726. firstline = firstline[1:]
  727. self.title = firstline
  728. return self.title
  729. #menu_or_text
  730. def render(self,body,width=None,mode=None):
  731. if not width:
  732. width = term_width()
  733. try:
  734. render,links = self._render_goph(body,width=width,mode=mode)
  735. except Exception as err:
  736. print("Error rendering Gopher ",err)
  737. r = self.representation(width)
  738. r.add_block(body)
  739. render = r.get_final()
  740. links = []
  741. return render,links
  742. def _render_goph(self,body,width=None,mode=None):
  743. if not width:
  744. width = term_width()
  745. # This was copied straight from Agena (then later adapted)
  746. links = []
  747. r = self.representation(width)
  748. for line in self.body.split("\n"):
  749. r.newline()
  750. if line.startswith("i"):
  751. towrap = line[1:].split("\t")[0]
  752. if len(towrap.strip()) > 0:
  753. r.add_text(towrap)
  754. else:
  755. r.newparagraph()
  756. elif not line.strip() in [".",""]:
  757. parts = line.split("\t")
  758. parts[-1] = parts[-1].strip()
  759. if parts[-1] == "+":
  760. parts = parts[:-1]
  761. if len(parts) == 4:
  762. name,path,host,port = parts
  763. itemtype = name[0]
  764. name = name[1:]
  765. if port == "70":
  766. port = ""
  767. else:
  768. port = ":%s"%port
  769. if itemtype == "h" and path.startswith("URL:"):
  770. url = path[4:]
  771. else:
  772. if not path.startswith("/"):
  773. path = "/"+path
  774. url = "gopher://%s%s/%s%s" %(host,port,itemtype,path)
  775. url = url.replace(" ","%20")
  776. linkline = url + " " + name
  777. links.append(linkline)
  778. towrap = "[%s] "%len(links)+ name
  779. r.add_text(towrap)
  780. else:
  781. r.add_text(line)
  782. return r.get_final(),links
  783. class FolderRenderer(GemtextRenderer):
  784. def get_mime(self):
  785. return "Directory"
  786. def prepare(self,body,mode=None):
  787. def get_first_line(l):
  788. path = os.path.join(listdir,l+".gmi")
  789. with open(path) as f:
  790. first_line = f.readline().strip()
  791. f.close()
  792. if first_line.startswith("#"):
  793. return first_line
  794. else:
  795. return None
  796. def write_list(l):
  797. body = ""
  798. for li in l:
  799. path = "list:///%s"%li
  800. gi = GeminiItem(path)
  801. size = len(gi.get_links())
  802. body += "=> %s %s (%s items)\n" %(str(path),li,size)
  803. return body
  804. listdir = os.path.join(_DATA_DIR,"lists")
  805. if self.url != listdir:
  806. return "This is folder %s" %self.url
  807. else:
  808. self.title = "My lists"
  809. lists = []
  810. if os.path.exists(listdir):
  811. listfiles = os.listdir(listdir)
  812. if len(listfiles) > 0:
  813. for l in listfiles:
  814. #removing the .gmi at the end of the name
  815. lists.append(l[:-4])
  816. if len(lists) > 0:
  817. body = ""
  818. my_lists = []
  819. system_lists = []
  820. subscriptions = []
  821. frozen = []
  822. lists.sort()
  823. for l in lists:
  824. if l in ["history","to_fetch","archives","tour"]:
  825. system_lists.append(l)
  826. else:
  827. first_line = get_first_line(l)
  828. if first_line and "#subscribed" in first_line:
  829. subscriptions.append(l)
  830. elif first_line and "#frozen" in first_line:
  831. frozen.append(l)
  832. else:
  833. my_lists.append(l)
  834. if len(my_lists) > 0:
  835. body+= "\n## Bookmarks Lists (updated during sync)\n"
  836. body += write_list(my_lists)
  837. if len(subscriptions) > 0:
  838. body +="\n## Subscriptions (new links in those are added to tour)\n"
  839. body += write_list(subscriptions)
  840. if len(frozen) > 0:
  841. body +="\n## Frozen (fetched but never updated)\n"
  842. body += write_list(frozen)
  843. if len(system_lists) > 0:
  844. body +="\n## System Lists\n"
  845. body += write_list(system_lists)
  846. return body
  847. class FeedRenderer(GemtextRenderer):
  848. def get_mime(self):
  849. return "application/rss+xml"
  850. def is_valid(self):
  851. if _DO_FEED:
  852. parsed = feedparser.parse(self.body)
  853. else:
  854. return False
  855. if parsed.bozo:
  856. return False
  857. else:
  858. #If no content, then fallback to HTML
  859. return len(parsed.entries) > 0
  860. def get_title(self):
  861. if not self.title:
  862. self.get_body()
  863. return self.title
  864. def prepare(self,content,mode="readable",width=None):
  865. if not width:
  866. width = term_width()
  867. self.title = "RSS/Atom feed"
  868. page = ""
  869. if _DO_FEED:
  870. parsed = feedparser.parse(content)
  871. else:
  872. page += "Please install python-feedparser to handle RSS/Atom feeds\n"
  873. self.validity = False
  874. return page
  875. if parsed.bozo:
  876. page += "Invalid RSS feed\n\n"
  877. page += str(parsed.bozo_exception)
  878. self.validity = False
  879. else:
  880. if "title" in parsed.feed:
  881. t = parsed.feed.title
  882. else:
  883. t = "Unknown"
  884. self.title = "%s (XML feed)" %t
  885. title = "# %s"%self.title
  886. page += title + "\n"
  887. if "updated" in parsed.feed:
  888. page += "Last updated on %s\n\n" %parsed.feed.updated
  889. if "subtitle" in parsed.feed:
  890. page += parsed.feed.subtitle + "\n"
  891. if "link" in parsed.feed:
  892. page += "=> %s\n" %parsed.feed.link
  893. page += "\n## Entries\n"
  894. if len(parsed.entries) < 1:
  895. self.validity = False
  896. for i in parsed.entries:
  897. line = "=> %s " %i.link
  898. if "published" in i:
  899. pub_date = time.strftime("%Y-%m-%d",i.published_parsed)
  900. line += pub_date + " : "
  901. line += "%s" %(i.title)
  902. if "author" in i:
  903. line += " (by %s)"%i.author
  904. page += line + "\n"
  905. if mode == "full":
  906. if "summary" in i:
  907. html = HtmlRenderer(i.summary,self.url,center=False)
  908. rendered = html.get_body(width=None,mode="full")
  909. page += rendered
  910. page += "\n"
  911. return page
  class ImageRenderer(AbstractRenderer):
      """Renderer for picture files, drawn in the terminal."""

      def get_mime(self):
          return "image/*"

      def is_valid(self):
          # Pictures can only be shown when image rendering is enabled
          return True if _RENDER_IMAGE else False

      def get_links(self,mode=None):
          # A picture never contains links
          return []

      def get_title(self):
          return "Picture file"

      def render(self,img,width=None,mode=None):
          """Return (ansi_picture, links) for the picture file img.

          Without an explicit width the picture uses the whole terminal width;
          otherwise it is shifted right so that it appears centered.
          """
          if mode == "links_only":
              return "", []
          if width:
              spaces = int((term_width() - width)//2)
          else:
              width = term_width()
              spaces = 0
          ansi_img = inline_image(img,width)
          # Centering: every row receives the same left padding
          padding = spaces*" "
          centered = "".join(padding + row + "\n" for row in ansi_img.splitlines())
          return centered, []

      def display(self,mode=None,window_title=None,window_info=None,grep=None):
          """Print the optional window title, then draw the picture itself."""
          if window_title:
              print(self._window_title(window_title,info=window_info))
          terminal_image(self.body)
          return True
  946. class HtmlRenderer(AbstractRenderer):
  def get_mime(self):
      """Return the MIME type handled by this renderer."""
      return "text/html"
  def is_valid(self):
      """Tell whether the document can be rendered as HTML.

      Prints an installation hint when the HTML dependencies are missing.
      """
      if _DO_HTML:
          return self.validity
      print("HTML document detected. Please install python-bs4 and python-readability.")
      return _DO_HTML
  def get_subscribe_links(self):
      """List the feeds one can subscribe to from this page.

      Each item is a [url, mime, title] list. The page itself always comes
      first, followed by every <link rel="alternate"> whose type mentions
      rss, atom or feed.
      """
      subs = [[self.url,self.get_mime(),self.get_title()]]
      soup = BeautifulSoup(self.body, 'html.parser')
      for tag in soup.find_all("link",rel="alternate",recursive=True):
          feed_type = tag.get("type")
          if not feed_type:
              continue
          if any(word in feed_type for word in ("rss","atom","feed")):
              subs.append([tag.get("href"),feed_type,tag.get("title")])
      return subs
  def get_title(self):
      """Return the title of the HTML document, "" when there is none.

      The title is computed once and stored in self.title. readability is
      tried first when available; the <title> tag is used as a fallback.
      """
      if self.title:
          return self.title
      if not self.body:
          return ""
      if _HAS_READABILITY:
          try:
              self.title = Document(self.body).short_title()
              return self.title
          except Exception:
              # readability chokes on some documents: use <title> instead
              pass
      soup = BeautifulSoup(self.body,"html.parser")
      # A document may have no <title> at all, or an empty one
      if soup.title and soup.title.string:
          self.title = str(soup.title.string)
      else:
          self.title = ""
      # The fallback title must be returned too, not only stored
      return self.title
  978. # Our own HTML engine (crazy, isn’t it?)
  979. # Return [rendered_body, list_of_links]
  980. # mode is either links_only, readable or full
  def render(self,body,mode="readable",width=None,add_title=True):
      """Render an HTML document as ANSI text.

      body is the raw HTML source. mode is "links_only", "readable" (the
      document is first reduced with readability when it is available) or
      "full" (the whole document is rendered). width defaults to the terminal
      width. add_title is accepted but not used by this method.

      Returns a (rendered_text, links) pair. Each link is a string made of a
      URL followed by a space and an optional label. The absolute URL of every
      picture met is also recorded in self.images[mode].
      NOTE(review): returns None, not a pair, when HTML support is missing.
      """
      if not width:
          width = term_width()
      if not _DO_HTML:
          print("HTML document detected. Please install python-bs4 and python-readability.")
          return
      # This method recursively parses the HTML
      r = self.representation(width,title=self.get_title(),center=self.center)
      links = []
      # You know how bad html is when you realize that space is sometimes meaningful, sometimes not.
      # CR are not meaningful. Except that, sometimes, they should be interpreted as spaces.
      # HTML is real crap. At least the one people are generating.

      def render_image(src,width=40,mode=None):
          """Return the ANSI version of a cached picture ("" if not available)."""
          ansi_img = ""
          abs_url = urllib.parse.urljoin(self.url, src)
          if _RENDER_IMAGE and mode != "links_only" and src:
              try:
                  # The GeminiItem is only there to translate the URL into a cache path
                  g = GeminiItem(abs_url)
                  if g.is_cache_valid():
                      img = g.get_cache_path()
                      renderer = ImageRenderer(img,abs_url)
                      # Images are 40 wide except if the terminal is smaller
                      if width > 40:
                          size = 40
                      else:
                          size = width
                      ansi_img = "\n" + renderer.get_body(width=size,mode="inline")
              except Exception:
                  # we sometimes encounter really badly formatted files or URLs
                  ansi_img = textwrap.fill("[BAD IMG] %s"%src,width) + "\n"
          return ansi_img

      def sanitize_string(string):
          """Normalise the whitespace of a text node and unescape HTML entities."""
          #never start with a "\n"
          #string = string.lstrip("\n")
          string = string.replace("\r","").replace("\n", " ").replace("\t"," ")
          endspace = string.endswith(" ") or string.endswith("\xa0")
          startspace = string.startswith(" ") or string.startswith("\xa0")
          # Collapse every run of spaces into a single space. This is done in
          # one pass: looping on a single-space test would never terminate.
          toreturn = " ".join(word for word in string.strip().split(" ") if word)
          toreturn = html.unescape(toreturn)
          # One leading/trailing space is kept: it separates inline elements
          if endspace and not toreturn.endswith(" ") and not toreturn.endswith("\xa0"):
              toreturn += " "
          if startspace and not toreturn.startswith(" ") and not toreturn.startswith("\xa0"):
              toreturn = " " + toreturn
          return toreturn

      def recursive_render(element,indent="",preformatted=False):
          """Feed element and all its descendants to the representation r."""
          if element.name == "blockquote":
              r.newparagraph()
              r.startindent(" ",reverse=" ")
              for child in element.children:
                  r.open_color("italic")
                  recursive_render(child,indent="\t")
                  r.close_color("italic")
              r.endindent()
          elif element.name in ["div","p"]:
              r.newparagraph()
              for child in element.children:
                  recursive_render(child,indent=indent)
              r.newparagraph()
          elif element.name in ["span"]:
              r.add_space()
              for child in element.children:
                  recursive_render(child,indent=indent)
              r.add_space()
          elif element.name in ["h1","h2","h3","h4","h5","h6"]:
              r.open_color("blue")
              if element.name in ["h1"]:
                  r.open_color("bold")
                  r.open_color("underline")
              elif element.name in ["h2"]:
                  r.open_color("bold")
              elif element.name in ["h5","h6"]:
                  r.open_color("faint")
              for child in element.children:
                  r.newparagraph()
                  recursive_render(child)
                  r.newparagraph()
                  r.close_all()
          elif element.name in ["code","tt"]:
              for child in element.children:
                  recursive_render(child,indent=indent,preformatted=True)
          elif element.name in ["pre"]:
              r.newparagraph()
              r.add_block(element.text)
              r.newparagraph()
          elif element.name in ["li"]:
              r.startindent(" • ",sub=" ")
              for child in element.children:
                  recursive_render(child,indent=indent)
              r.endindent()
          elif element.name in ["tr"]:
              r.startindent("|",reverse="|")
              for child in element.children:
                  recursive_render(child,indent=indent)
              r.endindent()
          elif element.name in ["td","th"]:
              r.add_text("| ")
              for child in element.children:
                  recursive_render(child)
              r.add_text(" |")
          # italics
          elif element.name in ["em","i"]:
              r.open_color("italic")
              for child in element.children:
                  recursive_render(child,indent=indent,preformatted=preformatted)
              r.close_color("italic")
          #bold
          elif element.name in ["b","strong"]:
              r.open_color("bold")
              for child in element.children:
                  recursive_render(child,indent=indent,preformatted=preformatted)
              r.close_color("bold")
          elif element.name == "a":
              link = element.get('href')
              # support for images nested in links
              if link:
                  text = ""
                  imgtext = ""
                  #we display images first in a link
                  for child in element.children:
                      if child.name == "img":
                          recursive_render(child)
                          imgtext = "[IMG LINK %s]"
                  links.append(link+" "+text)
                  link_id = str(len(links))
                  r.open_color("blue")
                  r.open_color("faint")
                  for child in element.children:
                      if child.name != "img":
                          recursive_render(child,preformatted=preformatted)
                  if imgtext != "":
                      r.center_line()
                      r.add_text(imgtext%link_id)
                  else:
                      r.add_text(" [%s]"%link_id)
                  r.close_color("blue")
                  r.close_color("faint")
              else:
                  #No real link found
                  for child in element.children:
                      recursive_render(child,preformatted=preformatted)
          elif element.name == "img":
              src = element.get("src")
              text = ""
              ansi_img = render_image(src,width=width,mode=mode)
              alt = element.get("alt")
              if alt:
                  alt = sanitize_string(alt)
                  text += "[IMG] %s"%alt
              else:
                  text += "[IMG]"
              if src:
                  # A picture is also a link, numbered like any other link
                  links.append(src+" "+text)
                  if mode not in self.images:
                      self.images[mode] = []
                  abs_url = urllib.parse.urljoin(self.url, src)
                  self.images[mode].append(abs_url)
                  link_id = " [%s]"%(len(links))
                  r.add_block(ansi_img)
                  r.open_color("faint")
                  r.open_color("yellow")
                  r.center_line()
                  r.add_text(text + link_id)
                  r.close_color("faint")
                  r.close_color("yellow")
                  r.newline()
          elif element.name == "br":
              r.newline()
          elif element.name not in ["script","style","template"] and not isinstance(element, Comment):
              if element.string:
                  if preformatted :
                      r.open_color("faint")
                      r.add_text(element.string)
                      r.close_color("faint")
                  else:
                      s = sanitize_string(element.string)
                      if len(s.strip()) > 0:
                          r.add_text(s)
              else:
                  for child in element.children:
                      recursive_render(child,indent=indent)

      # the real heart of the HTML rendering
      if mode == "full":
          summary = body
      elif _HAS_READABILITY:
          try:
              readable = Document(body)
              summary = readable.summary()
          except Exception:
              # readability failed: render the whole document instead
              summary = body
      else:
          summary = body
      soup = BeautifulSoup(summary, 'html.parser')
      #soup = BeautifulSoup(summary, 'html5lib')
      if soup :
          if soup.body :
              recursive_render(soup.body)
          else:
              recursive_render(soup)
      return r.get_final(),links
  # Mapping of MIME types to the renderer class used to display them.
  # Keys are tried as fnmatch patterns against the MIME type (hence "image/*")
  # and the first matching key, in declaration order, is the one used.
  # (any content with a text/* MIME type not listed here is rendered as gemtext)
  _FORMAT_RENDERERS = {
      "text/gemini": GemtextRenderer,
      "text/html" : HtmlRenderer,
      "text/xml" : FeedRenderer,
      "application/xml" : FeedRenderer,
      "application/rss+xml" : FeedRenderer,
      "application/atom+xml" : FeedRenderer,
      "text/gopher": GopherRenderer,
      "image/*": ImageRenderer
  }
  # Offpunk is organized as follows:
  # - A GeminiClient instance handles the browsing of GeminiItems (= pages).
  # - There is only one GeminiClient. Each page is a GeminiItem (the name is historical,
  #   as the content is not necessarily gemini content).
  # - A GeminiItem is created from a URL, from which it derives its content.
  # - The content includes a title, a body (raw source) and a renderer. The renderer provides
  #   an ANSI-rendered version of the content and a list of links.
  # - Each GeminiItem generates a "cache_path" in which it maintains a cached version of its content.
  1203. class GeminiItem():
  def __init__(self, url, name=""):
      """Build an item from a URL, optionally suffixed by "##offpunk_mode=<mode>".

      Sets url, last_mode, name, scheme, local, host, port and path, and
      resets the cache path, mime, renderer and body to None. For gopher
      URLs the mime type is derived from the item type found in the URL.
      """
      # A bare "host/path" is assumed to be a gemini URL
      explicit = "://" in url or "./" in url or url[0] == "/"
      if not explicit and not url.startswith("mailto:"):
          url = "gemini://" + url
      # The rendering mode may be stored after the URL itself
      self.last_mode = None
      pieces = url.split("##offpunk_mode=")
      self.url = pieces[0]
      if len(pieces) > 1 and (pieces[1] in ["full"] or pieces[1].isnumeric()):
          self.last_mode = pieces[1]
      self.url = fix_ipv6_url(self.url).strip()
      self._cache_path = None
      self.name = name
      self.mime = None
      self.renderer = None
      self.body = None
      parsed = urllib.parse.urlparse(self.url)
      # Plain filesystem paths are handled as file items
      if url[0] == "/" or url.startswith("./"):
          self.scheme = "file"
      else:
          self.scheme = parsed.scheme
      self.local = self.scheme in ["file","mailto","list"]
      if self.local:
          self.host = ""
          self.port = None
          if self.url.startswith("file://"):
              # "file://" is 7 characters long
              self.path = self.url[7:]
          elif self.scheme == "mailto":
              self.path = parsed.path
          elif self.url.startswith("list://"):
              listdir = os.path.join(_DATA_DIR,"lists")
              listname = self.url[7:].lstrip("/")
              if listname in [""]:
                  # "list:///" is the folder holding all the lists
                  self.name = "My Lists"
                  self.path = listdir
              else:
                  self.name = listname
                  self.path = os.path.join(listdir, "%s.gmi"%listname)
          else:
              self.path = self.url
          return
      self.host = parsed.hostname
      self.port = parsed.port or standard_ports.get(self.scheme, 0)
      if self.scheme == "gopher":
          # Gopher URLs look like /<type><selector>; the default type is "1"
          itemtype = "1"
          self.path = parsed.path
          if parsed.path and parsed.path[0] == "/" and len(parsed.path) > 1:
              first_segment = parsed.path.split("/")[1]
              # A one-character first segment is taken as the gopher type
              if len(first_segment) == 1:
                  itemtype = parsed.path[1]
                  self.path = parsed.path[2:]
          if itemtype in ("9","g","I","s"):
              self.mime = "binary"
          else:
              text_types = {"0": "text/gemini", "1": "text/gopher", "h": "text/html"}
              self.mime = text_types.get(itemtype, "text/gopher")
      else:
          self.path = parsed.path
      if parsed.query:
          # The query is not added when the result would be too long: paths
          # above 260 characters are not supported, and very long queries
          # are usually useless stuff anyway.
          if len(self.path+parsed.query) < 258:
              self.path += "/" + parsed.query
  def get_cache_path(self):
      """Return the path of the file holding the cached content of this item.

      The result is stored in self._cache_path and reused as long as it is
      not a directory. A local item is its own cache. For a remote item,
      self.url receives a trailing "/" when the item turns out to be a folder.
      """
      known = self._cache_path
      if known and not os.path.isdir(known):
          return known
      if self.local:
          self._cache_path = self.path
          return self._cache_path
      path = os.path.expanduser(_CACHE_PATH + self.scheme + "/" + self.host + self.path)
      # There is an OS limitation of 260 characters per path: the path is
      # cut short enough to leave room for the index name added below.
      path = path[:249]
      # FIXME : this is a gross hack to give a name to index files. It breaks
      # when the real index has another name, which we have no way to know.
      if self.scheme.startswith("http"):
          index = "index.html"
      elif self.scheme == "gopher":
          index = "index.txt"
      else:
          index = "index.gmi"
      # A bare domain must end with "/", else the cache would create a file
      # where a folder is needed.
      if self.path == "" or os.path.isdir(path):
          if not path.endswith("/"):
              path += "/"
          if not self.url.endswith("/"):
              self.url += "/"
      if path.endswith("/"):
          path += index
      # Sometimes the index itself is a folder, like when folder/index.gmi?param
      # has been cached and we now try to access folder.
      if os.path.isdir(path):
          path += "/" + index
      self._cache_path = path
      return path
  def get_capsule_title(self):
      """Guess a short name for the capsule this item belongs to.

      A local item uses its name (or its path when it has no name). A remote
      item uses the username found after a "user..." path segment or in a
      "~username" segment, and falls back to the hostname.
      """
      if self.local:
          return self.name if self.name != "" else self.path
      red_title = self.host
      segments = self.path.split("/")
      if "user" in self.path:
          # /users/<name>: the segment after the last "user..." one wins
          for position, segment in enumerate(segments[:-1]):
              if segment.startswith("user"):
                  red_title = segments[position+1]
      if "~" in self.path:
          # /~<name>: takes precedence over the previous rule
          for segment in segments:
              if segment.startswith("~"):
                  red_title = segment[1:]
      return red_title
  def get_page_title(self):
      """Return "<page title> (<capsule title>)", or the capsule title alone
      when the page has no title of its own."""
      if not self.renderer:
          self._set_renderer()
      title = self.renderer.get_title() if self.renderer else ""
      if not title:
          return self.get_capsule_title()
      return title + " (%s)" %self.get_capsule_title()
  def is_cache_valid(self,validity=0):
      """Tell whether a usable cached copy of this item exists.

      validity is the maximal accepted age of the cache, in seconds.
      With 0, any existing cache is considered valid (use validity = 1
      to refresh everything).
      """
      cache = self.get_cache_path()
      if self.local:
          return os.path.exists(cache)
      if not cache:
          # There is not even a cache path!
          return False
      if len(cache) > 259:
          # Such a long path cannot be used on disk: no valid cache
          print("We return False because path is too long")
          return False
      if not os.path.exists(cache) or os.path.isdir(cache):
          # The cache has not been built
          return False
      if validity > 0:
          last_modification = self.cache_last_modified()
          return time.time() - last_modification < validity
      return True
  def cache_last_modified(self):
      """Return the modification time of the cache file, as a timestamp.

      Without a cache path, a local item yields 0 and a remote one None.
      """
      path = self.get_cache_path()
      if path:
          return os.path.getmtime(path)
      if self.local:
          return 0
      print("ERROR : NO CACHE in cache_last_modified")
      return None
  def get_body(self,as_file=False):
      """Return the raw content of the item, or None when it is not cached.

      With as_file, the path of the cache file is returned instead of its
      content.
      """
      if self.body and not as_file:
          return self.body
      path = self.get_cache_path() if self.is_cache_valid() else None
      if not path:
          return None
      # There is an OS limit on the path length
      if len(path) > 259:
          return "Path is too long. This is an OS limitation.\n\n" + self.url
      if as_file:
          return path
      with open(path) as cache_file:
          return cache_file.read()
  def get_images(self,mode=None):
      """Return the images reported by the renderer ([] without a renderer)."""
      if not self.renderer:
          self._set_renderer()
      if not self.renderer:
          return []
      return self.renderer.get_images(mode=mode)
  1421. # This method is used to load once the list of links in a gi
  1422. # Links can be followed, after a space, by a description/title
  def get_links(self,mode=None):
      """Return the links of the page as a list of GeminiItem.

      Each link given by the renderer is a URL optionally followed, after a
      space, by a name. Links that do not look like a URL are dropped.
      mode defaults to the last mode the item was displayed with.
      """
      if not self.renderer:
          self._set_renderer()
      raw_links = []
      if self.renderer:
          raw_links = self.renderer.get_links(mode=mode or self.last_mode)
      items = []
      for entry in raw_links:
          # split between the link and its potential name
          parts = entry.split(maxsplit=1)
          url = self.absolutise_url(parts[0])
          if not looks_like_url(url):
              continue
          # The name is only kept for Gopher items
          if len(parts) > 1 and url.startswith("gopher://"):
              items.append(GeminiItem(url,name=parts[1]))
          else:
              items.append(GeminiItem(url))
      return items
  def get_link(self,nb):
      """Return link number nb of the page (links are numbered from 1).

      Returns None, after printing a message, when there is no such link.
      """
      links = self.get_links()
      if nb < 1:
          # Without this guard, 0 or a negative number would silently pick
          # a link counted from the end of the list.
          print("Index too low! No link %s for %s" %(nb,self.url))
          return None
      if len(links) < nb:
          print("Index too high! No link %s for %s" %(nb,self.url))
          return None
      return links[nb-1]
  def get_subscribe_links(self):
      """Return the subscription links of the renderer, with absolute URLs.

      The first element of each entry is rewritten in place, as some feed
      links are relative. Returns [] without a renderer.
      """
      if not self.renderer:
          self._set_renderer()
      if not self.renderer:
          return []
      abssubs = []
      for entry in self.renderer.get_subscribe_links():
          entry[0] = self.absolutise_url(entry[0])
          abssubs.append(entry)
      return abssubs
  def _set_renderer(self,mime=None):
      """Pick and instantiate the renderer matching the mime type of the item.

      mime defaults to self.get_mime(). self.renderer is left untouched when
      no mime type is known or when no renderer matches it.
      """
      if self.local and os.path.isdir(self.get_cache_path()):
          self.renderer = FolderRenderer("",self.get_cache_path())
          return
      mime = mime or self.get_mime()
      # we don't even have a mime (so we probably don't have a cache)
      if not mime:
          return
      matching = [pattern for pattern in _FORMAT_RENDERERS if fnmatch.fnmatch(mime, pattern)]
      if not matching:
          return
      current_mime = matching[0]
      renderer_class = _FORMAT_RENDERERS[current_mime]
      if current_mime.startswith("text"):
          self.renderer = renderer_class(self.get_body(),self.url)
          # We double check that the renderer is correct. If not, we fall
          # back to html (this is currently only for XHTML, often being
          # mislabelled as xml and thus taken for an RSS feed).
          if not self.renderer.is_valid():
              self.renderer = _FORMAT_RENDERERS["text/html"](self.get_body(),self.url)
      else:
          # we don't parse text: the renderer is given the file itself
          self.renderer = renderer_class(self.get_cache_path(),self.url)
          if not self.renderer.is_valid():
              self.renderer = None
  def display(self,mode=None,grep=None):
      """Display the item through its renderer.

      mode defaults to the last mode used, and is remembered when given.
      Returns what the renderer's display() returns, or False when the item
      has no valid renderer or no valid cache.
      """
      if not self.renderer:
          self._set_renderer()
      if not (self.renderer and self.renderer.is_valid()):
          return False
      if mode:
          self.last_mode = mode
      else:
          mode = self.last_mode
      title = self.get_capsule_title()
      if not self.is_cache_valid():
          return False
      nbr = len(self.get_links(mode=mode))
      if self.local:
          title += " (%s items)"%nbr
          str_last = "local file"
      else:
          str_last = "last accessed on %s" %time.ctime(self.cache_last_modified())
          title += " (%s links)"%nbr
      return self.renderer.display(mode=mode,window_title=title,window_info=str_last,grep=grep)
  def get_filename(self):
      """Return the name, without its folder, of the cache file."""
      return os.path.basename(self.get_cache_path())
  def get_temp_filename(self):
      """Return the temporary file of the renderer, or the cache path when
      the renderer is missing, invalid or has no temporary file."""
      if not self.renderer:
          self._set_renderer()
      if self.renderer and self.renderer.is_valid():
          tmpf = self.renderer.get_temp_file()
          if tmpf:
              return tmpf
      return self.get_cache_path()
  def write_body(self,body,mime=None):
      """Store body as the content of the item and write it to the cache.

      body is the raw content (str for text/* content, bytes otherwise).
      mime is the full content type as received; only the media type itself
      is kept in self.mime, parameters such as "; charset=utf-8" are dropped.
      Nothing is written to disk for a local item.
      """
      self.body = body
      if mime:
          # Plain string handling on purpose: the cgi module, which used to
          # parse this header, has been removed from Python 3.13.
          self.mime = mime.split(";", 1)[0].strip()
      if self.local:
          return
      if self.mime and self.mime.startswith("text/"):
          mode = "w"
      else:
          mode = "wb"
      cache_dir = os.path.dirname(self.get_cache_path())
      # If a parent of the cache folder already exists as a file (not a folder)
      # we remove it (happens when accessing URL/subfolder before
      # URL/subfolder/file.gmi). This causes a loss of data in the cache:
      # the proper solution would be to save "subfolder" as "subfolder/index.gmi".
      # We climb up the tree until something exists, to find such a file.
      root_dir = cache_dir
      while not os.path.exists(root_dir):
          parent = os.path.dirname(root_dir)
          if parent == root_dir:
              # Nothing left to climb (relative or empty path): stop looping
              break
          root_dir = parent
      if os.path.isfile(root_dir):
          os.remove(root_dir)
      os.makedirs(cache_dir,exist_ok=True)
      # The with statement closes the file, no explicit close() is needed
      with open(self.get_cache_path(), mode=mode) as f:
          f.write(body)
  def get_mime(self):
      """Return the mime type of the item, guessing it from the cache if needed.

      Beware, this one is really a shady ad-hoc function. The guess is stored
      in self.mime. Returns None when there is no valid cache or when the
      type cannot be guessed.
      """
      if self.mime:
          return self.mime
      if not self.is_cache_valid():
          return self.mime
      path = self.get_cache_path()
      # Second opinion from mimetypes, only available in the "file" branch
      mime2 = None
      if self.scheme == "mailto":
          mime = "mailto"
      elif os.path.isdir(path):
          mime = "Local Folder"
      elif path.endswith(".gmi"):
          mime = "text/gemini"
      elif shutil.which("file") :
          # The path derives from a URL: it is passed as an argument list,
          # never through a shell, so that it cannot inject commands.
          output = subprocess.check_output(["file","-b","--mime-type","--",path])
          mime = output.decode().strip()
          mime2,encoding = mimetypes.guess_type(path,strict=False)
          # If we hesitate between html and xml, take the xml one
          # because the FeedRenderer falls back to HtmlRenderer
          if mime2 and mime != mime2 and "html" in mime and "xml" in mime2:
              mime = "text/xml"
          # Some xml/html documents are considered as octet-stream
          if mime == "application/octet-stream":
              mime = "text/xml"
      else:
          mime,encoding = mimetypes.guess_type(path,strict=False)
      if not mime and not shutil.which("file") :
          print("Cannot guess the mime type of the file. Please install \"file\".")
          print("(and send me an email, I’m curious of systems without \"file\" installed!")
      # mime is None when nothing could be guessed: leave it as it is
      if mime and mime.startswith("text") and mime not in _FORMAT_RENDERERS:
          if mime2 and mime2 in _FORMAT_RENDERERS:
              mime = mime2
          else:
              # by default, we consider it's gemini except for html
              mime = "text/gemini"
      self.mime = mime
      return self.mime
  def set_error(self,err):
      """Record a fetching error in the cache.

      If we get an error, we want to keep an existing cache, but we need to
      touch it (or to create one describing the error) to avoid hitting the
      same error at each refresh.
      """
      cache = self.get_cache_path()
      if self.is_cache_valid():
          os.utime(cache)
          return
      cache_dir = os.path.dirname(cache)
      # A file sitting where a folder is needed is removed
      root_dir = cache_dir
      while not os.path.exists(root_dir):
          parent = os.path.dirname(root_dir)
          if parent == root_dir:
              # Nothing left to climb (relative or empty path): stop looping
              break
          root_dir = parent
      if os.path.isfile(root_dir):
          os.remove(root_dir)
      os.makedirs(cache_dir,exist_ok=True)
      if os.path.isdir(cache_dir):
          # The with statement closes the file, no explicit close() is needed
          with open(cache, "w") as report:
              report.write(str(datetime.datetime.now())+"\n")
              report.write("ERROR while caching %s\n\n" %self.url)
              report.write("*****\n\n")
              report.write(str(type(err)) + " = " + str(err))
              #report.write("\n" + str(err.with_traceback(None)))
              report.write("\n*****\n\n")
              # Single-quoted literal so that the quotes around reload are
              # really written ("" inside "..." just glues two strings).
              report.write('If you believe this error was temporary, type "reload".\n')
              report.write("The ressource will be tentatively fetched during next sync.\n")
  def root(self):
      """Return a new item pointing to the root ("/") of the same host."""
      root_url = self._derive_url("/")
      return GeminiItem(root_url)
  def up(self,level=1):
      """Return the item found level folders above this one.

      A local item cannot go up: it is returned unchanged.
      """
      path = self.path.rstrip('/')
      climbed = 0
      while climbed < level:
          # Don't try to go higher than root or in config
          if self.local:
              return self
          # Get rid of the bottom component
          path = os.path.split(path)[0]
          climbed += 1
      if self.scheme == "gopher":
          # The parent of a gopher item is a menu (item type 1)
          path = "/1" + path
      return GeminiItem(self._derive_url(path))
  def query(self, query):
      """Return a new item for the same URL with query, percent-encoded, as
      its query string."""
      quoted = urllib.parse.quote(query)
      return GeminiItem(self._derive_url(query=quoted))
  def _derive_url(self, path="", query=""):
      """
      A thin wrapper around urlunparse which avoids inserting standard ports
      into URLs just to keep things clean.

      path defaults to the path of this item. The port is only written when
      it differs from the standard port of the scheme.
      """
      # .get() and not []: a scheme without a known standard port must keep
      # its explicit port instead of raising a KeyError.
      if not self.port or self.port == standard_ports.get(self.scheme) :
          host = self.host
      else:
          host = self.host + ":" + str(self.port)
      return urllib.parse.urlunparse((self.scheme,host,path or self.path, "", query, ""))
  def absolutise_url(self, relative_url):
      """
      Resolve relative_url against the URL of this GeminiItem and return
      the resulting absolute URL.
      """
      return urllib.parse.urljoin(self.url, relative_url)
  def url_mode(self):
      """Return the URL of the item, suffixed by its display mode when that
      mode is set and is not "readable"."""
      if not self.last_mode or self.last_mode == "readable":
          return self.url
      return self.url + "##offpunk_mode=" + self.last_mode
  def to_map_line(self):
      """Return the gemtext link line ("=> url title") describing this item."""
      target = self.url_mode()
      label = self.get_page_title()
      return "=> {} {}\n".format(target, label)
  # Carriage return + line feed sequence
  CRLF = "\r\n"
  1670. # Cheap and cheerful URL detector
  def looks_like_url(word):
      """Tell whether word can reasonably be handled as a URL.

      A word without a known scheme is checked again as a gemini URL.
      A mailto: link needs a "@", a file or list URL a "/", and any other
      URL a ".". Anything urllib refuses to parse is not a URL.
      """
      try:
          if not word.strip():
              return False
          url = fix_ipv6_url(word).strip()
          parsed = urllib.parse.urlparse(url)
          # sometimes, urllib only crashes when the port is requested
          _ = parsed.port
          scheme = word.split("://")[0]
          if word.startswith("mailto:"):
              return "@" in word
          if scheme in ["file","list"]:
              return "/" in word
          if scheme in standard_ports:
              return "." in word
          return looks_like_url("gemini://"+word)
      except ValueError:
          return False
  class UserAbortException(Exception):
      """Exception carrying no extra behaviour of its own."""
  1695. # GeminiClient Decorators
  def needs_gi(inner):
      """Decorator for commands which only make sense once a page is open.

      When self.gi is not set, a hint is printed and None is returned
      instead of calling the decorated method.
      """
      import functools

      # wraps() keeps the name and docstring of the command, which a plain
      # copy of __doc__ only did in part.
      @functools.wraps(inner)
      def outer(self, *args, **kwargs):
          if not self.gi:
              print("You need to 'go' somewhere, first")
              return None
          return inner(self, *args, **kwargs)
      return outer
  def restricted(inner):
      """Decorator for commands which are not available in restricted mode.

      When self.restricted is set, a message is printed and None is returned
      instead of calling the decorated method.
      """
      import functools

      # wraps() keeps the name and docstring of the command, which a plain
      # copy of __doc__ only did in part.
      @functools.wraps(inner)
      def outer(self, *args, **kwargs):
          if self.restricted:
              print("Sorry, this command is not available in restricted mode!")
              return None
          return inner(self, *args, **kwargs)
      return outer
  1714. class GeminiClient(cmd.Cmd):
  def __init__(self, completekey="tab", restricted=False, synconly=False):
      """Set up the prompts, the browsing state, the default options, the
      redirections and the statistics, then open the TOFU database.

      completekey is handed over to cmd.Cmd. restricted disables the
      commands decorated as restricted; synconly implies restricted.
      """
      # completekey used to be silently ignored: forward it to cmd.Cmd
      cmd.Cmd.__init__(self, completekey=completekey)

      # Set umask so that nothing we create can be read by anybody else.
      # The certificate cache and TOFU database contain "browser history"
      # type sensitive information.
      os.umask(0o077)

      self.no_cert_prompt = "\x1b[38;5;76m" + "ON" + "\x1b[38;5;255m" + "> " + "\x1b[0m"
      self.cert_prompt = "\x1b[38;5;202m" + "ON" + "\x1b[38;5;255m"
      self.offline_prompt = "\x1b[38;5;76m" + "OFF" + "\x1b[38;5;255m" + "> " + "\x1b[0m"
      self.prompt = self.no_cert_prompt
      self.gi = None
      self.hist_index = 0
      self.index = []
      self.index_index = -1
      self.lookup = self.index
      self.marks = {}
      self.page_index = 0
      self.permanent_redirects = {}
      self.previous_redirectors = set()
      # Sync-only mode is restricted by design
      self.restricted = restricted or synconly
      self.visited_hosts = set()
      self.offline_only = False
      self.sync_only = False
      self.support_http = _DO_HTTP
      self.automatic_choice = "n"

      self.client_certs = {
          "active": None
      }
      self.active_cert_domains = []
      self.active_is_transient = False
      self.transient_certs_created = []

      self.options = {
          "debug" : False,
          "beta" : False,
          "ipv6" : True,
          "timeout" : 600,
          "short_timeout" : 5,
          "width" : 72,
          "auto_follow_redirects" : True,
          "tls_mode" : "tofu",
          "archives_size" : 200,
          "history_size" : 200,
          "max_size_download" : 10,
          "editor" : None,
          "download_images_first" : True,
          "redirects" : True,
      }

      # Hosts replaced by another host; "blocked" is the value used for
      # facebook.com and google-analytics.com.
      self.redirects = {
          "twitter.com" : "nitter.42l.fr",
          "facebook.com" : "blocked",
          "google-analytics.com" : "blocked",
          "youtube.com" : "yewtu.be",
          "reddit.com" : "libredd.it",
          "old.reddit.com": "libredd.it",
          "medium.com" : "scribe.rip",
      }
      # The module-wide rendering width follows the "width" option
      global TERM_WIDTH
      TERM_WIDTH = self.options["width"]
      self.log = {
          "start_time": time.time(),
          "requests": 0,
          "ipv4_requests": 0,
          "ipv6_requests": 0,
          "bytes_recvd": 0,
          "ipv4_bytes_recvd": 0,
          "ipv6_bytes_recvd": 0,
          "dns_failures": 0,
          "refused_connections": 0,
          "reset_connections": 0,
          "timeouts": 0,
          "cache_hits": 0,
      }

      self._connect_to_tofu_db()
  1789. def _connect_to_tofu_db(self):
  1790. db_path = os.path.join(_CONFIG_DIR, "tofu.db")
  1791. self.db_conn = sqlite3.connect(db_path)
  1792. self.db_cur = self.db_conn.cursor()
  1793. self.db_cur.execute("""CREATE TABLE IF NOT EXISTS cert_cache
  1794. (hostname text, address text, fingerprint text,
  1795. first_seen date, last_seen date, count integer)""")
  1796. def _go_to_gi(self, gi, update_hist=True, check_cache=True, handle=True,\
  1797. mode=None,limit_size=False):
  1798. """This method might be considered "the heart of Offpunk".
  1799. Everything involved in fetching a gemini resource happens here:
  1800. sending the request over the network, parsing the response,
  1801. storing the response in a temporary file, choosing
  1802. and calling a handler program, and updating the history.
  1803. Nothing is returned."""
  1804. if not gi:
  1805. return
  1806. # Don't try to speak to servers running other protocols
  1807. elif gi.scheme == "mailto":
  1808. if handle and not self.sync_only:
  1809. resp = input("Send an email to %s Y/N? " %gi.path)
  1810. self.gi = gi
  1811. if resp.strip().lower() in ("y", "yes"):
  1812. if _HAS_XDGOPEN :
  1813. cmd = "xdg-open \"mailto:%s\"" %gi.path
  1814. run(cmd,direct_output=True)
  1815. else:
  1816. print("Cannot find a mail client to send mail to %s" %gi.path)
  1817. print("Please install xdg-open (usually from xdg-util package)")
  1818. return
  1819. elif gi.scheme not in ["file","list"] and gi.scheme not in standard_ports \
  1820. and not self.sync_only:
  1821. print("Sorry, no support for {} links.".format(gi.scheme))
  1822. return
  1823. # Obey permanent redirects
  1824. if gi.url in self.permanent_redirects:
  1825. new_gi = GeminiItem(self.permanent_redirects[gi.url], name=gi.name)
  1826. self._go_to_gi(new_gi)
  1827. return
  1828. # Use cache or mark as to_fetch if resource is not cached
  1829. # Why is this code useful ? It set the mimetype !
  1830. if self.offline_only:
  1831. if not gi.is_cache_valid():
  1832. self.get_list("to_fetch")
  1833. r = self.list_add_line("to_fetch",gi=gi,verbose=False)
  1834. if r:
  1835. print("%s not available, marked for syncing"%gi.url)
  1836. else:
  1837. print("%s already marked for syncing"%gi.url)
  1838. return
  1839. # check if local file exists.
  1840. if gi.local and not os.path.exists(gi.path):
  1841. print("Local file %s does not exist!" %gi.path)
  1842. return
  1843. elif not self.offline_only and not gi.local:
  1844. try:
  1845. if gi.scheme in ("http", "https"):
  1846. if self.support_http:
  1847. if limit_size:
  1848. # Let’s cap automatic downloads to 20Mo
  1849. max_download = int(self.options["max_size_download"])*1000000
  1850. else:
  1851. max_download = None
  1852. gi = self._fetch_http(gi,max_length=max_download)
  1853. elif handle and not self.sync_only:
  1854. if not _DO_HTTP:
  1855. print("Install python3-requests to handle http requests natively")
  1856. webbrowser.open_new_tab(gi.url)
  1857. return
  1858. else:
  1859. return
  1860. elif gi.scheme in ("gopher"):
  1861. gi = self._fetch_gopher(gi,timeout=self.options["short_timeout"])
  1862. elif gi.scheme in ("spartan"):
  1863. gi = self._fetch_spartan(gi)
  1864. else:
  1865. gi = self._fetch_over_network(gi)
  1866. except UserAbortException:
  1867. return
  1868. except Exception as err:
  1869. gi.set_error(err)
  1870. # Print an error message
  1871. # we fail silently when sync_only
  1872. print_error = not self.sync_only
  1873. if isinstance(err, socket.gaierror):
  1874. self.log["dns_failures"] += 1
  1875. if print_error:
  1876. print("ERROR: DNS error!")
  1877. elif isinstance(err, ConnectionRefusedError):
  1878. self.log["refused_connections"] += 1
  1879. if print_error:
  1880. print("ERROR1: Connection refused!")
  1881. elif isinstance(err, ConnectionResetError):
  1882. self.log["reset_connections"] += 1
  1883. if print_error:
  1884. print("ERROR2: Connection reset!")
  1885. elif isinstance(err, (TimeoutError, socket.timeout)):
  1886. self.log["timeouts"] += 1
  1887. if print_error:
  1888. print("""ERROR3: Connection timed out!
  1889. Slow internet connection? Use 'set timeout' to be more patient.""")
  1890. elif isinstance(err, FileExistsError):
  1891. print("""ERROR5: Trying to create a directory which already exists
  1892. in the cache : """)
  1893. print(err)
  1894. else:
  1895. if print_error:
  1896. print("ERROR4: " + str(type(err)) + " : " + str(err))
  1897. print("\n" + str(err.with_traceback(None)))
  1898. return
  1899. # Pass file to handler, unless we were asked not to
  1900. if gi :
  1901. display = handle and not self.sync_only
  1902. if display and _RENDER_IMAGE and self.options["download_images_first"] \
  1903. and not self.offline_only:
  1904. # We download images first
  1905. for image in gi.get_images(mode=mode):
  1906. if image and image.startswith("http"):
  1907. img_gi = GeminiItem(image)
  1908. if not img_gi.is_cache_valid():
  1909. width = term_width() - 1
  1910. toprint = "Downloading %s" %image
  1911. toprint = toprint[:width]
  1912. toprint += " "*(width-len(toprint))
  1913. print(toprint,end="\r")
  1914. self._go_to_gi(img_gi, update_hist=False, check_cache=True, \
  1915. handle=False,limit_size=True)
  1916. if display and gi.display(mode=mode):
  1917. self.index = gi.get_links()
  1918. self.lookup = self.index
  1919. self.page_index = 0
  1920. self.index_index = -1
  1921. # Update state (external files are not added to history)
  1922. self.gi = gi
  1923. if update_hist and not self.sync_only:
  1924. self._update_history(gi)
  1925. elif display :
  1926. cmd_str = self._get_handler_cmd(gi.get_mime())
  1927. try:
  1928. # get tmpfile from gi !
  1929. tmpfile = "\"%s\""%gi.get_body(as_file=True)
  1930. run(cmd_str%tmpfile,direct_output=True)
  1931. except FileNotFoundError:
  1932. print("Handler program %s not found!" % shlex.split(cmd_str)[0])
  1933. print("You can use the ! command to specify another handler program or pipeline.")
  1934. def _fetch_http(self,gi,max_length=None):
  1935. def set_error(item,length,max_length):
  1936. err = "Size of %s is %s Mo\n"%(item.url,length)
  1937. err += "Offpunk only download automatically content under %s Mo\n" %(max_length/1000000)
  1938. err += "To retrieve this content anyway, type 'reload'."
  1939. item.set_error(err)
  1940. return item
  1941. header = {}
  1942. header["User-Agent"] = "Offpunk browser v%s"%_VERSION
  1943. parsed = urllib.parse.urlparse(gi.url)
  1944. # Code to translate URLs to better frontends (think twitter.com -> nitter)
  1945. if self.options["redirects"]:
  1946. netloc = parsed.netloc
  1947. if netloc.startswith("www."):
  1948. netloc = netloc[4:]
  1949. if netloc in self.redirects:
  1950. if self.redirects[netloc] == "blocked":
  1951. text = "This website has been blocked.\n"
  1952. text += "Use the redirect command to unblock it."
  1953. gi.write_body(text,"text/gemini")
  1954. return gi
  1955. else:
  1956. parsed = parsed._replace(netloc = self.redirects[netloc])
  1957. url = urllib.parse.urlunparse(parsed)
  1958. with requests.get(url,headers=header, stream=True,timeout=5) as response:
  1959. #print("This is header for %s"%gi.url)
  1960. #print(response.headers)
  1961. if "content-type" in response.headers:
  1962. mime = response.headers['content-type']
  1963. else:
  1964. mime = None
  1965. if "content-length" in response.headers:
  1966. length = int(response.headers['content-length'])
  1967. else:
  1968. length = 0
  1969. if max_length and length > max_length:
  1970. response.close()
  1971. return set_error(gi,str(length/1000000),max_length)
  1972. elif max_length and length == 0:
  1973. body = b''
  1974. for r in response.iter_content():
  1975. body += r
  1976. #We divide max_size for streamed content
  1977. #in order to catch them faster
  1978. if sys.getsizeof(body) > max_length/2:
  1979. response.close()
  1980. return set_error(gi,"streaming",max_length)
  1981. response.close()
  1982. else:
  1983. body = response.content
  1984. response.close()
  1985. if mime and "text/" in mime:
  1986. body = body.decode("UTF-8","replace")
  1987. gi.write_body(body,mime)
  1988. return gi
  1989. def _fetch_gopher(self,gi,timeout=10):
  1990. if not looks_like_url(gi.url):
  1991. print("%s is not a valide url" %gi.url)
  1992. parsed =urllib.parse.urlparse(gi.url)
  1993. host = parsed.hostname
  1994. port = parsed.port or 70
  1995. if parsed.path and parsed.path[0] == "/" and len(parsed.path) > 1:
  1996. splitted = parsed.path.split("/")
  1997. # We check if we have well a gopher type
  1998. if len(splitted[1]) == 1:
  1999. itemtype = parsed.path[1]
  2000. selector = parsed.path[2:]
  2001. else:
  2002. itemtype = "1"
  2003. selector = parsed.path
  2004. else:
  2005. itemtype = "1"
  2006. selector = parsed.path
  2007. addresses = socket.getaddrinfo(host, port, family=0,type=socket.SOCK_STREAM)
  2008. s = socket.create_connection((host,port))
  2009. for address in addresses:
  2010. self._debug("Connecting to: " + str(address[4]))
  2011. s = socket.socket(address[0], address[1])
  2012. s.settimeout(timeout)
  2013. try:
  2014. s.connect(address[4])
  2015. break
  2016. except OSError as e:
  2017. err = e
  2018. else:
  2019. # If we couldn't connect to *any* of the addresses, just
  2020. # bubble up the exception from the last attempt and deny
  2021. # knowledge of earlier failures.
  2022. raise err
  2023. if parsed.query:
  2024. request = selector + "\t" + parsed.query
  2025. else:
  2026. request = selector
  2027. request += "\r\n"
  2028. s.sendall(request.encode("UTF-8"))
  2029. response = s.makefile("rb").read()
  2030. # Transcode response into UTF-8
  2031. #if itemtype in ("0","1","h"):
  2032. if not itemtype in ("9","g","I","s"):
  2033. # Try most common encodings
  2034. for encoding in ("UTF-8", "ISO-8859-1"):
  2035. try:
  2036. response = response.decode("UTF-8")
  2037. break
  2038. except UnicodeDecodeError:
  2039. pass
  2040. else:
  2041. # try to find encoding
  2042. #if _HAS_CHARDET:
  2043. detected = chardet.detect(response)
  2044. response = response.decode(detected["encoding"])
  2045. #else:
  2046. #raise UnicodeDecodeError
  2047. if itemtype == "0":
  2048. mime = "text/gemini"
  2049. elif itemtype == "1":
  2050. mime = "text/gopher"
  2051. elif itemtype == "h":
  2052. mime = "text/html"
  2053. elif itemtype in ("9","g","I","s"):
  2054. mime = None
  2055. else:
  2056. # by default, we should consider Gopher
  2057. mime = "text/gopher"
  2058. gi.write_body(response,mime)
  2059. return gi
  2060. # Copied from reference spartan client by Michael Lazar
  2061. def _fetch_spartan(self,gi):
  2062. url_parts = urllib.parse.urlparse(gi.url)
  2063. host = url_parts.hostname
  2064. port = url_parts.port or 300
  2065. path = url_parts.path or "/"
  2066. query = url_parts.query
  2067. redirect_url = None
  2068. with socket.create_connection((host,port)) as sock:
  2069. if query:
  2070. data = urllib.parse.unquote_to_bytes(query)
  2071. else:
  2072. data = b""
  2073. encoded_host = host.encode("idna")
  2074. ascii_path = urllib.parse.unquote_to_bytes(path)
  2075. encoded_path = urllib.parse.quote_from_bytes(ascii_path).encode("ascii")
  2076. sock.send(b"%s %s %d\r\n" % (encoded_host,encoded_path,len(data)))
  2077. fp = sock.makefile("rb")
  2078. response = fp.readline(4096).decode("ascii").strip("\r\n")
  2079. parts = response.split(" ",maxsplit=1)
  2080. code,meta = int(parts[0]),parts[1]
  2081. if code == 2:
  2082. body = fp.read()
  2083. if meta.startswith("text"):
  2084. body = body.decode("UTF-8")
  2085. gi.write_body(body,meta)
  2086. elif code == 3:
  2087. redirect_url = url_parts._replace(path=meta).geturl()
  2088. else:
  2089. gi.set_error("Spartan code %s: Error %s"%(code,meta))
  2090. if redirect_url:
  2091. gi = GeminiItem(redirect_url)
  2092. self._fetch_spartan(gi)
  2093. return gi
  2094. # fetch_over_network will modify with gi.write_body(body,mime)
  2095. # before returning the gi
  2096. def _fetch_over_network(self, gi):
  2097. # Be careful with client certificates!
  2098. # Are we crossing a domain boundary?
  2099. if self.active_cert_domains and gi.host not in self.active_cert_domains:
  2100. if self.active_is_transient:
  2101. print("Permanently delete currently active transient certificate?")
  2102. resp = input("Y/N? ")
  2103. if resp.strip().lower() in ("y", "yes"):
  2104. print("Destroying certificate.")
  2105. self._deactivate_client_cert()
  2106. else:
  2107. print("Staying here.")
  2108. raise UserAbortException()
  2109. else:
  2110. print("PRIVACY ALERT: Deactivate client cert before connecting to a new domain?")
  2111. resp = input("Y/N? ")
  2112. if resp.strip().lower() in ("n", "no"):
  2113. print("Keeping certificate active for {}".format(gi.host))
  2114. else:
  2115. print("Deactivating certificate.")
  2116. self._deactivate_client_cert()
  2117. # Suggest reactivating previous certs
  2118. if not self.client_certs["active"] and gi.host in self.client_certs:
  2119. print("PRIVACY ALERT: Reactivate previously used client cert for {}?".format(gi.host))
  2120. resp = input("Y/N? ")
  2121. if resp.strip().lower() in ("y", "yes"):
  2122. self._activate_client_cert(*self.client_certs[gi.host])
  2123. else:
  2124. print("Remaining unidentified.")
  2125. self.client_certs.pop(gi.host)
  2126. # Is this a local file?
  2127. if gi.local:
  2128. address, f = None, open(gi.path, "rb")
  2129. else:
  2130. address, f = self._send_request(gi)
  2131. # Spec dictates <META> should not exceed 1024 bytes,
  2132. # so maximum valid header length is 1027 bytes.
  2133. header = f.readline(1027)
  2134. header = header.decode("UTF-8")
  2135. if not header or header[-1] != '\n':
  2136. raise RuntimeError("Received invalid header from server!")
  2137. header = header.strip()
  2138. self._debug("Response header: %s." % header)
  2139. # Validate header
  2140. status, meta = header.split(maxsplit=1)
  2141. if len(meta) > 1024 or len(status) != 2 or not status.isnumeric():
  2142. f.close()
  2143. raise RuntimeError("Received invalid header from server!")
  2144. # Update redirect loop/maze escaping state
  2145. if not status.startswith("3"):
  2146. self.previous_redirectors = set()
  2147. # Handle non-SUCCESS headers, which don't have a response body
  2148. # Inputs
  2149. if status.startswith("1"):
  2150. if self.sync_only:
  2151. return None
  2152. else:
  2153. print(meta)
  2154. if status == "11":
  2155. user_input = getpass.getpass("> ")
  2156. else:
  2157. user_input = input("> ")
  2158. return self._fetch_over_network(gi.query(user_input))
  2159. # Redirects
  2160. elif status.startswith("3"):
  2161. new_gi = GeminiItem(gi.absolutise_url(meta))
  2162. if new_gi.url == gi.url:
  2163. raise RuntimeError("URL redirects to itself!")
  2164. elif new_gi.url in self.previous_redirectors:
  2165. raise RuntimeError("Caught in redirect loop!")
  2166. elif len(self.previous_redirectors) == _MAX_REDIRECTS:
  2167. raise RuntimeError("Refusing to follow more than %d consecutive redirects!" % _MAX_REDIRECTS)
  2168. elif self.sync_only:
  2169. follow = self.automatic_choice
  2170. # Never follow cross-domain redirects without asking
  2171. elif new_gi.host != gi.host:
  2172. follow = input("Follow cross-domain redirect to %s? (y/n) " % new_gi.url)
  2173. # Never follow cross-protocol redirects without asking
  2174. elif new_gi.scheme != gi.scheme:
  2175. follow = input("Follow cross-protocol redirect to %s? (y/n) " % new_gi.url)
  2176. # Don't follow *any* redirect without asking if auto-follow is off
  2177. elif not self.options["auto_follow_redirects"]:
  2178. follow = input("Follow redirect to %s? (y/n) " % new_gi.url)
  2179. # Otherwise, follow away
  2180. else:
  2181. follow = "yes"
  2182. if follow.strip().lower() not in ("y", "yes"):
  2183. raise UserAbortException()
  2184. self._debug("Following redirect to %s." % new_gi.url)
  2185. self._debug("This is consecutive redirect number %d." % len(self.previous_redirectors))
  2186. self.previous_redirectors.add(gi.url)
  2187. if status == "31":
  2188. # Permanent redirect
  2189. self.permanent_redirects[gi.url] = new_gi.url
  2190. return self._fetch_over_network(new_gi)
  2191. # Errors
  2192. elif status.startswith("4") or status.startswith("5"):
  2193. raise RuntimeError(meta)
  2194. # Client cert
  2195. elif status.startswith("6"):
  2196. self._handle_cert_request(meta)
  2197. return self._fetch_over_network(gi)
  2198. # Invalid status
  2199. elif not status.startswith("2"):
  2200. raise RuntimeError("Server returned undefined status code %s!" % status)
  2201. # If we're here, this must be a success and there's a response body
  2202. assert status.startswith("2")
  2203. mime = meta
  2204. # Read the response body over the network
  2205. fbody = f.read()
  2206. # DEFAULT GEMINI MIME
  2207. if mime == "":
  2208. mime = "text/gemini; charset=utf-8"
  2209. shortmime, mime_options = cgi.parse_header(mime)
  2210. if "charset" in mime_options:
  2211. try:
  2212. codecs.lookup(mime_options["charset"])
  2213. except LookupError:
  2214. raise RuntimeError("Header declared unknown encoding %s" % value)
  2215. if shortmime.startswith("text/"):
  2216. #Get the charset and default to UTF-8 in none
  2217. encoding = mime_options.get("charset", "UTF-8")
  2218. try:
  2219. body = fbody.decode(encoding)
  2220. except UnicodeError:
  2221. raise RuntimeError("Could not decode response body using %s\
  2222. encoding declared in header!" % encoding)
  2223. else:
  2224. body = fbody
  2225. gi.write_body(body,mime)
  2226. return gi
  2227. def _send_request(self, gi):
  2228. """Send a selector to a given host and port.
  2229. Returns the resolved address and binary file with the reply."""
  2230. host, port = gi.host, gi.port
  2231. # Do DNS resolution
  2232. addresses = self._get_addresses(host, port)
  2233. # Prepare TLS context
  2234. protocol = ssl.PROTOCOL_TLS_CLIENT if sys.version_info.minor >=6 else ssl.PROTOCOL_TLSv1_2
  2235. context = ssl.SSLContext(protocol)
  2236. # Use CAs or TOFU
  2237. if self.options["tls_mode"] == "ca":
  2238. context.verify_mode = ssl.CERT_REQUIRED
  2239. context.check_hostname = True
  2240. context.load_default_certs()
  2241. else:
  2242. context.check_hostname = False
  2243. context.verify_mode = ssl.CERT_NONE
  2244. # Impose minimum TLS version
  2245. ## In 3.7 and above, this is easy...
  2246. if sys.version_info.minor >= 7:
  2247. context.minimum_version = ssl.TLSVersion.TLSv1_2
  2248. ## Otherwise, it seems very hard...
  2249. ## The below is less strict than it ought to be, but trying to disable
  2250. ## TLS v1.1 here using ssl.OP_NO_TLSv1_1 produces unexpected failures
  2251. ## with recent versions of OpenSSL. What a mess...
  2252. else:
  2253. context.options |= ssl.OP_NO_SSLv3
  2254. context.options |= ssl.OP_NO_SSLv2
  2255. # Try to enforce sensible ciphers
  2256. try:
  2257. context.set_ciphers("AESGCM+ECDHE:AESGCM+DHE:CHACHA20+ECDHE:CHACHA20+DHE:!DSS:!SHA1:!MD5:@STRENGTH")
  2258. except ssl.SSLError:
  2259. # Rely on the server to only support sensible things, I guess...
  2260. pass
  2261. # Load client certificate if needed
  2262. if self.client_certs["active"]:
  2263. certfile, keyfile = self.client_certs["active"]
  2264. context.load_cert_chain(certfile, keyfile)
  2265. # Connect to remote host by any address possible
  2266. err = None
  2267. for address in addresses:
  2268. self._debug("Connecting to: " + str(address[4]))
  2269. s = socket.socket(address[0], address[1])
  2270. if self.sync_only:
  2271. timeout = self.options["short_timeout"]
  2272. else:
  2273. timeout = self.options["timeout"]
  2274. s.settimeout(timeout)
  2275. s = context.wrap_socket(s, server_hostname = gi.host)
  2276. try:
  2277. s.connect(address[4])
  2278. break
  2279. except OSError as e:
  2280. err = e
  2281. else:
  2282. # If we couldn't connect to *any* of the addresses, just
  2283. # bubble up the exception from the last attempt and deny
  2284. # knowledge of earlier failures.
  2285. raise err
  2286. if sys.version_info.minor >=5:
  2287. self._debug("Established {} connection.".format(s.version()))
  2288. self._debug("Cipher is: {}.".format(s.cipher()))
  2289. # Do TOFU
  2290. if self.options["tls_mode"] != "ca":
  2291. cert = s.getpeercert(binary_form=True)
  2292. self._validate_cert(address[4][0], host, cert)
  2293. # Remember that we showed the current cert to this domain...
  2294. if self.client_certs["active"]:
  2295. self.active_cert_domains.append(gi.host)
  2296. self.client_certs[gi.host] = self.client_certs["active"]
  2297. # Send request and wrap response in a file descriptor
  2298. self._debug("Sending %s<CRLF>" % gi.url)
  2299. s.sendall((gi.url + CRLF).encode("UTF-8"))
  2300. mf= s.makefile(mode = "rb")
  2301. return address, mf
  2302. def _get_addresses(self, host, port):
  2303. # DNS lookup - will get IPv4 and IPv6 records if IPv6 is enabled
  2304. if ":" in host:
  2305. # This is likely a literal IPv6 address, so we can *only* ask for
  2306. # IPv6 addresses or getaddrinfo will complain
  2307. family_mask = socket.AF_INET6
  2308. elif socket.has_ipv6 and self.options["ipv6"]:
  2309. # Accept either IPv4 or IPv6 addresses
  2310. family_mask = 0
  2311. else:
  2312. # IPv4 only
  2313. family_mask = socket.AF_INET
  2314. addresses = socket.getaddrinfo(host, port, family=family_mask,
  2315. type=socket.SOCK_STREAM)
  2316. # Sort addresses so IPv6 ones come first
  2317. addresses.sort(key=lambda add: add[0] == socket.AF_INET6, reverse=True)
  2318. return addresses
  2319. def _handle_cert_request(self, meta):
  2320. # Don't do client cert stuff in restricted mode, as in principle
  2321. # it could be used to fill up the disk by creating a whole lot of
  2322. # certificates
  2323. if self.restricted:
  2324. print("The server is requesting a client certificate.")
  2325. print("These are not supported in restricted mode, sorry.")
  2326. raise UserAbortException()
  2327. print("SERVER SAYS: ", meta)
  2328. # Present different messages for different 6x statuses, but
  2329. # handle them the same.
  2330. if status in ("64", "65"):
  2331. print("The server rejected your certificate because it is either expired or not yet valid.")
  2332. elif status == "63":
  2333. print("The server did not accept your certificate.")
  2334. print("You may need to e.g. coordinate with the admin to get your certificate fingerprint whitelisted.")
  2335. else:
  2336. print("The site {} is requesting a client certificate.".format(gi.host))
  2337. print("This will allow the site to recognise you across requests.")
  2338. # Give the user choices
  2339. print("What do you want to do?")
  2340. print("1. Give up.")
  2341. print("2. Generate a new transient certificate.")
  2342. print("3. Generate a new persistent certificate.")
  2343. print("4. Load a previously generated persistent.")
  2344. print("5. Load certificate from an external file.")
  2345. if self.sync_only:
  2346. choice = 1
  2347. else:
  2348. choice = input("> ").strip()
  2349. if choice == "2":
  2350. self._generate_transient_cert_cert()
  2351. elif choice == "3":
  2352. self._generate_persistent_client_cert()
  2353. elif choice == "4":
  2354. self._choose_client_cert()
  2355. elif choice == "5":
  2356. self._load_client_cert()
  2357. else:
  2358. print("Giving up.")
  2359. raise UserAbortException()
  2360. def _validate_cert(self, address, host, cert):
  2361. """
  2362. Validate a TLS certificate in TOFU mode.
  2363. If the cryptography module is installed:
  2364. - Check the certificate Common Name or SAN matches `host`
  2365. - Check the certificate's not valid before date is in the past
  2366. - Check the certificate's not valid after date is in the future
  2367. Whether the cryptography module is installed or not, check the
  2368. certificate's fingerprint against the TOFU database to see if we've
  2369. previously encountered a different certificate for this IP address and
  2370. hostname.
  2371. """
  2372. now = datetime.datetime.utcnow()
  2373. if _HAS_CRYPTOGRAPHY:
  2374. # Using the cryptography module we can get detailed access
  2375. # to the properties of even self-signed certs, unlike in
  2376. # the standard ssl library...
  2377. c = x509.load_der_x509_certificate(cert, _BACKEND)
  2378. # Check certificate validity dates
  2379. if c.not_valid_before >= now:
  2380. raise CertificateError("Certificate not valid until: {}!".format(c.not_valid_before))
  2381. elif c.not_valid_after <= now:
  2382. raise CertificateError("Certificate expired as of: {})!".format(c.not_valid_after))
  2383. # Check certificate hostnames
  2384. names = []
  2385. common_name = c.subject.get_attributes_for_oid(x509.oid.NameOID.COMMON_NAME)
  2386. if common_name:
  2387. names.append(common_name[0].value)
  2388. try:
  2389. names.extend([alt.value for alt in c.extensions.get_extension_for_oid(x509.oid.ExtensionOID.SUBJECT_ALTERNATIVE_NAME).value])
  2390. except x509.ExtensionNotFound:
  2391. pass
  2392. names = set(names)
  2393. for name in names:
  2394. try:
  2395. ssl._dnsname_match(name, host)
  2396. break
  2397. except CertificateError:
  2398. continue
  2399. else:
  2400. # If we didn't break out, none of the names were valid
  2401. raise CertificateError("Hostname does not match certificate common name or any alternative names.")
  2402. sha = hashlib.sha256()
  2403. sha.update(cert)
  2404. fingerprint = sha.hexdigest()
  2405. # Have we been here before?
  2406. self.db_cur.execute("""SELECT fingerprint, first_seen, last_seen, count
  2407. FROM cert_cache
  2408. WHERE hostname=? AND address=?""", (host, address))
  2409. cached_certs = self.db_cur.fetchall()
  2410. # If so, check for a match
  2411. if cached_certs:
  2412. max_count = 0
  2413. most_frequent_cert = None
  2414. for cached_fingerprint, first, last, count in cached_certs:
  2415. if count > max_count:
  2416. max_count = count
  2417. most_frequent_cert = cached_fingerprint
  2418. if fingerprint == cached_fingerprint:
  2419. # Matched!
  2420. self._debug("TOFU: Accepting previously seen ({} times) certificate {}".format(count, fingerprint))
  2421. self.db_cur.execute("""UPDATE cert_cache
  2422. SET last_seen=?, count=?
  2423. WHERE hostname=? AND address=? AND fingerprint=?""",
  2424. (now, count+1, host, address, fingerprint))
  2425. self.db_conn.commit()
  2426. break
  2427. else:
  2428. if _HAS_CRYPTOGRAPHY:
  2429. # Load the most frequently seen certificate to see if it has
  2430. # expired
  2431. certdir = os.path.join(_CONFIG_DIR, "cert_cache")
  2432. with open(os.path.join(certdir, most_frequent_cert+".crt"), "rb") as fp:
  2433. previous_cert = fp.read()
  2434. previous_cert = x509.load_der_x509_certificate(previous_cert, _BACKEND)
  2435. previous_ttl = previous_cert.not_valid_after - now
  2436. print(previous_ttl)
  2437. self._debug("TOFU: Unrecognised certificate {}! Raising the alarm...".format(fingerprint))
  2438. print("****************************************")
  2439. print("[SECURITY WARNING] Unrecognised certificate!")
  2440. print("The certificate presented for {} ({}) has never been seen before.".format(host, address))
  2441. print("This MIGHT be a Man-in-the-Middle attack.")
  2442. print("A different certificate has previously been seen {} times.".format(max_count))
  2443. if _HAS_CRYPTOGRAPHY:
  2444. if previous_ttl < datetime.timedelta():
  2445. print("That certificate has expired, which reduces suspicion somewhat.")
  2446. else:
  2447. print("That certificate is still valid for: {}".format(previous_ttl))
  2448. print("****************************************")
  2449. print("Attempt to verify the new certificate fingerprint out-of-band:")
  2450. print(fingerprint)
  2451. if self.sync_only:
  2452. choice = self.automatic_choice
  2453. else:
  2454. choice = input("Accept this new certificate? Y/N ").strip().lower()
  2455. if choice in ("y", "yes"):
  2456. self.db_cur.execute("""INSERT INTO cert_cache
  2457. VALUES (?, ?, ?, ?, ?, ?)""",
  2458. (host, address, fingerprint, now, now, 1))
  2459. self.db_conn.commit()
  2460. with open(os.path.join(certdir, fingerprint+".crt"), "wb") as fp:
  2461. fp.write(cert)
  2462. else:
  2463. raise Exception("TOFU Failure!")
  2464. # If not, cache this cert
  2465. else:
  2466. self._debug("TOFU: Blindly trusting first ever certificate for this host!")
  2467. self.db_cur.execute("""INSERT INTO cert_cache
  2468. VALUES (?, ?, ?, ?, ?, ?)""",
  2469. (host, address, fingerprint, now, now, 1))
  2470. self.db_conn.commit()
  2471. certdir = os.path.join(_CONFIG_DIR, "cert_cache")
  2472. if not os.path.exists(certdir):
  2473. os.makedirs(certdir)
  2474. with open(os.path.join(certdir, fingerprint+".crt"), "wb") as fp:
  2475. fp.write(cert)
  2476. def _get_handler_cmd(self, mimetype):
  2477. # Now look for a handler for this mimetype
  2478. # Consider exact matches before wildcard matches
  2479. exact_matches = []
  2480. wildcard_matches = []
  2481. for handled_mime, cmd_str in _MIME_HANDLERS.items():
  2482. if "*" in handled_mime:
  2483. wildcard_matches.append((handled_mime, cmd_str))
  2484. else:
  2485. exact_matches.append((handled_mime, cmd_str))
  2486. for handled_mime, cmd_str in exact_matches + wildcard_matches:
  2487. if fnmatch.fnmatch(mimetype, handled_mime):
  2488. break
  2489. else:
  2490. # Use "xdg-open" as a last resort.
  2491. if _HAS_XDGOPEN:
  2492. cmd_str = "xdg-open \"%s\""
  2493. else:
  2494. cmd_str = "echo ""Can’t find how to open %s"""
  2495. print("Please install xdg-open (usually from xdg-util package)")
  2496. self._debug("Using handler: %s" % cmd_str)
  2497. return cmd_str
  2498. #TODO: remove format_geminiitem
  2499. def _format_geminiitem(self, index, gi, url=False):
  2500. protocol = "" if gi.scheme == "gemini" else " %s" % gi.scheme
  2501. line = "[%d%s] %s" % (index, protocol, gi.name or gi.url)
  2502. if gi.name and url:
  2503. line += " (%s)" % gi.url
  2504. return line
  2505. def _show_lookup(self, offset=0, end=None, url=False):
  2506. for n, gi in enumerate(self.lookup[offset:end]):
  2507. print(self._format_geminiitem(n+offset+1, gi, url))
  2508. def _update_history(self, gi):
  2509. # We never update while in sync_only
  2510. if self.sync_only:
  2511. return
  2512. # We don’t add lists to history
  2513. #if not gi or os.path.join(_DATA_DIR,"lists") in gi.url:
  2514. # return
  2515. histlist = self.get_list("history")
  2516. links = self.list_get_links("history")
  2517. # avoid duplicate
  2518. length = len(links)
  2519. if length > self.options["history_size"]:
  2520. length = self.options["history_size"]
  2521. if length > 0 and links[self.hist_index] == gi:
  2522. return
  2523. self.list_add_top("history",limit=self.options["history_size"],truncate_lines=self.hist_index)
  2524. self.hist_index = 0
  2525. def _log_visit(self, gi, address, size):
  2526. if not address:
  2527. return
  2528. self.log["requests"] += 1
  2529. self.log["bytes_recvd"] += size
  2530. self.visited_hosts.add(address)
  2531. if address[0] == socket.AF_INET:
  2532. self.log["ipv4_requests"] += 1
  2533. self.log["ipv4_bytes_recvd"] += size
  2534. elif address[0] == socket.AF_INET6:
  2535. self.log["ipv6_requests"] += 1
  2536. self.log["ipv6_bytes_recvd"] += size
  2537. def _debug(self, debug_text):
  2538. if not self.options["debug"]:
  2539. return
  2540. debug_text = "\x1b[0;32m[DEBUG] " + debug_text + "\x1b[0m"
  2541. print(debug_text)
  2542. def _load_client_cert(self):
  2543. """
  2544. Interactively load a TLS client certificate from the filesystem in PEM
  2545. format.
  2546. """
  2547. print("Loading client certificate file, in PEM format (blank line to cancel)")
  2548. certfile = input("Certfile path: ").strip()
  2549. if not certfile:
  2550. print("Aborting.")
  2551. return
  2552. certfile = os.path.expanduser(certfile)
  2553. if not os.path.isfile(certfile):
  2554. print("Certificate file {} does not exist.".format(certfile))
  2555. return
  2556. print("Loading private key file, in PEM format (blank line to cancel)")
  2557. keyfile = input("Keyfile path: ").strip()
  2558. if not keyfile:
  2559. print("Aborting.")
  2560. return
  2561. keyfile = os.path.expanduser(keyfile)
  2562. if not os.path.isfile(keyfile):
  2563. print("Private key file {} does not exist.".format(keyfile))
  2564. return
  2565. self._activate_client_cert(certfile, keyfile)
  2566. def _generate_transient_cert_cert(self):
  2567. """
  2568. Use `openssl` command to generate a new transient client certificate
  2569. with 24 hours of validity.
  2570. """
  2571. certdir = os.path.join(_CONFIG_DIR, "transient_certs")
  2572. name = str(uuid.uuid4())
  2573. self._generate_client_cert(certdir, name, transient=True)
  2574. self.active_is_transient = True
  2575. self.transient_certs_created.append(name)
  2576. def _generate_persistent_client_cert(self):
  2577. """
  2578. Interactively use `openssl` command to generate a new persistent client
  2579. certificate with one year of validity.
  2580. """
  2581. certdir = os.path.join(_CONFIG_DIR, "client_certs")
  2582. print("What do you want to name this new certificate?")
  2583. print("Answering `mycert` will create `{0}/mycert.crt` and `{0}/mycert.key`".format(certdir))
  2584. name = input("> ")
  2585. if not name.strip():
  2586. print("Aborting.")
  2587. return
  2588. self._generate_client_cert(certdir, name)
  2589. def _generate_client_cert(self, certdir, basename, transient=False):
  2590. """
  2591. Use `openssl` binary to generate a client certificate (which may be
  2592. transient or persistent) and save the certificate and private key to the
  2593. specified directory with the specified basename.
  2594. """
  2595. if not os.path.exists(certdir):
  2596. os.makedirs(certdir)
  2597. certfile = os.path.join(certdir, basename+".crt")
  2598. keyfile = os.path.join(certdir, basename+".key")
  2599. cmd = "openssl req -x509 -newkey rsa:2048 -days {} -nodes -keyout {} -out {}".format(1 if transient else 365, keyfile, certfile)
  2600. if transient:
  2601. cmd += " -subj '/CN={}'".format(basename)
  2602. os.system(cmd)
  2603. self._activate_client_cert(certfile, keyfile)
  2604. def _choose_client_cert(self):
  2605. """
  2606. Interactively select a previously generated client certificate and
  2607. activate it.
  2608. """
  2609. certdir = os.path.join(_CONFIG_DIR, "client_certs")
  2610. certs = glob.glob(os.path.join(certdir, "*.crt"))
  2611. if len(certs) == 0:
  2612. print("There are no previously generated certificates.")
  2613. return
  2614. certdir = {}
  2615. for n, cert in enumerate(certs):
  2616. certdir[str(n+1)] = (cert, os.path.splitext(cert)[0] + ".key")
  2617. print("{}. {}".format(n+1, os.path.splitext(os.path.basename(cert))[0]))
  2618. choice = input("> ").strip()
  2619. if choice in certdir:
  2620. certfile, keyfile = certdir[choice]
  2621. self._activate_client_cert(certfile, keyfile)
  2622. else:
  2623. print("What?")
  2624. def _activate_client_cert(self, certfile, keyfile):
  2625. self.client_certs["active"] = (certfile, keyfile)
  2626. self.active_cert_domains = []
  2627. self.prompt = self.cert_prompt + "+" + os.path.basename(certfile).replace('.crt','') + "> " + "\x1b[0m"
  2628. self._debug("Using ID {} / {}.".format(*self.client_certs["active"]))
  2629. def _deactivate_client_cert(self):
  2630. if self.active_is_transient:
  2631. for filename in self.client_certs["active"]:
  2632. os.remove(filename)
  2633. for domain in self.active_cert_domains:
  2634. self.client_certs.pop(domain)
  2635. self.client_certs["active"] = None
  2636. self.active_cert_domains = []
  2637. self.prompt = self.no_cert_prompt
  2638. self.active_is_transient = False
  2639. # Cmd implementation follows
  2640. def default(self, line):
  2641. if line.strip() == "EOF":
  2642. return self.onecmd("quit")
  2643. elif line.strip() == "..":
  2644. return self.do_up()
  2645. elif line.startswith("/"):
  2646. return self.do_find(line[1:])
  2647. # Expand abbreviated commands
  2648. first_word = line.split()[0].strip()
  2649. if first_word in _ABBREVS:
  2650. full_cmd = _ABBREVS[first_word]
  2651. expanded = line.replace(first_word, full_cmd, 1)
  2652. return self.onecmd(expanded)
  2653. # Try to access it like an URL
  2654. if looks_like_url(line):
  2655. return self.do_go(line)
  2656. # Try to parse numerical index for lookup table
  2657. try:
  2658. n = int(line.strip())
  2659. except ValueError:
  2660. print("What?")
  2661. return
  2662. try:
  2663. gi = self.lookup[n-1]
  2664. except IndexError:
  2665. print ("Index too high!")
  2666. return
  2667. self.index_index = n
  2668. self._go_to_gi(gi)
  2669. ### Settings
  2670. @restricted
  2671. def do_redirect(self,line):
  2672. """Display and manage the list of redirected URLs. This features is mostly useful to use privacy-friendly frontends for popular websites."""
  2673. if len(line.split()) == 1:
  2674. if line in self.redirects:
  2675. print("%s is redirected to %s" %(line,self.redirects[line]))
  2676. else:
  2677. print("Please add a destination to redirect %s" %line)
  2678. elif len(line.split()) >= 2:
  2679. orig, dest = line.split(" ",1)
  2680. if dest.lower() == "none":
  2681. if orig in self.redirects:
  2682. self.redirects.pop(orig)
  2683. print("Redirection for %s has been removed"%orig)
  2684. else:
  2685. print("%s was not redirected. Nothing has changed."%orig)
  2686. elif dest.lower() == "block":
  2687. self.redirects[orig] = "blocked"
  2688. print("%s will now be blocked"%orig)
  2689. else:
  2690. self.redirects[orig] = dest
  2691. print("%s will now be redirected to %s" %(orig,dest))
  2692. else:
  2693. toprint="Current redirections:\n"
  2694. toprint+="--------------------\n"
  2695. for r in self.redirects:
  2696. toprint += ("%s\t->\t%s\n" %(r,self.redirects[r]))
  2697. toprint +="\nTo add new, use \"redirect origine.com destination.org\""
  2698. toprint +="\nTo remove a redirect, use \"redirect origine.com NONE\""
  2699. toprint +="\nTo completely block a website, use \"redirect origine.com BLOCK\""
  2700. print(toprint)
  2701. @restricted
  2702. def do_set(self, line):
  2703. """View or set various options."""
  2704. if not line.strip():
  2705. # Show all current settings
  2706. for option in sorted(self.options.keys()):
  2707. print("%s %s" % (option, self.options[option]))
  2708. elif len(line.split()) == 1 :
  2709. # Show current value of one specific setting
  2710. option = line.strip()
  2711. if option in self.options:
  2712. print("%s %s" % (option, self.options[option]))
  2713. else:
  2714. print("Unrecognised option %s" % option)
  2715. else:
  2716. # Set value of one specific setting
  2717. option, value = line.split(" ", 1)
  2718. if option not in self.options:
  2719. print("Unrecognised option %s" % option)
  2720. return
  2721. # Validate / convert values
  2722. elif option == "tls_mode":
  2723. if value.lower() not in ("ca", "tofu"):
  2724. print("TLS mode must be `ca` or `tofu`!")
  2725. return
  2726. elif option == "width":
  2727. if value.isnumeric():
  2728. value = int(value)
  2729. print("changing width to ",value)
  2730. global TERM_WIDTH
  2731. TERM_WIDTH = value
  2732. else:
  2733. print("%s is not a valid width (integer required)"%value)
  2734. elif option == "beta":
  2735. if value.lower() == "true":
  2736. global BETA
  2737. BETA = True
  2738. print("Experimental features are enabled.")
  2739. else:
  2740. # global BETA
  2741. BETA = False
  2742. print("Experimental features are disabled.")
  2743. self.options["beta"] = BETA
  2744. elif value.isnumeric():
  2745. value = int(value)
  2746. elif value.lower() == "false":
  2747. value = False
  2748. elif value.lower() == "true":
  2749. value = True
  2750. else:
  2751. try:
  2752. value = float(value)
  2753. except ValueError:
  2754. pass
  2755. self.options[option] = value
  2756. @restricted
  2757. def do_cert(self, line):
  2758. """Manage client certificates"""
  2759. print("Managing client certificates")
  2760. if self.client_certs["active"]:
  2761. print("Active certificate: {}".format(self.client_certs["active"][0]))
  2762. print("1. Deactivate client certificate.")
  2763. print("2. Generate new certificate.")
  2764. print("3. Load previously generated certificate.")
  2765. print("4. Load externally created client certificate from file.")
  2766. print("Enter blank line to exit certificate manager.")
  2767. choice = input("> ").strip()
  2768. if choice == "1":
  2769. print("Deactivating client certificate.")
  2770. self._deactivate_client_cert()
  2771. elif choice == "2":
  2772. self._generate_persistent_client_cert()
  2773. elif choice == "3":
  2774. self._choose_client_cert()
  2775. elif choice == "4":
  2776. self._load_client_cert()
  2777. else:
  2778. print("Aborting.")
  2779. @restricted
  2780. def do_handler(self, line):
  2781. """View or set handler commands for different MIME types."""
  2782. if not line.strip():
  2783. # Show all current handlers
  2784. for mime in sorted(_MIME_HANDLERS.keys()):
  2785. print("%s %s" % (mime, _MIME_HANDLERS[mime]))
  2786. elif len(line.split()) == 1:
  2787. mime = line.strip()
  2788. if mime in _MIME_HANDLERS:
  2789. print("%s %s" % (mime, _MIME_HANDLERS[mime]))
  2790. else:
  2791. print("No handler set for MIME type %s" % mime)
  2792. else:
  2793. mime, handler = line.split(" ", 1)
  2794. _MIME_HANDLERS[mime] = handler
  2795. if "%s" not in handler:
  2796. print("Are you sure you don't want to pass the filename to the handler?")
  2797. def do_abbrevs(self, *args):
  2798. """Print all Offpunk command abbreviations."""
  2799. header = "Command Abbreviations:"
  2800. self.stdout.write("\n{}\n".format(str(header)))
  2801. if self.ruler:
  2802. self.stdout.write("{}\n".format(str(self.ruler * len(header))))
  2803. for k, v in _ABBREVS.items():
  2804. self.stdout.write("{:<7} {}\n".format(k, v))
  2805. self.stdout.write("\n")
  2806. def do_offline(self, *args):
  2807. """Use Offpunk offline by only accessing cached content"""
  2808. if self.offline_only:
  2809. print("Offline and undisturbed.")
  2810. else:
  2811. self.offline_only = True
  2812. self.prompt = self.offline_prompt
  2813. print("Offpunk is now offline and will only access cached content")
  2814. def do_online(self, *args):
  2815. """Use Offpunk online with a direct connection"""
  2816. if self.offline_only:
  2817. self.offline_only = False
  2818. self.prompt = self.no_cert_prompt
  2819. print("Offpunk is online and will access the network")
  2820. else:
  2821. print("Already online. Try offline.")
  2822. def do_copy(self, arg):
  2823. """Copy the content of the last visited page as gemtext in the clipboard.
  2824. Use with "url" as argument to only copy the adress.
  2825. Use with "raw" to copy ANSI content as seen in your terminal (not gemtext).
  2826. Use with "cache" to copy the path of the cached content."""
  2827. if self.gi:
  2828. if _HAS_XSEL:
  2829. args = arg.split()
  2830. if args and args[0] == "url":
  2831. if len(args) > 1 and args[1].isdecimal():
  2832. gi = self.index[int(args[1])-1]
  2833. url = gi.url
  2834. else:
  2835. url = self.gi.url
  2836. run("echo %s |xsel -b -i" % url,direct_output=True)
  2837. elif args and args[0] == "raw":
  2838. run("cat \"%s\" |xsel -b -i" % self.gi.get_temp_filename(),direct_output=True)
  2839. elif args and args[0] == "cache":
  2840. run("echo %s |xsel -b -i" % self.gi.get_cache_path(), direct_output=True)
  2841. else:
  2842. run("cat \"%s\" |xsel -b -i" % self.gi.get_body(as_file=True), direct_output=True)
  2843. else:
  2844. print("Please install xsel to use copy")
  2845. else:
  2846. print("No content to copy, visit a page first")
  2847. ### Stuff for getting around
  2848. def do_go(self, line):
  2849. """Go to a gemini URL or marked item."""
  2850. line = line.strip()
  2851. if not line:
  2852. if shutil.which('xsel'):
  2853. clipboards = []
  2854. urls = []
  2855. for selec in ["-p","-s","-b"]:
  2856. try:
  2857. clipboards.append(run("xsel "+selec))
  2858. except Exception as err:
  2859. #print("Skippink clipboard %s because %s"%(selec,err))
  2860. pass
  2861. for u in clipboards:
  2862. if "://" in u and looks_like_url(u) and u not in urls :
  2863. urls.append(u)
  2864. if len(urls) > 1:
  2865. self.lookup = []
  2866. for u in urls:
  2867. self.lookup.append(GeminiItem(u))
  2868. print("Where do you want to go today?")
  2869. self._show_lookup()
  2870. elif len(urls) == 1:
  2871. self.do_go(urls[0])
  2872. else:
  2873. print("Go where? (hint: simply copy an URL in your clipboard)")
  2874. else:
  2875. print("Go where? (hint: install xsel to go to copied URLs)")
  2876. # First, check for possible marks
  2877. elif line in self.marks:
  2878. gi = self.marks[line]
  2879. self._go_to_gi(gi)
  2880. # or a local file
  2881. elif os.path.exists(os.path.expanduser(line)):
  2882. self._go_to_gi(GeminiItem(line))
  2883. # If this isn't a mark, treat it as a URL
  2884. elif looks_like_url(line):
  2885. self._go_to_gi(GeminiItem(line))
  2886. else:
  2887. print("%s is not a valid URL to go"%line)
  2888. @needs_gi
  2889. def do_reload(self, *args):
  2890. """Reload the current URL."""
  2891. if self.offline_only:
  2892. self.get_list("to_fetch")
  2893. r = self.list_add_line("to_fetch",gi=self.gi,verbose=False)
  2894. if r:
  2895. print("%s marked for syncing" %self.gi.url)
  2896. else:
  2897. print("%s already marked for syncing" %self.gi.url)
  2898. else:
  2899. self._go_to_gi(self.gi, check_cache=False)
  2900. @needs_gi
  2901. def do_up(self, *args):
  2902. """Go up one directory in the path.
  2903. Take an integer as argument to go up multiple times."""
  2904. level = 1
  2905. if args[0].isnumeric():
  2906. level = int(args[0])
  2907. elif args[0] != "":
  2908. print("Up only take integer as arguments")
  2909. self._go_to_gi(self.gi.up(level=level))
  2910. def do_back(self, *args):
  2911. """Go back to the previous gemini item."""
  2912. histfile = self.get_list("history")
  2913. links = self.list_get_links("history")
  2914. if self.hist_index >= len(links) -1:
  2915. return
  2916. self.hist_index += 1
  2917. gi = links[self.hist_index]
  2918. self._go_to_gi(gi, update_hist=False)
  2919. def do_forward(self, *args):
  2920. """Go forward to the next gemini item."""
  2921. histfile = self.get_list("history")
  2922. links = self.list_get_links("history")
  2923. if self.hist_index <= 0:
  2924. return
  2925. self.hist_index -= 1
  2926. gi = links[self.hist_index]
  2927. self._go_to_gi(gi, update_hist=False)
  2928. @needs_gi
  2929. def do_root(self, *args):
  2930. """Go to root selector of the server hosting current item."""
  2931. self._go_to_gi(self.gi.root())
  2932. def do_tour(self, line):
  2933. """Add index items as waypoints on a tour, which is basically a FIFO
  2934. queue of gemini items.
  2935. `tour` or `t` alone brings you to the next item in your tour.
  2936. Items can be added with `tour 1 2 3 4` or ranges like `tour 1-4`.
  2937. All items in current menu can be added with `tour *`.
  2938. Current item can be added back to the end of the tour with `tour .`.
  2939. Current tour can be listed with `tour ls` and scrubbed with `tour clear`."""
  2940. # Creating the tour list if needed
  2941. self.get_list("tour")
  2942. line = line.strip()
  2943. if not line:
  2944. # Fly to next waypoint on tour
  2945. if len(self.list_get_links("tour")) < 1:
  2946. print("End of tour.")
  2947. else:
  2948. url = self.list_go_to_line("1","tour")
  2949. if url:
  2950. self.list_rm_url(url,"tour")
  2951. elif line == "ls":
  2952. self.list_show("tour")
  2953. elif line == "clear":
  2954. for l in self.list_get_links("tour"):
  2955. self.list_rm_url(l.url_mode(),"tour")
  2956. elif line == "*":
  2957. for l in self.lookup:
  2958. self.list_add_line("tour",gi=l,verbose=False)
  2959. elif line == ".":
  2960. self.list_add_line("tour",verbose=False)
  2961. elif looks_like_url(line):
  2962. self.list_add_line("tour",gi=GeminiItem(line))
  2963. else:
  2964. for index in line.split():
  2965. try:
  2966. pair = index.split('-')
  2967. if len(pair) == 1:
  2968. # Just a single index
  2969. n = int(index)
  2970. gi = self.lookup[n-1]
  2971. self.list_add_line("tour",gi=gi,verbose=False)
  2972. elif len(pair) == 2:
  2973. # Two endpoints for a range of indices
  2974. if int(pair[0]) < int(pair[1]):
  2975. for n in range(int(pair[0]), int(pair[1]) + 1):
  2976. gi = self.lookup[n-1]
  2977. self.list_add_line("tour",gi=gi,verbose=False)
  2978. else:
  2979. for n in range(int(pair[0]), int(pair[1]) - 1, -1):
  2980. gi = self.lookup[n-1]
  2981. self.list_add_line("tour",gi=gi,verbose=False)
  2982. else:
  2983. # Syntax error
  2984. print("Invalid use of range syntax %s, skipping" % index)
  2985. except ValueError:
  2986. print("Non-numeric index %s, skipping." % index)
  2987. except IndexError:
  2988. print("Invalid index %d, skipping." % n)
  2989. @needs_gi
  2990. def do_mark(self, line):
  2991. """Mark the current item with a single letter. This letter can then
  2992. be passed to the 'go' command to return to the current item later.
  2993. Think of it like marks in vi: 'mark a'='ma' and 'go a'=''a'.
  2994. Marks are temporary until shutdown (not saved to disk)."""
  2995. line = line.strip()
  2996. if not line:
  2997. for mark, gi in self.marks.items():
  2998. print("[%s] %s (%s)" % (mark, gi.name, gi.url))
  2999. elif line.isalpha() and len(line) == 1:
  3000. self.marks[line] = self.gi
  3001. else:
  3002. print("Invalid mark, must be one letter")
  3003. @needs_gi
  3004. def do_info(self,line):
  3005. """Display information about current page."""
  3006. out = self.gi.get_page_title() + "\n\n"
  3007. out += "URL : " + self.gi.url + "\n"
  3008. out += "Path : " + self.gi.path + "\n"
  3009. out += "Mime : " + self.gi.get_mime() + "\n"
  3010. out += "Cache : " + self.gi.get_cache_path() + "\n"
  3011. tmp = self.gi.get_temp_filename()
  3012. if tmp != self.gi.get_cache_path():
  3013. out += "Tempfile : " + self.gi.get_temp_filename() + "\n"
  3014. if self.gi.renderer :
  3015. rend = str(self.gi.renderer.__class__)
  3016. rend = rend.lstrip("<class '__main__.").rstrip("'>")
  3017. else:
  3018. rend = "None"
  3019. out += "Renderer : " + rend + "\n\n"
  3020. lists = []
  3021. for l in self.list_lists():
  3022. if self.list_has_url(self.gi.url,l):
  3023. lists.append(l)
  3024. if len(lists) > 0:
  3025. out += "Page appeard in following lists :\n"
  3026. for l in lists:
  3027. if not self.list_is_system(l):
  3028. status = "normal list"
  3029. if self.list_is_subscribed(l):
  3030. status = "subscription"
  3031. elif self.list_is_frozen(l):
  3032. status = "frozen list"
  3033. out += " • %s\t(%s)\n" %(l,status)
  3034. for l in lists:
  3035. if self.list_is_system(l):
  3036. out += " • %s\n" %l
  3037. else:
  3038. out += "Page is not save in any list"
  3039. print(out)
  3040. def do_version(self, line):
  3041. """Display version and system information."""
  3042. def has(value):
  3043. if value:
  3044. return "\t\x1b[1;32mInstalled\x1b[0m\n"
  3045. else:
  3046. return "\t\x1b[1;31mNot Installed\x1b[0m\n"
  3047. output = "Offpunk " + _VERSION + "\n"
  3048. output += "===========\n"
  3049. output += "Highly recommended:\n"
  3050. output += " - python-cryptography : " + has(_HAS_CRYPTOGRAPHY)
  3051. output += " - xdg-open : " + has(_HAS_XDGOPEN)
  3052. output += "\nWeb browsing:\n"
  3053. output += " - python-requests : " + has(_DO_HTTP)
  3054. output += " - python-feedparser : " + has(_DO_FEED)
  3055. output += " - python-bs4 : " + has(_HAS_SOUP)
  3056. output += " - python-readability : " + has(_HAS_READABILITY)
  3057. output += " - timg : " + has(_HAS_TIMG)
  3058. if _NEW_CHAFA:
  3059. output += " - chafa 1.10+ : " + has(_HAS_CHAFA)
  3060. else:
  3061. output += " - chafa : " + has(_HAS_CHAFA)
  3062. output += " - python-pil : " + has(_HAS_PIL)
  3063. output += "\nNice to have:\n"
  3064. output += " - python-setproctitle : " + has(_HAS_SETPROCTITLE)
  3065. output += " - xsel : " + has(_HAS_XSEL)
  3066. output += "\nFeatures :\n"
  3067. if _NEW_CHAFA:
  3068. output += " - Render images (chafa or timg) : " + has(_RENDER_IMAGE)
  3069. else:
  3070. output += " - Render images (python-pil, chafa or timg) : " + has(_RENDER_IMAGE)
  3071. output += " - Render HTML (bs4, readability) : " + has(_DO_HTML)
  3072. output += " - Render Atom/RSS feeds (feedparser) : " + has(_DO_FEED)
  3073. output += " - Connect to http/https (requests) : " + has(_DO_HTTP)
  3074. output += " - copy to/from clipboard (xsel) : " + has(_HAS_XSEL)
  3075. output += " - restore last position (less 572+) : " + has(_LESS_RESTORE_POSITION)
  3076. output += "\n"
  3077. output += "Config directory : " + _CONFIG_DIR + "\n"
  3078. output += "User Data directory : " + _DATA_DIR + "\n"
  3079. output += "Cache directoy : " + _CACHE_PATH
  3080. print(output)
  3081. ### Stuff that modifies the lookup table
  3082. def do_ls(self, line):
  3083. """List contents of current index.
  3084. Use 'ls -l' to see URLs."""
  3085. self.lookup = self.index
  3086. self._show_lookup(url = "-l" in line)
  3087. self.page_index = 0
  3088. def do_gus(self, line):
  3089. """Submit a search query to the geminispace.info search engine."""
  3090. gus = GeminiItem("gemini://geminispace.info/search")
  3091. self._go_to_gi(gus.query(line))
  3092. def do_history(self, *args):
  3093. """Display history."""
  3094. self.list_show("history")
  3095. @needs_gi
  3096. def do_find(self, searchterm):
  3097. """Find in current page by displaying only relevant lines (grep)."""
  3098. self.gi.display(grep=searchterm)
  3099. def emptyline(self):
  3100. """Page through index ten lines at a time."""
  3101. i = self.page_index
  3102. if i > len(self.lookup):
  3103. return
  3104. self._show_lookup(offset=i, end=i+10)
  3105. self.page_index += 10
  3106. ### Stuff that does something to most recently viewed item
  3107. @needs_gi
  3108. def do_cat(self, *args):
  3109. """Run most recently visited item through "cat" command."""
  3110. run("cat \"%s\"" % self.gi.get_temp_filename(),direct_output=True)
  3111. @needs_gi
  3112. def do_view(self, *args):
  3113. """Run most recently visited item through "less" command, restoring \
  3114. previous position.
  3115. Use "view normal" to see the default article view on html page.
  3116. Use "view full" to see a complete html page instead of the article view.
  3117. Use "view feed" to see the the linked feed of the page (in any).
  3118. Use "view feeds" to see available feeds on this page.
  3119. (full, feed, feeds have no effect on non-html content)."""
  3120. if self.gi and args and args[0] != "":
  3121. if args[0] in ["full","debug"]:
  3122. self._go_to_gi(self.gi,mode=args[0])
  3123. elif args[0] in ["normal","readable"]:
  3124. self._go_to_gi(self.gi,mode="readable")
  3125. elif args[0] == "feed":
  3126. subs = self.gi.get_subscribe_links()
  3127. if len(subs) > 1:
  3128. self.do_go(subs[1][0])
  3129. elif "rss" in subs[0][1] or "atom" in subs[0][1]:
  3130. print("%s is already a feed" %self.gi.url)
  3131. else:
  3132. print("No other feed found on %s"%self.gi.url)
  3133. elif args[0] == "feeds":
  3134. subs = self.gi.get_subscribe_links()
  3135. stri = "Available views :\n"
  3136. counter = 0
  3137. for s in subs:
  3138. counter += 1
  3139. stri += "[%s] %s [%s]\n"%(counter,s[0],s[1])
  3140. stri += "Which view do you want to see ? >"
  3141. ans = input(stri)
  3142. if ans.isdigit() and 0 < int(ans) <= len(subs):
  3143. self.do_go(subs[int(ans)-1][0])
  3144. else:
  3145. print("Valid argument for view are : normal, full, feed, feeds")
  3146. else:
  3147. self._go_to_gi(self.gi)
  3148. @needs_gi
  3149. def do_open(self, *args):
  3150. """Open current item with the configured handler or xdg-open.
  3151. Uses "open url" to open current URL in a browser.
  3152. see "handler" command to set your handler."""
  3153. if args[0] == "url":
  3154. run("xdg-open %s" %self.gi.url,direct_output=True)
  3155. else:
  3156. cmd_str = self._get_handler_cmd(self.gi.get_mime())
  3157. file_path = "\"%s\"" %self.gi.get_body(as_file=True)
  3158. cmd_str = cmd_str % file_path
  3159. run(cmd_str,direct_output=True)
  3160. @restricted
  3161. @needs_gi
  3162. def do_shell(self, line):
  3163. """'cat' most recently visited item through a shell pipeline.
  3164. '!' is an useful shortcut."""
  3165. run("cat \"%s\" |" % self.gi.get_temp_filename() + line,direct_output=True)
  3166. @restricted
  3167. @needs_gi
  3168. def do_save(self, line):
  3169. """Save an item to the filesystem.
  3170. 'save n filename' saves menu item n to the specified filename.
  3171. 'save filename' saves the last viewed item to the specified filename.
  3172. 'save n' saves menu item n to an automagic filename."""
  3173. args = line.strip().split()
  3174. # First things first, figure out what our arguments are
  3175. if len(args) == 0:
  3176. # No arguments given at all
  3177. # Save current item, if there is one, to a file whose name is
  3178. # inferred from the gemini path
  3179. if not self.gi.is_cache_valid():
  3180. print("You cannot save if not cached!")
  3181. return
  3182. else:
  3183. index = None
  3184. filename = None
  3185. elif len(args) == 1:
  3186. # One argument given
  3187. # If it's numeric, treat it as an index, and infer the filename
  3188. try:
  3189. index = int(args[0])
  3190. filename = None
  3191. # If it's not numeric, treat it as a filename and
  3192. # save the current item
  3193. except ValueError:
  3194. index = None
  3195. filename = os.path.expanduser(args[0])
  3196. elif len(args) == 2:
  3197. # Two arguments given
  3198. # Treat first as an index and second as filename
  3199. index, filename = args
  3200. try:
  3201. index = int(index)
  3202. except ValueError:
  3203. print("First argument is not a valid item index!")
  3204. return
  3205. filename = os.path.expanduser(filename)
  3206. else:
  3207. print("You must provide an index, a filename, or both.")
  3208. return
  3209. # Next, fetch the item to save, if it's not the current one.
  3210. if index:
  3211. last_gi = self.gi
  3212. try:
  3213. gi = self.lookup[index-1]
  3214. self._go_to_gi(gi, update_hist = False, handle = False)
  3215. except IndexError:
  3216. print ("Index too high!")
  3217. self.gi = last_gi
  3218. return
  3219. else:
  3220. gi = self.gi
  3221. # Derive filename from current GI's path, if one hasn't been set
  3222. if not filename:
  3223. filename = gi.get_filename()
  3224. # Check for filename collisions and actually do the save if safe
  3225. if os.path.exists(filename):
  3226. print("File %s already exists!" % filename)
  3227. else:
  3228. # Don't use _get_active_tmpfile() here, because we want to save the
  3229. # "source code" of menus, not the rendered view - this way Offpunk
  3230. # can navigate to it later.
  3231. path = gi.get_body(as_file=True)
  3232. if os.path.isdir(path):
  3233. print("Can’t save %s because it’s a folder, not a file"%path)
  3234. else:
  3235. print("Saved to %s" % filename)
  3236. shutil.copyfile(path, filename)
  3237. # Restore gi if necessary
  3238. if index != None:
  3239. self._go_to_gi(last_gi, handle=False)
  3240. @needs_gi
  3241. def do_url(self, *args):
  3242. """Print URL of most recently visited item."""
  3243. print(self.gi.url)
  3244. ### Bookmarking stuff
  3245. @restricted
  3246. @needs_gi
  3247. def do_add(self, line):
  3248. """Add the current URL to the list specied as argument.
  3249. If no argument given, URL is added to Bookmarks."""
  3250. args = line.split()
  3251. if len(args) < 1 :
  3252. list = "bookmarks"
  3253. if not self.list_path(list):
  3254. self.list_create(list)
  3255. self.list_add_line(list)
  3256. else:
  3257. self.list_add_line(args[0])
  3258. # Get the list file name, creating or migrating it if needed.
  3259. # Migrate bookmarks/tour/to_fetch from XDG_CONFIG to XDG_DATA
  3260. # We migrate only if the file exists in XDG_CONFIG and not XDG_DATA
  3261. def get_list(self,list):
  3262. list_path = self.list_path(list)
  3263. if not list_path:
  3264. old_file_gmi = os.path.join(_CONFIG_DIR,list + ".gmi")
  3265. old_file_nogmi = os.path.join(_CONFIG_DIR,list)
  3266. target = os.path.join(_DATA_DIR,"lists")
  3267. if os.path.exists(old_file_gmi):
  3268. shutil.move(old_file_gmi,target)
  3269. elif os.path.exists(old_file_nogmi):
  3270. targetgmi = os.path.join(target,list+".gmi")
  3271. shutil.move(old_file_nogmi,targetgmi)
  3272. else:
  3273. if list == "subscribed":
  3274. title = "Subscriptions #subscribed (new links in those pages will be added to tour)"
  3275. elif list == "to_fetch":
  3276. title = "Links requested and to be fetched during the next --sync"
  3277. else:
  3278. title = None
  3279. self.list_create(list, title=title)
  3280. list_path = self.list_path(list)
  3281. return list_path
  3282. def do_subscribe(self,line):
  3283. """Subscribe to current page by saving it in the "subscribed" list.
  3284. If a new link is found in the page during a --sync, the new link is automatically
  3285. fetched and added to your next tour.
  3286. To unsubscribe, remove the page from the "subscribed" list."""
  3287. subs = self.gi.get_subscribe_links()
  3288. if len(subs) > 1:
  3289. stri = "Multiple feeds have been found :\n"
  3290. elif "rss" in subs[0][1] or "atom" in subs[0][1] :
  3291. stri = "This page is already a feed:\n"
  3292. else:
  3293. stri = "No feed detected. You can still watch the page :\n"
  3294. counter = 0
  3295. for l in subs:
  3296. link = l[0]
  3297. already = []
  3298. for li in self.list_lists():
  3299. if self.list_is_subscribed(li):
  3300. if self.list_has_url(link,li):
  3301. already.append(li)
  3302. stri += "[%s] %s [%s]\n"%(counter+1,link,l[1])
  3303. if len(already) > 0:
  3304. stri += "\t -> (already subscribed through lists %s)\n"%(str(already))
  3305. counter += 1
  3306. stri += "\n"
  3307. stri += "Which feed do you want to subscribe ? > "
  3308. ans = input(stri)
  3309. if ans.isdigit() and 0 < int(ans) <= len(subs):
  3310. sublink,mime,title = subs[int(ans)-1]
  3311. else:
  3312. sublink,title = None,None
  3313. if sublink:
  3314. sublink = self.gi.absolutise_url(sublink)
  3315. gi = GeminiItem(sublink,name=title)
  3316. list_path = self.get_list("subscribed")
  3317. added = self.list_add_line("subscribed",gi=gi,verbose=False)
  3318. if added :
  3319. print("Subscribed to %s" %sublink)
  3320. else:
  3321. print("You are already subscribed to %s"%sublink)
  3322. else:
  3323. print("No subscription registered")
  3324. def do_bookmarks(self, line):
  3325. """Show or access the bookmarks menu.
  3326. 'bookmarks' shows all bookmarks.
  3327. 'bookmarks n' navigates immediately to item n in the bookmark menu.
  3328. Bookmarks are stored using the 'add' command."""
  3329. list_path = self.get_list("bookmarks")
  3330. args = line.strip()
  3331. if len(args.split()) > 1 or (args and not args.isnumeric()):
  3332. print("bookmarks command takes a single integer argument!")
  3333. elif args:
  3334. self.list_go_to_line(args,"bookmarks")
  3335. else:
  3336. self.list_show("bookmarks")
  3337. def do_archive(self,args):
  3338. """Archive current page by removing it from every list and adding it to
  3339. archives, which is a special historical list limited in size. It is similar to `move archives`."""
  3340. for li in self.list_lists():
  3341. if li not in ["archives", "history"]:
  3342. deleted = self.list_rm_url(self.gi.url_mode(),li)
  3343. if deleted:
  3344. print("Removed from %s"%li)
  3345. self.list_add_top("archives",limit=self.options["archives_size"])
  3346. print("Archiving: %s"%self.gi.get_page_title())
  3347. print("\x1b[2;34mCurrent maximum size of archives : %s\x1b[0m" %self.options["archives_size"])
  3348. def list_add_line(self,list,gi=None,verbose=True):
  3349. list_path = self.list_path(list)
  3350. if not list_path:
  3351. print("List %s does not exist. Create it with ""list create %s"""%(list,list))
  3352. return False
  3353. else:
  3354. if not gi:
  3355. gi = self.gi
  3356. # first we check if url already exists in the file
  3357. with open(list_path,"r") as l_file:
  3358. lines = l_file.readlines()
  3359. l_file.close()
  3360. for l in lines:
  3361. sp = l.split()
  3362. if gi.url_mode() in sp:
  3363. if verbose:
  3364. print("%s already in %s."%(gi.url,list))
  3365. return False
  3366. with open(list_path,"a") as l_file:
  3367. l_file.write(gi.to_map_line())
  3368. l_file.close()
  3369. if verbose:
  3370. print("%s added to %s" %(gi.url,list))
  3371. return True
  3372. def list_add_top(self,list,limit=0,truncate_lines=0):
  3373. if not self.gi:
  3374. return
  3375. stri = self.gi.to_map_line().strip("\n")
  3376. if list == "archives":
  3377. stri += ", archived on "
  3378. elif list == "history":
  3379. stri += ", visited on "
  3380. else:
  3381. stri += ", added to %s on "%list
  3382. stri += time.ctime() + "\n"
  3383. list_path = self.get_list(list)
  3384. with open(list_path,"r") as l_file:
  3385. lines = l_file.readlines()
  3386. l_file.close()
  3387. with open(list_path,"w") as l_file:
  3388. l_file.write("#%s\n"%list)
  3389. l_file.write(stri)
  3390. counter = 0
  3391. # Truncating is useful in case we open a new branch
  3392. # after a few back in history
  3393. to_truncate = truncate_lines
  3394. for l in lines:
  3395. if not l.startswith("#"):
  3396. if to_truncate > 0:
  3397. to_truncate -= 1
  3398. elif limit == 0 or counter < limit:
  3399. l_file.write(l)
  3400. counter += 1
  3401. l_file.close()
  3402. # remove an url from a list.
  3403. # return True if the URL was removed
  3404. # return False if the URL was not found
  3405. def list_rm_url(self,url,list):
  3406. return self.list_has_url(url,list,deletion=True)
  3407. # deletion and has_url are so similar, I made them the same method
  3408. def list_has_url(self,url,list,deletion=False):
  3409. list_path = self.list_path(list)
  3410. if list_path:
  3411. to_return = False
  3412. with open(list_path,"r") as lf:
  3413. lines = lf.readlines()
  3414. lf.close()
  3415. to_write = []
  3416. # let’s remove the mode
  3417. url = url.split("##offpunk_mode=")[0]
  3418. for l in lines:
  3419. # we separate components of the line
  3420. # to ensure we identify a complete URL, not a part of it
  3421. splitted = l.split()
  3422. if url not in splitted and len(splitted) > 1:
  3423. current = splitted[1].split("##offpunk_mode=")[0]
  3424. #sometimes, we must remove the ending "/"
  3425. if url == current:
  3426. to_return = True
  3427. elif url.endswith("/") and url[:-1] == current:
  3428. to_return = True
  3429. else:
  3430. to_write.append(l)
  3431. else:
  3432. to_return = True
  3433. if deletion :
  3434. with open(list_path,"w") as lf:
  3435. for l in to_write:
  3436. lf.write(l)
  3437. lf.close()
  3438. return to_return
  3439. else:
  3440. return False
  3441. def list_get_links(self,list):
  3442. list_path = self.list_path(list)
  3443. if list_path:
  3444. gi = GeminiItem("list:///%s"%list)
  3445. return gi.get_links()
  3446. else:
  3447. return []
  3448. def list_go_to_line(self,line,list):
  3449. list_path = self.list_path(list)
  3450. if not list_path:
  3451. print("List %s does not exist. Create it with ""list create %s"""%(list,list))
  3452. elif not line.isnumeric():
  3453. print("go_to_line requires a number as parameter")
  3454. else:
  3455. gi = GeminiItem("list:///%s"%list)
  3456. gi = gi.get_link(int(line))
  3457. display = not self.sync_only
  3458. if gi:
  3459. self._go_to_gi(gi,handle=display)
  3460. return gi.url_mode()
  3461. def list_show(self,list):
  3462. list_path = self.list_path(list)
  3463. if not list_path:
  3464. print("List %s does not exist. Create it with ""list create %s"""%(list,list))
  3465. else:
  3466. gi = GeminiItem("list:///%s"%list)
  3467. display = not self.sync_only
  3468. self._go_to_gi(gi,handle=display)
  3469. #return the path of the list file if list exists.
  3470. #return None if the list doesn’t exist.
  3471. def list_path(self,list):
  3472. listdir = os.path.join(_DATA_DIR,"lists")
  3473. list_path = os.path.join(listdir, "%s.gmi"%list)
  3474. if os.path.exists(list_path):
  3475. return list_path
  3476. else:
  3477. return None
  3478. def list_create(self,list,title=None):
  3479. list_path = self.list_path(list)
  3480. if list in ["create","edit","delete","help"]:
  3481. print("%s is not allowed as a name for a list"%list)
  3482. elif not list_path:
  3483. listdir = os.path.join(_DATA_DIR,"lists")
  3484. os.makedirs(listdir,exist_ok=True)
  3485. list_path = os.path.join(listdir, "%s.gmi"%list)
  3486. with open(list_path,"a") as lfile:
  3487. if title:
  3488. lfile.write("# %s\n"%title)
  3489. else:
  3490. lfile.write("# %s\n"%list)
  3491. lfile.close()
  3492. print("list created. Display with `list %s`"%list)
  3493. else:
  3494. print("list %s already exists" %list)
  3495. def do_move(self,arg):
  3496. """move LIST will add the current page to the list LIST.
  3497. With a major twist: current page will be removed from all other lists.
  3498. If current page was not in a list, this command is similar to `add LIST`."""
  3499. if not arg:
  3500. print("LIST argument is required as the target for your move")
  3501. elif arg[0] == "archives":
  3502. self.do_archive()
  3503. else:
  3504. args = arg.split()
  3505. list_path = self.list_path(args[0])
  3506. if not list_path:
  3507. print("%s is not a list, aborting the move" %args[0])
  3508. else:
  3509. lists = self.list_lists()
  3510. for l in lists:
  3511. if l != args[0] and l not in ["archives", "history"]:
  3512. isremoved = self.list_rm_url(self.gi.url_mode(),l)
  3513. if isremoved:
  3514. print("Removed from %s"%l)
  3515. self.list_add_line(args[0])
  3516. def list_lists(self):
  3517. listdir = os.path.join(_DATA_DIR,"lists")
  3518. to_return = []
  3519. if os.path.exists(listdir):
  3520. lists = os.listdir(listdir)
  3521. if len(lists) > 0:
  3522. for l in lists:
  3523. #removing the .gmi at the end of the name
  3524. to_return.append(l[:-4])
  3525. return to_return
  3526. def list_has_status(self,list,status):
  3527. path = self.list_path(list)
  3528. toreturn = False
  3529. if path:
  3530. with open(path) as f:
  3531. line = f.readline().strip()
  3532. f.close()
  3533. if line.startswith("#") and status in line:
  3534. toreturn = True
  3535. return toreturn
  3536. def list_is_subscribed(self,list):
  3537. return self.list_has_status(list,"#subscribed")
  3538. def list_is_frozen(self,list):
  3539. return self.list_has_status(list,"#frozen")
  3540. def list_is_system(self,list):
  3541. return list in ["history","to_fetch","archives","tour"]
  3542. # This modify the status of a list to one of :
  3543. # normal, frozen, subscribed
  3544. # action is either #frozen, #subscribed or None
  3545. def list_modify(self,list,action=None):
  3546. path = self.list_path(list)
  3547. with open(path) as f:
  3548. lines = f.readlines()
  3549. f.close()
  3550. if lines[0].strip().startswith("#"):
  3551. first_line = lines.pop(0).strip("\n")
  3552. else:
  3553. first_line = "# %s "%list
  3554. first_line = first_line.replace("#subscribed","").replace("#frozen","")
  3555. if action:
  3556. first_line += " " + action
  3557. print("List %s has been marked as %s"%(list,action))
  3558. else:
  3559. print("List %s is now a normal list" %list)
  3560. first_line += "\n"
  3561. lines.insert(0,first_line)
  3562. with open(path,"w") as f:
  3563. for line in lines:
  3564. f.write(line)
  3565. f.close()
  3566. def do_list(self,arg):
  3567. """Manage list of bookmarked pages.
  3568. - list : display available lists
  3569. - list $LIST : display pages in $LIST
  3570. - list create $NEWLIST : create a new list
  3571. - list edit $LIST : edit the list
  3572. - list subscribe $LIST : during sync, add new links found in listed pages to tour
  3573. - list freeze $LIST : don’t update pages in list during sync if a cache already exists
  3574. - list normal $LIST : update pages in list during sync but don’t add anything to tour
  3575. - list delete $LIST : delete a list permanently (a confirmation is required)
  3576. - list help : print this help
  3577. See also :
  3578. - add $LIST (to add current page to $LIST or, by default, to bookmarks)
  3579. - move $LIST (to add current page to list while removing from all others)
  3580. - archive (to remove current page from all lists while adding to archives)
  3581. Note: There’s no "delete" on purpose. The use of "archive" is recommended."""
  3582. listdir = os.path.join(_DATA_DIR,"lists")
  3583. os.makedirs(listdir,exist_ok=True)
  3584. if not arg:
  3585. lists = self.list_lists()
  3586. if len(lists) > 0:
  3587. lgi = GeminiItem("list:///")
  3588. self._go_to_gi(lgi)
  3589. else:
  3590. print("No lists yet. Use `list create`")
  3591. else:
  3592. args = arg.split()
  3593. if args[0] == "create":
  3594. if len(args) > 2:
  3595. name = " ".join(args[2:])
  3596. self.list_create(args[1].lower(),title=name)
  3597. elif len(args) == 2:
  3598. self.list_create(args[1].lower())
  3599. else:
  3600. print("A name is required to create a new list. Use `list create NAME`")
  3601. elif args[0] == "edit":
  3602. editor = None
  3603. if "editor" in self.options and self.options["editor"]:
  3604. editor = self.options["editor"]
  3605. elif os.environ.get("VISUAL"):
  3606. editor = os.environ.get("VISUAL")
  3607. elif os.environ.get("EDITOR"):
  3608. editor = os.environ.get("EDITOR")
  3609. if editor:
  3610. if len(args) > 1 and args[1] in self.list_lists():
  3611. path = os.path.join(listdir,args[1]+".gmi")
  3612. try:
  3613. run("%s \"%s\""%(editor,path),direct_output=True)
  3614. except Exception as err:
  3615. print(err)
  3616. print("Please set a valid editor with \"set editor\"")
  3617. else:
  3618. print("A valid list name is required to edit a list")
  3619. else:
  3620. print("No valid editor has been found.")
  3621. print("You can use the following command to set your favourite editor:")
  3622. print("set editor EDITOR")
  3623. print("or use the $VISUAL or $EDITOR environment variables.")
  3624. elif args[0] == "delete":
  3625. if len(args) > 1:
  3626. if self.list_is_system(args[1]):
  3627. print("%s is a system list which cannot be deleted"%args[1])
  3628. elif args[1] in self.list_lists():
  3629. size = len(self.list_get_links(args[1]))
  3630. stri = "Are you sure you want to delete %s ?\n"%args[1]
  3631. confirm = "YES"
  3632. if size > 0:
  3633. stri += "! %s items in the list will be lost !\n"%size
  3634. confirm = "YES DELETE %s" %size
  3635. else :
  3636. stri += "The list is empty, it should be safe to delete it.\n"
  3637. stri += "Type \"%s\" (in capital, without quotes) to confirm :"%confirm
  3638. answer = input(stri)
  3639. if answer == confirm:
  3640. path = os.path.join(listdir,args[1]+".gmi")
  3641. os.remove(path)
  3642. print("* * * %s has been deleted" %args[1])
  3643. else:
  3644. print("A valid list name is required to be deleted")
  3645. else:
  3646. print("A valid list name is required to be deleted")
  3647. elif args[0] in ["subscribe","freeze","normal"]:
  3648. if len(args) > 1:
  3649. if self.list_is_system(args[1]):
  3650. print("You cannot modify %s which is a system list"%args[1])
  3651. elif args[1] in self.list_lists():
  3652. if args[0] == "subscribe":
  3653. action = "#subscribed"
  3654. elif args[0] == "freeze":
  3655. action = "#frozen"
  3656. else:
  3657. action = None
  3658. self.list_modify(args[1],action=action)
  3659. else:
  3660. print("A valid list name is required after %s" %args[0])
  3661. elif args[0] == "help":
  3662. self.onecmd("help list")
  3663. elif len(args) == 1:
  3664. self.list_show(args[0].lower())
  3665. else:
  3666. self.list_go_to_line(args[1],args[0].lower())
  3667. def completedefault(self,index,line,begidx,endidx):
  3668. print("completeing %s + %s" %index,line)
  3669. return ["bépo","auc"]
  3670. def do_help(self, arg):
  3671. """ALARM! Recursion detected! ALARM! Prepare to eject!"""
  3672. if arg == "!":
  3673. print("! is an alias for 'shell'")
  3674. elif arg == "?":
  3675. print("? is an alias for 'help'")
  3676. elif arg in _ABBREVS:
  3677. full_cmd = _ABBREVS[arg]
  3678. print("%s is an alias for '%s'" %(arg,full_cmd))
  3679. print("See the list of aliases with 'abbrevs'")
  3680. print("'help %s':"%full_cmd)
  3681. cmd.Cmd.do_help(self, full_cmd)
  3682. else:
  3683. cmd.Cmd.do_help(self, arg)
  3684. ### Flight recorder
  3685. def do_blackbox(self, *args):
  3686. """Display contents of flight recorder, showing statistics for the
  3687. current gemini browsing session."""
  3688. lines = []
  3689. # Compute flight time
  3690. now = time.time()
  3691. delta = now - self.log["start_time"]
  3692. hours, remainder = divmod(delta, 3600)
  3693. minutes, seconds = divmod(remainder, 60)
  3694. # Count hosts
  3695. ipv4_hosts = len([host for host in self.visited_hosts if host[0] == socket.AF_INET])
  3696. ipv6_hosts = len([host for host in self.visited_hosts if host[0] == socket.AF_INET6])
  3697. # Assemble lines
  3698. lines.append(("Patrol duration", "%02d:%02d:%02d" % (hours, minutes, seconds)))
  3699. lines.append(("Requests sent:", self.log["requests"]))
  3700. lines.append((" IPv4 requests:", self.log["ipv4_requests"]))
  3701. lines.append((" IPv6 requests:", self.log["ipv6_requests"]))
  3702. lines.append(("Bytes received:", self.log["bytes_recvd"]))
  3703. lines.append((" IPv4 bytes:", self.log["ipv4_bytes_recvd"]))
  3704. lines.append((" IPv6 bytes:", self.log["ipv6_bytes_recvd"]))
  3705. lines.append(("Unique hosts visited:", len(self.visited_hosts)))
  3706. lines.append((" IPv4 hosts:", ipv4_hosts))
  3707. lines.append((" IPv6 hosts:", ipv6_hosts))
  3708. lines.append(("DNS failures:", self.log["dns_failures"]))
  3709. lines.append(("Timeouts:", self.log["timeouts"]))
  3710. lines.append(("Refused connections:", self.log["refused_connections"]))
  3711. lines.append(("Reset connections:", self.log["reset_connections"]))
  3712. lines.append(("Cache hits:", self.log["cache_hits"]))
  3713. # Print
  3714. for key, value in lines:
  3715. print(key.ljust(24) + str(value).rjust(8))
  3716. def do_sync(self, line):
  3717. """Synchronize all bookmarks lists.
  3718. - New elements in pages in subscribed lists will be added to tour
  3719. - Elements in list to_fetch will be retrieved and added to tour
  3720. - Normal lists will be synchronized and updated
  3721. - Frozen lists will be fetched only if not present.
  3722. Argument : duration of cache validity (in seconds)."""
  3723. if self.offline_only:
  3724. print("Sync can only be achieved online. Change status with `online`.")
  3725. return
  3726. args = line.split()
  3727. if len(args) > 0:
  3728. if not args[0].isdigit():
  3729. print("sync argument should be the cache validity expressed in seconds")
  3730. return
  3731. else:
  3732. validity = int(args[0])
  3733. else:
  3734. validity = 0
  3735. self.call_sync(refresh_time=validity)
  3736. def call_sync(self,refresh_time=0,depth=1):
  3737. # fetch_gitem is the core of the sync algorithm.
  3738. # It takes as input :
  3739. # - a GeminiItem to be fetched
  3740. # - depth : the degree of recursion to build the cache (0 means no recursion)
  3741. # - validity : the age, in seconds, existing caches need to have before
  3742. # being refreshed (0 = never refreshed if it already exists)
  3743. # - savetotour : if True, newly cached items are added to tour
  3744. def add_to_tour(gitem):
  3745. if gitem.is_cache_valid():
  3746. toprint = " -> adding to tour: %s" %gitem.url
  3747. width = term_width() - 1
  3748. toprint = toprint[:width]
  3749. toprint += " "*(width-len(toprint))
  3750. print(toprint)
  3751. self.list_add_line("tour",gi=gitem,verbose=False)
  3752. return True
  3753. else:
  3754. return False
  3755. def fetch_gitem(gitem,depth=0,validity=0,savetotour=False,count=[0,0],strin=""):
  3756. #savetotour = True will save to tour newly cached content
  3757. # else, do not save to tour
  3758. #regardless of valitidy
  3759. if not gitem.is_cache_valid(validity=validity):
  3760. if strin != "":
  3761. endline = '\r'
  3762. else:
  3763. endline = None
  3764. #Did we already had a cache (even an old one) ?
  3765. isnew = not gitem.is_cache_valid()
  3766. toprint = "%s [%s/%s] Fetch "%(strin,count[0],count[1]) + gitem.url
  3767. width = term_width() - 1
  3768. toprint = toprint[:width]
  3769. toprint += " "*(width-len(toprint))
  3770. print(toprint,end=endline)
  3771. #If not saving to tour, then we should limit download size
  3772. limit = not savetotour
  3773. self._go_to_gi(gitem,update_hist=False,limit_size=limit)
  3774. if savetotour and isnew and gitem.is_cache_valid():
  3775. #we add to the next tour only if we managed to cache
  3776. #the ressource
  3777. add_to_tour(gitem)
  3778. #Now, recursive call, even if we didn’t refresh the cache
  3779. if depth > 0:
  3780. #we should only savetotour at the first level of recursion
  3781. # The code for this was removed so, currently, we savetotour
  3782. # at every level of recursion.
  3783. links = gitem.get_links()
  3784. subcount = [0,len(links)]
  3785. d = depth - 1
  3786. for k in links:
  3787. #recursive call (validity is always 0 in recursion)
  3788. substri = strin + " -->"
  3789. subcount[0] += 1
  3790. fetch_gitem(k,depth=d,validity=0,savetotour=savetotour,\
  3791. count=subcount,strin=substri)
  3792. def fetch_list(list,validity=0,depth=1,tourandremove=False,tourchildren=False):
  3793. links = self.list_get_links(list)
  3794. end = len(links)
  3795. counter = 0
  3796. print(" * * * %s to fetch in %s * * *" %(end,list))
  3797. for l in links:
  3798. counter += 1
  3799. fetch_gitem(l,depth=depth,validity=validity,savetotour=tourchildren,count=[counter,end])
  3800. if tourandremove:
  3801. if add_to_tour(l):
  3802. self.list_rm_url(l.url_mode(),list)
  3803. self.sync_only = True
  3804. lists = self.list_lists()
  3805. # We will fetch all the lists except "archives" and "history"
  3806. # We keep tour for the last round
  3807. subscriptions = []
  3808. normal_lists = []
  3809. fridge = []
  3810. for l in lists:
  3811. if not self.list_is_system(l):
  3812. if self.list_is_frozen(l):
  3813. fridge.append(l)
  3814. elif self.list_is_subscribed(l):
  3815. subscriptions.append(l)
  3816. else:
  3817. normal_lists.append(l)
  3818. # We start with the "subscribed" as we need to find new items
  3819. starttime = int(time.time())
  3820. for l in subscriptions:
  3821. fetch_list(l,validity=refresh_time,depth=depth,tourchildren=True)
  3822. #Then the fetch list (item are removed from the list after fetch)
  3823. # We fetch regarless of the refresh_time
  3824. if "to_fetch" in lists:
  3825. nowtime = int(time.time())
  3826. short_valid = nowtime - starttime
  3827. fetch_list("to_fetch",validity=short_valid,depth=depth,tourandremove=True)
  3828. #then we fetch all the rest (including bookmarks and tour)
  3829. for l in normal_lists:
  3830. fetch_list(l,validity=refresh_time,depth=depth)
  3831. for l in fridge:
  3832. fetch_list(l,validity=0,depth=depth)
  3833. #tour should be the last one as item my be added to it by others
  3834. fetch_list("tour",validity=refresh_time,depth=depth)
  3835. print("End of sync")
  3836. self.sync_only = False
  3837. ### The end!
  3838. def do_quit(self, *args):
  3839. """Exit Offpunk."""
  3840. def unlink(filename):
  3841. if filename and os.path.exists(filename):
  3842. os.unlink(filename)
  3843. # Close TOFU DB
  3844. self.db_conn.commit()
  3845. self.db_conn.close()
  3846. # Clean up after ourself
  3847. for cert in self.transient_certs_created:
  3848. for ext in (".crt", ".key"):
  3849. certfile = os.path.join(_CONFIG_DIR, "transient_certs", cert+ext)
  3850. if os.path.exists(certfile):
  3851. os.remove(certfile)
  3852. print("You can close your screen!")
  3853. sys.exit()
  3854. do_exit = do_quit
  3855. # Main function
  3856. def main():
  3857. # Parse args
  3858. parser = argparse.ArgumentParser(description='A command line gemini client.')
  3859. parser.add_argument('--bookmarks', action='store_true',
  3860. help='start with your list of bookmarks')
  3861. parser.add_argument('--tls-cert', metavar='FILE', help='TLS client certificate file')
  3862. parser.add_argument('--tls-key', metavar='FILE', help='TLS client certificate private key file')
  3863. parser.add_argument('--restricted', action="store_true", help='Disallow shell, add, and save commands')
  3864. parser.add_argument('--sync', action='store_true',
  3865. help='run non-interactively to build cache by exploring bookmarks')
  3866. parser.add_argument('--assume-yes', action='store_true',
  3867. help='assume-yes when asked questions about certificates/redirections during sync')
  3868. parser.add_argument('--disable-http',action='store_true',
  3869. help='do not try to get http(s) links (but already cached will be displayed)')
  3870. parser.add_argument('--fetch-later', action='store_true',
  3871. help='run non-interactively with an URL as argument to fetch it later')
  3872. parser.add_argument('--depth',
  3873. help='depth of the cache to build. Default is 1. More is crazy. Use at your own risks!')
  3874. parser.add_argument('--cache-validity',
  3875. help='duration for which a cache is valid before sync (seconds)')
  3876. parser.add_argument('--version', action='store_true',
  3877. help='display version information and quit')
  3878. parser.add_argument('--features', action='store_true',
  3879. help='display available features and dependancies then quit')
  3880. parser.add_argument('url', metavar='URL', nargs='*',
  3881. help='start with this URL')
  3882. args = parser.parse_args()
  3883. # Handle --version
  3884. if args.version:
  3885. print("Offpunk " + _VERSION)
  3886. sys.exit()
  3887. elif args.features:
  3888. GeminiClient.do_version(None,None)
  3889. sys.exit()
  3890. else:
  3891. for f in [_CONFIG_DIR, _CACHE_PATH, _DATA_DIR]:
  3892. if not os.path.exists(f):
  3893. print("Creating config directory {}".format(f))
  3894. os.makedirs(f)
  3895. # Instantiate client
  3896. gc = GeminiClient(restricted=args.restricted,synconly=args.sync)
  3897. torun_queue = []
  3898. # Act on args
  3899. if args.tls_cert:
  3900. # If tls_key is None, python will attempt to load the key from tls_cert.
  3901. gc._activate_client_cert(args.tls_cert, args.tls_key)
  3902. if args.bookmarks:
  3903. torun_queue.append("bookmarks")
  3904. elif args.url:
  3905. if len(args.url) == 1:
  3906. torun_queue.append("go %s" % args.url[0])
  3907. else:
  3908. for url in args.url:
  3909. torun_queue.append("tour %s" % url)
  3910. torun_queue.append("tour")
  3911. if args.disable_http:
  3912. gc.support_http = False
  3913. # Endless interpret loop (except while --sync or --fetch-later)
  3914. if args.fetch_later:
  3915. if args.url:
  3916. gc.sync_only = True
  3917. for u in args.url:
  3918. gi = GeminiItem(u)
  3919. if gi and gi.is_cache_valid():
  3920. gc.list_add_line("tour",gi)
  3921. else:
  3922. gc.list_add_line("to_fetch",gi)
  3923. else:
  3924. print("--fetch-later requires an URL (or a list of URLS) as argument")
  3925. elif args.sync:
  3926. if args.assume_yes:
  3927. gc.automatic_choice = "y"
  3928. if args.cache_validity:
  3929. refresh_time = int(args.cache_validity)
  3930. else:
  3931. # if no refresh time, a default of 0 is used (which means "infinite")
  3932. refresh_time = 0
  3933. if args.depth:
  3934. depth = int(args.depth)
  3935. else:
  3936. depth = 1
  3937. gc.call_sync(refresh_time=refresh_time,depth=depth)
  3938. gc.onecmd("blackbox")
  3939. else:
  3940. # We are in the normal mode. First process config file
  3941. rcfile = os.path.join(_CONFIG_DIR, "offpunkrc")
  3942. if os.path.exists(rcfile):
  3943. print("Using config %s" % rcfile)
  3944. with open(rcfile, "r") as fp:
  3945. for line in fp:
  3946. line = line.strip()
  3947. if ((args.bookmarks or args.url) and
  3948. any((line.startswith(x) for x in ("go", "g", "tour", "t")))
  3949. ):
  3950. if args.bookmarks:
  3951. print("Skipping rc command \"%s\" due to --bookmarks option." % line)
  3952. else:
  3953. print("Skipping rc command \"%s\" due to provided URLs." % line)
  3954. continue
  3955. torun_queue.append(line)
  3956. print("Welcome to Offpunk!")
  3957. if args.restricted:
  3958. print("Restricted mode engaged!")
  3959. print("Type `help` to get the list of available command.")
  3960. for line in torun_queue:
  3961. gc.onecmd(line)
  3962. while True:
  3963. try:
  3964. gc.cmdloop()
  3965. except KeyboardInterrupt:
  3966. print("")
  3967. if __name__ == '__main__':
  3968. main()