offpunk.py 139 KB


  1. #!/usr/bin/env python3
  2. # Offpunk Offline Gemini client
  3. # Derived from AV-98 by Solderpunk,
  4. # (C) 2021, 2022 Ploum <offpunk@ploum.eu>
  5. # (C) 2019, 2020 Solderpunk <solderpunk@sdf.org>
  6. # With contributions from:
  7. # - danceka <hannu.hartikainen@gmail.com>
  8. # - <jprjr@tilde.club>
  9. # - <vee@vnsf.xyz>
  10. # - Klaus Alexander Seistrup <klaus@seistrup.dk>
  11. # - govynnus <govynnus@sdf.org>
  12. # - Björn Wärmedal <bjorn.warmedal@gmail.com>
  13. # - <jake@rmgr.dev>
  14. _VERSION = "0.3"
  15. import argparse
  16. import cmd
  17. #import cmd2 as cmd
  18. import cgi
  19. import codecs
  20. import collections
  21. import datetime
  22. import fnmatch
  23. import getpass
  24. import glob
  25. import hashlib
  26. import io
  27. import mimetypes
  28. import os
  29. import os.path
  30. import filecmp
  31. import random
  32. import shlex
  33. import shutil
  34. import socket
  35. import sqlite3
  36. import ssl
  37. from ssl import CertificateError
  38. import subprocess
  39. import sys
  40. import tempfile
  41. import time
  42. import urllib.parse
  43. import uuid
  44. import webbrowser
  45. try:
  46. import setproctitle
  47. setproctitle.setproctitle("offpunk")
  48. _HAS_SETPROCTITLE = True
  49. except ModuleNotFoundError:
  50. _HAS_SETPROCTITLE = False
  51. try:
  52. import editor
  53. _HAS_EDITOR = True
  54. except ModuleNotFoundError:
  55. _HAS_EDITOR = False
  56. try:
  57. import ansiwrap as textwrap
  58. _HAS_ANSIWRAP = True
  59. except ModuleNotFoundError:
  60. print("Try installing python-ansiwrap for better rendering")
  61. import textwrap
  62. _HAS_ANSIWRAP = False
  63. _HAS_CHAFA = shutil.which('chafa')
  64. _HAS_XSEL = shutil.which('xsel')
  65. _HAS_XDGOPEN = shutil.which('xdg-open')
  66. try:
  67. from PIL import Image
  68. _HAS_PIL = True
  69. if _HAS_ANSIWRAP and _HAS_CHAFA:
  70. _RENDER_IMAGE = True
  71. else:
  72. print("chafa and ansiwrap are required to render images in terminal")
  73. _RENDER_IMAGE = False
  74. except ModuleNotFoundError:
  75. print("python-pil, chafa and ansiwrap are required to render images")
  76. _RENDER_IMAGE = False
  77. _HAS_PIL = False
  78. try:
  79. from cryptography import x509
  80. from cryptography.hazmat.backends import default_backend
  81. _HAS_CRYPTOGRAPHY = True
  82. _BACKEND = default_backend()
  83. except ModuleNotFoundError:
  84. _HAS_CRYPTOGRAPHY = False
  85. try:
  86. import magic
  87. _HAS_MAGIC = True
  88. except ModuleNotFoundError:
  89. print("Python-magic is recommended for better detection of mimetypes")
  90. _HAS_MAGIC = False
  91. try:
  92. import requests
  93. _DO_HTTP = True
  94. except ModuleNotFoundError:
  95. _DO_HTTP = False
  96. try:
  97. from readability import Document
  98. _HAS_READABILITY = True
  99. except ModuleNotFoundError:
  100. _HAS_READABILITY = False
  101. try:
  102. from bs4 import BeautifulSoup
  103. _HAS_SOUP = True
  104. except ModuleNotFoundError:
  105. _HAS_SOUP = False
  106. _DO_HTML = _HAS_SOUP and _HAS_READABILITY
  107. try:
  108. import feedparser
  109. _DO_FEED = True
  110. except ModuleNotFoundError:
  111. _DO_FEED = False
  112. ## Config directories
  113. # There are two conflicting xdg modules, we try to work with both
  114. try:
  115. from xdg import BaseDirectory
  116. _HAS_XDG = True
  117. _CACHE_PATH = BaseDirectory.save_cache_path("offpunk/")
  118. _CONFIG_DIR = BaseDirectory.save_config_path("offpunk/")
  119. _DATA_DIR = BaseDirectory.save_data_path("offpunk/")
  120. except ModuleNotFoundError:
  121. _HAS_XDG = False
  122. _CACHE_PATH = os.path.expanduser("~/.cache/offpunk/")
  123. _CONFIG_DIR = None
  124. ## Look for pre-existing config directory, if any
  125. for confdir in ("~/.offpunk/", "~/.config/offpunk/"):
  126. confdir = os.path.expanduser(confdir)
  127. if os.path.exists(confdir):
  128. _CONFIG_DIR = confdir
  129. break
  130. ## Otherwise, make one in .config if it exists
  131. if not _CONFIG_DIR and os.path.exists("~/.config/"):
  132. _CONFIG_DIR = os.path.expanduser("~/.config/offpunk/")
  133. elif not _CONFIG_DIR:
  134. _CONFIG_DIR = os.path.expanduser("~/.offpunk/")
  135. _DATA_DIR = _CONFIG_DIR
  136. for f in [_CONFIG_DIR, _CACHE_PATH, _DATA_DIR]:
  137. if not os.path.exists(f):
  138. print("Creating config directory {}".format(f))
  139. os.makedirs(f)
  140. _MAX_REDIRECTS = 5
  141. _MAX_CACHE_SIZE = 10
  142. _MAX_CACHE_AGE_SECS = 180
  143. less_version = 0
  144. return_code = subprocess.run("less --version",shell=True, capture_output=True)
  145. output = return_code.stdout.decode()
  146. # We get less Version (which is the only integer on the first line)
  147. words = output.split("\n")[0].split()
  148. for w in words:
  149. if w.isdigit():
  150. less_version = int(w)
  151. # restoring position only works for version of less > 572
  152. if less_version >= 572:
  153. _LESS_RESTORE_POSITION = True
  154. else:
  155. _LESS_RESTORE_POSITION = False
  156. #_DEFAULT_LESS = "less -EXFRfM -PMurl\ lines\ \%lt-\%lb/\%L\ \%Pb\%$ %s"
  157. # -E : quit when reaching end of file (to behave like "cat")
  158. # -F : quit if content fits the screen (behave like "cat")
  159. # -X : does not clear the screen
  160. # -R : interpret ANSI colors correctly
  161. # -f : suppress warning for some contents
  162. # -M : long prompt (to have info about where you are in the file)
  163. # -w : hilite the new first line after a page skip (space)
  164. # -i : ignore case in search
  165. #--incsearch : incremental search starting rev581
  166. if less_version >= 581:
  167. less_base = "less --incsearch --save-marks -XRfMwi"
  168. else:
  169. less_base = "less --save-marks -XRfMwi"
  170. _DEFAULT_LESS = less_base + " \"+''\" %s"
  171. _DEFAULT_CAT = less_base + " -EF %s"
  172. def less_cmd(file, histfile=None,cat=False):
  173. if histfile:
  174. prefix = "LESSHISTFILE=%s "%histfile
  175. else:
  176. prefix = ""
  177. if cat:
  178. cmd_str = prefix + _DEFAULT_CAT % file
  179. else:
  180. cmd_str = prefix + _DEFAULT_LESS % file
  181. subprocess.call(cmd_str,shell=True)
  182. # Command abbreviations
  183. _ABBREVS = {
  184. "a": "add",
  185. "b": "back",
  186. "bb": "blackbox",
  187. "bm": "bookmarks",
  188. "book": "bookmarks",
  189. "cp": "copy",
  190. "f": "fold",
  191. "fo": "forward",
  192. "g": "go",
  193. "h": "history",
  194. "hist": "history",
  195. "l": "less",
  196. "mv": "move",
  197. "n": "next",
  198. "off": "offline",
  199. "on": "online",
  200. "p": "previous",
  201. "prev": "previous",
  202. "q": "quit",
  203. "r": "reload",
  204. "s": "save",
  205. "se": "search",
  206. "/": "search",
  207. "t": "tour",
  208. "u": "up",
  209. }
  210. _MIME_HANDLERS = {
  211. "application/pdf": "zathura %s",
  212. "audio/mpeg": "mpg123 %s",
  213. "audio/ogg": "ogg123 %s",
  214. "image/*": "feh -. %s",
  215. #"text/html": "lynx -dump -force_html %s",
  216. }
  217. # monkey-patch Gemini support in urllib.parse
  218. # see https://github.com/python/cpython/blob/master/Lib/urllib/parse.py
  219. urllib.parse.uses_relative.append("gemini")
  220. urllib.parse.uses_netloc.append("gemini")
  221. global TERM_WIDTH
  222. TERM_WIDTH = 80
  223. def term_width():
  224. width = TERM_WIDTH
  225. cur = shutil.get_terminal_size()[0] - 1
  226. if cur < width:
  227. width = cur
  228. return width
  229. def fix_ipv6_url(url):
  230. if not url:
  231. return
  232. if not url.count(":") > 2: # Best way to detect them?
  233. return url
  234. if url.startswith("mailto"):
  235. return url
  236. # If there's a pair of []s in there, it's probably fine as is.
  237. if "[" in url and "]" in url:
  238. return url
  239. # Easiest case is a raw address, no schema, no path.
  240. # Just wrap it in square brackets and whack a slash on the end
  241. if "/" not in url:
  242. return "[" + url + "]/"
  243. # Now the trickier cases...
  244. if "://" in url:
  245. schema, schemaless = url.split("://",maxsplit=1)
  246. else:
  247. schema, schemaless = None, url
  248. if "/" in schemaless:
  249. netloc, rest = schemaless.split("/",1)
  250. schemaless = "[" + netloc + "]" + "/" + rest
  251. if schema:
  252. return schema + "://" + schemaless
  253. return schemaless
  254. # This list is also used as a list of supported protocols
  255. standard_ports = {
  256. "gemini": 1965,
  257. "gopher": 70,
  258. "http" : 80,
  259. "https" : 443,
  260. }
  261. # First, we define the different content->text renderers, outside of the rest
  262. # (They could later be factorized in other files or replaced)
  263. class AbstractRenderer():
  264. def __init__(self,content,url):
  265. self.url = url
  266. self.body = content
  267. self.rendered_text = None
  268. self.links = None
  269. self.title = None
  270. self.validity = True
  271. def get_subscribe_links(self):
  272. return [[self.url,self.get_mime(),self.get_title()]]
  273. def is_valid(self):
  274. return self.validity
  275. def get_links(self):
  276. if self.links == None :
  277. results = self.render(self.body,mode="links_only")
  278. if results:
  279. self.links = results[1]
  280. return self.links
  281. def get_title(self):
  282. return "Abstract title"
  283. #This function will give gemtext to the gemtext renderer
  284. def prepare(self,body,mode=None):
  285. return body
  286. def get_body(self,readable=True,width=None):
  287. if not width:
  288. width = term_width()
  289. if self.rendered_text == None or not readable:
  290. if readable :
  291. mode = "readable"
  292. else :
  293. mode = "full"
  294. prepared_body = self.prepare(self.body,mode=mode)
  295. result = self.render(prepared_body,width=width,mode=mode)
  296. if result:
  297. self.rendered_text = result[0]
  298. self.links = result[1]
  299. return self.rendered_text
  300. # An instance of AbstractRenderer should have a self.render(body,width,mode) method.
  301. # 3 modes are used : readable (by default), full and links_only (the fastest, when
  302. # rendered content is not used, only the links are needed)
  303. # The prepare() function is called before the rendering. It is useful if
  304. # your renderer output in a format suitable for another existing renderer (such as gemtext)
  305. # Gemtext Rendering Engine
  306. class GemtextRenderer(AbstractRenderer):
  307. def get_mime(self):
  308. return "text/gemini"
  309. def get_title(self):
  310. if self.title:
  311. return self.title
  312. else:
  313. lines = self.body.splitlines()
  314. for line in lines:
  315. if line.startswith("#"):
  316. self.title = line.strip("#").strip()
  317. return self.title
  318. if len(lines) > 0:
  319. # If not title found, we take the first 50 char
  320. # of the first line
  321. title_line = lines[0].strip()
  322. if len(title_line) > 50:
  323. title_line = title_line[:49] + "…"
  324. self.title = title_line
  325. return self.title
  326. else:
  327. self.title = "Empty Page"
  328. return self.title
  329. #render_gemtext
  330. def render(self,gemtext, width=None,mode=None):
  331. if not width:
  332. width = term_width()
  333. links = []
  334. preformatted = False
  335. rendered_text = ""
  336. #This local method takes a line and apply the ansi code given as "color"
  337. #The whole line is then wrapped and ansi code are ended.
  338. def wrap_line(line,color=None,i_indent="",s_indent=""):
  339. wrapped = textwrap.wrap(line,width,initial_indent=i_indent,\
  340. subsequent_indent=s_indent)
  341. final = ""
  342. for l in wrapped:
  343. if color:
  344. l = color + l + "\x1b[0m"
  345. if l.strip() != "":
  346. final += l + "\n"
  347. return final
  348. def format_link(url,index,name=None):
  349. if "://" in url:
  350. protocol,adress = url.split("://",maxsplit=1)
  351. protocol = " %s" %protocol
  352. else:
  353. adress = url
  354. protocol = ""
  355. if "gemini" in protocol:
  356. protocol = ""
  357. if not name:
  358. name = adress
  359. line = "[%d%s] %s" % (index, protocol, name)
  360. return line
  361. for line in gemtext.splitlines():
  362. if line.startswith("```"):
  363. preformatted = not preformatted
  364. elif preformatted:
  365. rendered_text += line + "\n"
  366. elif line.startswith("=>"):
  367. strippedline = line[2:].strip()
  368. if strippedline:
  369. links.append(strippedline)
  370. splitted = strippedline.split(maxsplit=1)
  371. url = splitted[0]
  372. name = None
  373. if len(splitted) > 1:
  374. name = splitted[1]
  375. link = format_link(url,len(links),name=name)
  376. startpos = link.find("] ") + 2
  377. wrapped = wrap_line(link,s_indent=startpos*" ")
  378. rendered_text += wrapped
  379. elif line.startswith("* "):
  380. line = line[1:].lstrip("\t ")
  381. rendered_text += textwrap.fill(line, width, initial_indent = "• ",
  382. subsequent_indent=" ") + "\n"
  383. elif line.startswith(">"):
  384. line = line[1:].lstrip("\t ")
  385. rendered_text += textwrap.fill(line,width, initial_indent = "> ",
  386. subsequent_indent="> ") + "\n"
  387. elif line.startswith("###"):
  388. line = line[3:].lstrip("\t ")
  389. rendered_text += wrap_line(line, color="\x1b[34m\x1b[2m")
  390. elif line.startswith("##"):
  391. line = line[2:].lstrip("\t ")
  392. rendered_text += wrap_line(line, color="\x1b[34m")
  393. elif line.startswith("#"):
  394. line = line[1:].lstrip("\t ")
  395. if not self.title:
  396. self.title = line
  397. rendered_text += wrap_line(line,color="\x1b[1;34m\x1b[4m")
  398. else:
  399. rendered_text += wrap_line(line).rstrip() + "\n"
  400. return rendered_text, links
  401. class GopherRenderer(AbstractRenderer):
  402. def get_mime(self):
  403. return "text/gopher"
  404. def get_title(self):
  405. return "Gopher - No Title"
  406. #menu_or_text
  407. def render(self,body,width=None,mode=None):
  408. if not width:
  409. width = term_width()
  410. try:
  411. render,links = self._render_goph(width=width,mode=mode)
  412. except Exception as err:
  413. print("Error ",err)
  414. lines = body.split("\n")
  415. render = ""
  416. for line in lines:
  417. render += textwrap.fill(line,width) + "\n"
  418. links = []
  419. return render,links
  420. def _render_goph(self,width=None,mode=None):
  421. if not width:
  422. width = term_width()
  423. # This is copied straight from Agena (and thus from VF1)
  424. rendered_text = ""
  425. links = []
  426. for line in self.body.split("\n"):
  427. #if line.strip() == ".":
  428. # continue
  429. if line.startswith("i"):
  430. towrap = line[1:].split("\t")[0] + "\r\n"
  431. rendered_text += textwrap.fill(towrap,width) + "\n"
  432. elif not line.strip() in [".",""]:
  433. parts = line.split("\t")
  434. parts[-1] = parts[-1].strip()
  435. if parts[-1] == "+":
  436. parts = parts[:-1]
  437. if len(parts) == 4:
  438. name,path,host,port = parts
  439. itemtype = name[0]
  440. name = name[1:]
  441. if port == "70":
  442. port = ""
  443. else:
  444. port = ":%s"%port
  445. if itemtype == "h" and path.startswith("URL:"):
  446. url = path[4:]
  447. else:
  448. if not path.startswith("/"):
  449. path = "/"+path
  450. url = "gopher://%s%s/%s%s" %(host,port,itemtype,path)
  451. linkline = url + " " + name
  452. links.append(linkline)
  453. towrap = "[%s] "%len(links)+ name + "\n"
  454. rendered_text += textwrap.fill(towrap,width) + "\n"
  455. else:
  456. towrap = line +"\n"
  457. rendered_text += textwrap.fill(towrap,width) + "\n"
  458. return rendered_text,links
  459. class FolderRenderer(GemtextRenderer):
  460. def get_mime(self):
  461. return "Directory"
  462. def prepare(self,body,mode=None):
  463. def get_first_line(l):
  464. path = os.path.join(listdir,l+".gmi")
  465. with open(path) as f:
  466. first_line = f.readline().strip()
  467. f.close()
  468. if first_line.startswith("#"):
  469. return first_line
  470. else:
  471. return None
  472. def write_list(l):
  473. body = ""
  474. for li in l:
  475. path = os.path.join(listdir,li+".gmi")
  476. gi = GeminiItem("file://" + path)
  477. size = len(gi.get_links())
  478. body += "=> %s %s (%s items)\n" %(str(path),li,size)
  479. return body
  480. listdir = os.path.join(_DATA_DIR,"lists")
  481. if self.url != listdir:
  482. return "This is folder %s" %self.url
  483. else:
  484. self.title = "My lists"
  485. lists = []
  486. if os.path.exists(listdir):
  487. listfiles = os.listdir(listdir)
  488. if len(listfiles) > 0:
  489. for l in listfiles:
  490. #removing the .gmi at the end of the name
  491. lists.append(l[:-4])
  492. if len(lists) > 0:
  493. body = ""
  494. my_lists = []
  495. system_lists = []
  496. subscriptions = []
  497. frozen = []
  498. lists.sort()
  499. for l in lists:
  500. if l in ["history","to_fetch","archives","tour"]:
  501. system_lists.append(l)
  502. else:
  503. first_line = get_first_line(l)
  504. if first_line and "#subscribed" in first_line:
  505. subscriptions.append(l)
  506. elif first_line and "#frozen" in first_line:
  507. frozen.append(l)
  508. else:
  509. my_lists.append(l)
  510. if len(my_lists) > 0:
  511. body+= "\n## Bookmarks Lists (updated during sync)\n"
  512. body += write_list(my_lists)
  513. if len(subscriptions) > 0:
  514. body +="\n## Subscriptions (new links in those are added to tour)\n"
  515. body += write_list(subscriptions)
  516. if len(frozen) > 0:
  517. body +="\n## Frozen (fetched but never updated)\n"
  518. body += write_list(frozen)
  519. if len(system_lists) > 0:
  520. body +="\n## System Lists\n"
  521. body += write_list(system_lists)
  522. return body
  523. class FeedRenderer(GemtextRenderer):
  524. def get_mime(self):
  525. return "application/rss+xml"
  526. def is_valid(self):
  527. if _DO_FEED:
  528. parsed = feedparser.parse(self.body)
  529. else:
  530. return False
  531. if parsed.bozo:
  532. return False
  533. else:
  534. return True
  535. def get_title(self):
  536. if not self.title:
  537. self.render(self.body)
  538. return self.title
  539. def prepare(self,content,mode="readable",width=None):
  540. if not width:
  541. width = term_width()
  542. self.links = []
  543. self.title = "RSS/Atom feed"
  544. page = ""
  545. if _DO_FEED:
  546. parsed = feedparser.parse(content)
  547. else:
  548. page += "Please install python-feedparser to handle RSS/Atom feeds\n"
  549. self.validity = False
  550. return page
  551. if parsed.bozo:
  552. page += "Invalid RSS feed\n\n"
  553. page += str(parsed.bozo_exception)
  554. self.validity = False
  555. else:
  556. if "title" in parsed.feed:
  557. t = parsed.feed.title
  558. else:
  559. t = "Unknown"
  560. self.title = "%s (XML feed)" %t
  561. title = "# %s"%self.title
  562. page += title + "\n"
  563. if "updated" in parsed.feed:
  564. page += "Last updated on %s\n\n" %parsed.feed.updated
  565. if "subtitle" in parsed.feed:
  566. page += parsed.feed.subtitle + "\n"
  567. if "link" in parsed.feed:
  568. page += "=> %s\n" %parsed.feed.link
  569. page += "\n## Entries\n"
  570. if len(parsed.entries) < 1:
  571. self.validity = False
  572. for i in parsed.entries:
  573. line = "=> %s " %i.link
  574. if "published" in i:
  575. pub_date = time.strftime("%Y-%m-%d",i.published_parsed)
  576. line += pub_date + " : "
  577. line += "%s" %(i.title)
  578. if "author" in i:
  579. line += " (by %s)"%i.author
  580. page += line + "\n"
  581. if mode == "full":
  582. if "summary" in i:
  583. rendered, links = HtmlRenderer.render(self,i.summary,\
  584. width=width,mode="full",add_title=False)
  585. page += rendered
  586. page += "\n"
  587. return page
  588. class ImageRenderer(AbstractRenderer):
  589. def get_mime(self):
  590. return "image/*"
  591. def is_valid(self):
  592. if _RENDER_IMAGE:
  593. return True
  594. else:
  595. return False
  596. def get_links(self):
  597. return []
  598. def get_title(self):
  599. return "Picture file"
  600. def render(self,img,width=None,mode=None):
  601. if mode == "links_only":
  602. return "", []
  603. if not width:
  604. width = term_width()
  605. try:
  606. img_obj = Image.open(img)
  607. if hasattr(img_obj,"n_frames") and img_obj.n_frames > 1:
  608. # we remove all frames but the first one
  609. img_obj.save(img,save_all=False)
  610. cmd = "chafa --bg white -s %s -w 1 \"%s\"" %(width,img)
  611. return_code = subprocess.run(cmd,shell=True, capture_output=True)
  612. ansi_img = return_code.stdout.decode()
  613. except Exception as err:
  614. ansi_img = "***image failed : %s***\n" %err
  615. return ansi_img, []
  616. class HtmlRenderer(AbstractRenderer):
  617. def get_mime(self):
  618. return "text/html"
  619. def is_valid(self):
  620. if not _DO_HTML:
  621. print("HTML document detected. Please install python-bs4 and python-readability.")
  622. return _DO_HTML and self.validity
  623. def get_subscribe_links(self):
  624. subs = [[self.url,self.get_mime(),self.get_title()]]
  625. soup = BeautifulSoup(self.body, 'html.parser')
  626. links = soup.find_all("link",rel="alternate",recursive=True)
  627. for l in links:
  628. ty = l.get("type")
  629. if "rss" in ty or "atom" in ty or "feed" in ty:
  630. subs.append([l.get("href"),ty,l.get("title")])
  631. return subs
  632. def get_title(self):
  633. if self.title:
  634. return self.title
  635. else:
  636. readable = Document(self.body)
  637. self.title = readable.short_title()
  638. return self.title
  639. # Our own HTML engine (crazy, isn’t it?)
  640. # Return [rendered_body, list_of_links]
  641. # mode is either links_only, readable or full
  642. def render(self,body,mode="readable",width=None,add_title=True):
  643. if not width:
  644. width = term_width()
  645. if not _DO_HTML:
  646. print("HTML document detected. Please install python-bs4 and python-readability.")
  647. return
  648. # This method recursively parse the HTML
  649. r_body = ""
  650. links = []
  651. # You know how bad html is when you realize that space sometimes meaningful, somtimes not.
  652. # CR are not meaniningful. Except that, somethimes, they should be interpreted as spaces.
  653. # HTML is real crap. At least the one people are generating.
  654. def render_image(src,width=40,mode=None):
  655. ansi_img = ""
  656. if _RENDER_IMAGE and mode != "links_only" and src:
  657. abs_url = urllib.parse.urljoin(self.url, src)
  658. try:
  659. g = GeminiItem(abs_url)
  660. if g.is_cache_valid():
  661. img = g.get_cache_path()
  662. renderer = ImageRenderer(img,abs_url)
  663. # Image are 40px wide except if terminal is smaller
  664. if width > 40:
  665. size = 40
  666. else:
  667. size = width
  668. ansi_img = "\n" + renderer.get_body(width=size)
  669. except Exception as err:
  670. #we sometimes encounter really bad formatted files or URL
  671. ansi_img += "[BAD IMG] %s"%src
  672. return ansi_img
  673. def sanitize_string(string):
  674. #string = string.lstrip("\n")
  675. string = string.replace("\n", " ").replace("\t"," ")
  676. endspace = string.endswith(" ") or string.endswith("\xa0")
  677. startspace = string.startswith(" ") or string.startswith("\xa0")
  678. toreturn = string.replace("\n", " ").replace("\t"," ").strip()
  679. while " " in toreturn:
  680. toreturn = toreturn.replace(" "," ")
  681. toreturn = toreturn.replace("&nbsp","\xa0")
  682. if endspace and not toreturn.endswith(" ") and not toreturn.endswith("\xa0"):
  683. toreturn += " "
  684. if startspace and not toreturn.startswith(" ") and not toreturn.startswith("\xa0"):
  685. toreturn = " " + toreturn
  686. return toreturn
  687. def recursive_render(element,indent=""):
  688. rendered_body = ""
  689. if element.name == "blockquote":
  690. for child in element.children:
  691. rendered_body += "\x1b[3m"
  692. rendered_body += recursive_render(child,indent="\t").rstrip("\t")
  693. rendered_body += "\x1b[23m"
  694. elif element.name in ["div","p"]:
  695. rendered_body += "\n"
  696. div = ""
  697. for child in element.children:
  698. div += recursive_render(child,indent=indent)
  699. rendered_body += div.strip()
  700. rendered_body += "\n\n"
  701. elif element.name in ["h1","h2","h3","h4","h5","h6"]:
  702. line = sanitize_string(element.get_text())
  703. if element.name in ["h1","h2"]:
  704. rendered_body += "\n"+"\x1b[1;34m\x1b[4m" + line + "\x1b[0m"+"\n"
  705. elif element.name in ["h3","h4"]:
  706. rendered_body += "\n" + "\x1b[34m" + line + "\x1b[0m" + "\n"
  707. else:
  708. rendered_body += "\n" + "\x1b[34m\x1b[2m" + line + "\x1b[0m" + "\n"
  709. elif element.name == "pre":
  710. rendered_body += "\n"
  711. for child in element.children:
  712. rendered_body += recursive_render(child,indent=indent)
  713. rendered_body += "\n\n"
  714. elif element.name in ["li","tr"]:
  715. line = ""
  716. for child in element.children:
  717. line += recursive_render(child,indent=indent).strip("\n")
  718. rendered_body += " * " + line.strip() + "\n"
  719. elif element.name in ["td"]:
  720. line = "| "
  721. for child in element.children:
  722. line += recursive_render(child)
  723. line += " |"
  724. rendered_body += line
  725. # italics
  726. elif element.name in ["code","em","i"]:
  727. rendered_body += "\x1b[3m"
  728. for child in element.children:
  729. rendered_body += recursive_render(child,indent=indent)
  730. rendered_body += "\x1b[23m"
  731. #bold
  732. elif element.name in ["b","strong"]:
  733. rendered_body += "\x1b[1m"
  734. for child in element.children:
  735. rendered_body += recursive_render(child,indent=indent)
  736. rendered_body += "\x1b[22m"
  737. elif element.name == "a":
  738. text = ""
  739. # support for images nested in links
  740. for child in element.children:
  741. if child.name == "img":
  742. # recursive rendering seems to displaying images twice
  743. img = recursive_render(child)
  744. #src = child.get("src")
  745. #img = render_image(src,width=width,mode=mode)
  746. rendered_body += img
  747. else:
  748. text += recursive_render(child)
  749. link = element.get('href')
  750. if link:
  751. links.append(link+" "+text)
  752. link_id = " [%s]"%(len(links))
  753. rendered_body += "\x1b[2;34m" + text + link_id + "\x1b[0m"
  754. else:
  755. #No real link found
  756. rendered_body = text
  757. elif element.name == "img":
  758. src = element.get("src")
  759. text = ""
  760. ansi_img = render_image(src,width=width,mode=mode)
  761. alt = element.get("alt")
  762. if alt:
  763. alt = sanitize_string(alt)
  764. text += "[IMG] %s"%alt
  765. else:
  766. text += "[IMG]"
  767. if src:
  768. links.append(src+" "+text)
  769. link_id = " [%s]"%(len(links))
  770. rendered_body = ansi_img + "\x1b[2;33m" + text + link_id + "\x1b[0m\n\n"
  771. elif element.name == "br":
  772. rendered_body = "\n"
  773. elif element.string:
  774. rendered_body = sanitize_string(element.string)
  775. else:
  776. for child in element.children:
  777. rendered_body += recursive_render(child,indent=indent)
  778. return indent + rendered_body
  779. # the real render_html hearth
  780. if mode == "full":
  781. summary = body
  782. else:
  783. readable = Document(body)
  784. summary = readable.summary()
  785. soup = BeautifulSoup(summary, 'html.parser')
  786. rendered_body = ""
  787. if soup :
  788. if soup.body :
  789. contents = soup.body.contents
  790. else:
  791. contents = soup.contents
  792. for el in contents:
  793. rendered_body += recursive_render(el)
  794. paragraphs = rendered_body.split("\n\n")
  795. for par in paragraphs:
  796. lines = par.splitlines()
  797. for line in lines:
  798. if line.startswith("\t"):
  799. i_indent = " "
  800. s_indent = i_indent
  801. line = line.strip("\t")
  802. elif line.lstrip().startswith("* "):
  803. line = line.lstrip()
  804. i_indent = " " # we keep the initial bullet)
  805. s_indent = " "
  806. else:
  807. i_indent = ""
  808. s_indent = i_indent
  809. if line.strip() != "":
  810. try:
  811. wrapped = textwrap.fill(line,width,initial_indent=i_indent,
  812. subsequent_indent=s_indent)
  813. except Exception as err:
  814. wrapped = line
  815. wrapped += "\n"
  816. else:
  817. wrapped = ""
  818. r_body += wrapped
  819. r_body += "\n"
  820. #check if we need to add the title or if already in content
  821. lines = r_body.splitlines()
  822. first_line = ""
  823. while first_line == "" and len(lines) > 0:
  824. first_line = lines.pop(0)
  825. if add_title and self.get_title()[:(width-1)] not in first_line:
  826. title = "\x1b[1;34m\x1b[4m" + self.get_title() + "\x1b[0m""\n"
  827. title = textwrap.fill(title,width)
  828. r_body = title + "\n" + r_body
  829. #We try to avoid huge empty gaps in the page
  830. r_body = r_body.replace("\n\n\n\n","\n\n").replace("\n\n\n","\n\n")
  831. return r_body,links
  832. # Mapping mimetypes with renderers
  833. # (any content with a mimetype text/* not listed here will be rendered with as GemText)
  834. _FORMAT_RENDERERS = {
  835. "text/gemini": GemtextRenderer,
  836. "text/html" : HtmlRenderer,
  837. "text/xml" : FeedRenderer,
  838. "application/xml" : FeedRenderer,
  839. "application/rss+xml" : FeedRenderer,
  840. "application/atom+xml" : FeedRenderer,
  841. "text/gopher": GopherRenderer,
  842. "image/*": ImageRenderer
  843. }
# Offpunk is organized as follows:
# - A GeminiClient instance handles the browsing of GeminiItems (= pages).
# - There is only one GeminiClient. Each page is a GeminiItem (the name is
#   historical, as the content may be non-Gemini).
# - A GeminiItem is created with a URL from which it derives its content.
# - Content includes: a title, a body, an ANSI-rendered body and a list of links.
# - Each GeminiItem generates a "cache_path" in which it maintains a cached version of its content.
  851. class GeminiItem():
  852. def __init__(self, url, name=""):
  853. if "://" not in url and ("./" not in url and url[0] != "/"):
  854. if not url.startswith("mailto:"):
  855. url = "gemini://" + url
  856. self.url = fix_ipv6_url(url).strip()
  857. self._cache_path = None
  858. self.name = name
  859. self.mime = None
  860. self.renderer = None
  861. self.links = None
  862. self.body = None
  863. parsed = urllib.parse.urlparse(self.url)
  864. if "./" in url or url[0] == "/":
  865. self.scheme = "file"
  866. else:
  867. self.scheme = parsed.scheme
  868. if self.scheme in ["file","mailto"]:
  869. self.local = True
  870. self.host = ""
  871. self.port = None
  872. # file:// is 7 char
  873. if self.url.startswith("file://"):
  874. self.path = self.url[7:]
  875. elif self.scheme == "mailto":
  876. self.path = parsed.path
  877. else:
  878. self.path = self.url
  879. else:
  880. self.local = False
  881. self.host = parsed.hostname
  882. self.port = parsed.port or standard_ports.get(self.scheme, 0)
  883. # special gopher selector case
  884. if self.scheme == "gopher":
  885. if parsed.path and parsed.path[0] == "/" and len(parsed.path) > 1:
  886. splitted = parsed.path.split("/")
  887. # We check if we have well a gopher type
  888. if len(splitted[1]) == 1:
  889. itemtype = parsed.path[1]
  890. selector = parsed.path[2:]
  891. else:
  892. itemtype = "1"
  893. selector = parsed.path
  894. self.path = selector
  895. else:
  896. itemtype = "1"
  897. self.path = parsed.path
  898. if itemtype == "0":
  899. self.mime = "text/gemini"
  900. elif itemtype == "1":
  901. self.mime = "text/gopher"
  902. elif itemtype == "h":
  903. self.mime = "text/html"
  904. elif itemtype in ("9","g","I","s"):
  905. self.mime = None
  906. else:
  907. self.mime = "text/gopher"
  908. else:
  909. self.path = parsed.path
  910. if parsed.query:
  911. # we don’t add the query if path is too long because path above 260 char
  912. # are not supported and crash python.
  913. # Also, very long query are usually useless stuff
  914. if len(self.path+parsed.query) < 258:
  915. self.path += "/" + parsed.query
  916. def get_cache_path(self):
  917. if self._cache_path and not os.path.isdir(self._cache_path):
  918. return self._cache_path
  919. elif self.local:
  920. self._cache_path = self.path
  921. #if not local, we create a local cache path.
  922. else:
  923. self._cache_path = os.path.expanduser(_CACHE_PATH + self.scheme +\
  924. "/" + self.host + self.path)
  925. #There’s an OS limitation of 260 characters per path.
  926. #We will thus cut the path enough to add the index afterward
  927. self._cache_path = self._cache_path[:249]
  928. # FIXME : this is a gross hack to give a name to
  929. # index files. This will break if the index is not
  930. # index.gmi. I don’t know how to know the real name
  931. # of the file. But first, we need to ensure that the domain name
  932. # finish by "/". Else, the cache will create a file, not a folder.
  933. if self.scheme.startswith("http"):
  934. index = "index.html"
  935. elif self.scheme == "gopher":
  936. index = "index.txt"
  937. else:
  938. index = "index.gmi"
  939. if self.path == "" or os.path.isdir(self._cache_path):
  940. if not self._cache_path.endswith("/"):
  941. self._cache_path += "/"
  942. if not self.url.endswith("/"):
  943. self.url += "/"
  944. if self._cache_path.endswith("/"):
  945. self._cache_path += index
  946. return self._cache_path
  947. def get_capsule_title(self):
  948. #small intelligence to try to find a good name for a capsule
  949. #we try to find eithe ~username or /users/username
  950. #else we fallback to hostname
  951. if self.local:
  952. if self.name != "":
  953. red_title = self.name
  954. else:
  955. red_title = self.path
  956. else:
  957. red_title = self.host
  958. if "user" in self.path:
  959. i = 0
  960. splitted = self.path.split("/")
  961. while i < (len(splitted)-1):
  962. if splitted[i].startswith("user"):
  963. red_title = splitted[i+1]
  964. i += 1
  965. if "~" in self.path:
  966. for pp in self.path.split("/"):
  967. if pp.startswith("~"):
  968. red_title = pp[1:]
  969. return red_title
  970. def is_cache_valid(self,validity=0):
  971. # Validity is the acceptable time for
  972. # a cache to be valid (in seconds)
  973. # If 0, then any cache is considered as valid
  974. # (use validity = 1 if you want to refresh everything)
  975. cache = self.get_cache_path()
  976. if self.local:
  977. return True
  978. elif cache :
  979. # If path is too long, we always return True to avoid
  980. # fetching it.
  981. if len(cache) > 259:
  982. self.links = []
  983. print("We return False because path is too long")
  984. return False
  985. if os.path.exists(cache) and not os.path.isdir(cache):
  986. if validity > 0 :
  987. last_modification = self.cache_last_modified()
  988. now = time.time()
  989. age = now - last_modification
  990. return age < validity
  991. else:
  992. return True
  993. else:
  994. #Cache has not been build
  995. return False
  996. else:
  997. #There’s not even a cache!
  998. return False
  999. def cache_last_modified(self):
  1000. path = self.get_cache_path()
  1001. if path:
  1002. return os.path.getmtime(path)
  1003. elif self.local:
  1004. return 0
  1005. else:
  1006. print("ERROR : NO CACHE in cache_last_modified")
  1007. return None
  1008. def get_body(self,as_file=False):
  1009. if self.body and not as_file:
  1010. return self.body
  1011. if self.is_cache_valid():
  1012. path = self.get_cache_path()
  1013. else:
  1014. path = None
  1015. if path:
  1016. # There’s on OS limit on path length
  1017. if len(path) > 259:
  1018. toreturn = "Path is too long. This is an OS limitation.\n\n"
  1019. toreturn += self.url
  1020. return toreturn
  1021. elif as_file:
  1022. return path
  1023. else:
  1024. with open(path) as f:
  1025. body = f.read()
  1026. f.close()
  1027. return body
  1028. else:
  1029. print("ERROR: NO CACHE for %s" %self._cache_path)
  1030. return error
  1031. # This method is used to load once the list of links in a gi
  1032. # Links can be followed, after a space, by a description/title
  1033. def __make_links(self,links):
  1034. self.links = []
  1035. for l in links:
  1036. #split between link and potential name
  1037. splitted = l.split(maxsplit=1)
  1038. url = self.absolutise_url(splitted[0])
  1039. if looks_like_url(url):
  1040. if len(splitted) > 1:
  1041. newgi = GeminiItem(url,splitted[1])
  1042. else:
  1043. newgi = GeminiItem(url)
  1044. self.links.append(newgi)
  1045. def get_links(self):
  1046. if self.links == None:
  1047. if not self.renderer:
  1048. self._set_renderer()
  1049. if self.renderer:
  1050. self.__make_links(self.renderer.get_links())
  1051. else:
  1052. self.links = []
  1053. return self.links
  1054. def get_link(self,nb):
  1055. # == None allows to return False, even if the list is empty
  1056. if self.links == None:
  1057. r_body = self.get_rendered_body()
  1058. if len(self.links) < nb:
  1059. print("Index too high! No link %s for %s" %(nb,self.url))
  1060. return None
  1061. else:
  1062. return self.links[nb-1]
  1063. def get_subscribe_links(self):
  1064. if not self.renderer:
  1065. self._set_renderer()
  1066. if self.renderer:
  1067. return self.renderer.get_subscribe_links()
  1068. else:
  1069. return []
  1070. # Red title above rendered content
  1071. def _make_terminal_title(self):
  1072. title = self.get_capsule_title()
  1073. #FIXME : how do I know that I’m offline_only ?
  1074. if self.is_cache_valid(): #and self.offline_only and not self.local:
  1075. last_modification = self.cache_last_modified()
  1076. str_last = time.ctime(last_modification)
  1077. nbr = len(self.get_links())
  1078. if self.local:
  1079. title += " (%s items) \x1b[0;31m(local file)"%nbr
  1080. else:
  1081. title += " (%s links) \x1b[0;31m(last accessed on %s)"%(nbr,str_last)
  1082. rendered_title = "\x1b[31m\x1b[1m"+ title + "\x1b[0m"
  1083. wrapped = textwrap.fill(rendered_title,term_width())
  1084. return wrapped + "\n"
  1085. def _set_renderer(self,mime=None):
  1086. if self.local and os.path.isdir(self.get_cache_path()):
  1087. self.renderer = FolderRenderer("",self.get_cache_path())
  1088. return
  1089. if not mime:
  1090. mime = self.get_mime()
  1091. #we don’t even have a mime (so probably we don’t have a cache)
  1092. if not mime:
  1093. return
  1094. mime_to_use = []
  1095. for m in _FORMAT_RENDERERS:
  1096. if fnmatch.fnmatch(mime, m):
  1097. mime_to_use.append(m)
  1098. if len(mime_to_use) > 0:
  1099. current_mime = mime_to_use[0]
  1100. func = _FORMAT_RENDERERS[current_mime]
  1101. if current_mime.startswith("text"):
  1102. self.renderer = func(self.get_body(),self.url)
  1103. # We double check if the renderer is correct.
  1104. # If not, we fallback to html
  1105. # (this is currently only for XHTML, often being
  1106. # mislabelled as xml thus RSS feeds)
  1107. if not self.renderer.is_valid():
  1108. func = _FORMAT_RENDERERS["text/html"]
  1109. #print("Set (fallback)RENDERER to html instead of %s"%mime)
  1110. self.renderer = func(self.get_body(),self.url)
  1111. else:
  1112. #we don’t parse text, we give the file to the renderer
  1113. self.renderer = func(self.get_cache_path(),self.url)
  1114. if not self.renderer.is_valid():
  1115. self.renderer = None
  1116. def get_rendered_body(self,readable=True):
  1117. if not self.renderer:
  1118. self._set_renderer()
  1119. if self.renderer and self.renderer.is_valid():
  1120. body = self.renderer.get_body(readable=readable)
  1121. self.__make_links(self.renderer.get_links())
  1122. to_return = self._make_terminal_title() + body
  1123. return to_return
  1124. else:
  1125. self.links = []
  1126. return None
  1127. def get_filename(self):
  1128. filename = os.path.basename(self.get_cache_path())
  1129. return filename
  1130. def write_body(self,body,mime=None):
  1131. ## body is a copy of the raw gemtext
  1132. ## Write_body() also create the cache !
  1133. # DEFAULT GEMINI MIME
  1134. self.body = body
  1135. if mime:
  1136. self.mime, mime_options = cgi.parse_header(mime)
  1137. if not self.local:
  1138. if self.mime and self.mime.startswith("text/"):
  1139. mode = "w"
  1140. else:
  1141. mode = "wb"
  1142. cache_dir = os.path.dirname(self.get_cache_path())
  1143. # If the subdirectory already exists as a file (not a folder)
  1144. # We remove it (happens when accessing URL/subfolder before
  1145. # URL/subfolder/file.gmi.
  1146. # This causes loss of data in the cache
  1147. # proper solution would be to save "sufolder" as "sufolder/index.gmi"
  1148. # If the subdirectory doesn’t exist, we recursively try to find one
  1149. # until it exists to avoid a file blocking the creation of folders
  1150. root_dir = cache_dir
  1151. while not os.path.exists(root_dir):
  1152. root_dir = os.path.dirname(root_dir)
  1153. if os.path.isfile(root_dir):
  1154. os.remove(root_dir)
  1155. os.makedirs(cache_dir,exist_ok=True)
  1156. with open(self.get_cache_path(), mode=mode) as f:
  1157. f.write(body)
  1158. f.close()
  1159. def get_mime(self):
  1160. if self.mime:
  1161. return self.mime
  1162. elif self.is_cache_valid():
  1163. path = self.get_cache_path()
  1164. if os.path.isdir(path):
  1165. mime = "Local Folder"
  1166. elif path.endswith(".gmi"):
  1167. mime = "text/gemini"
  1168. elif _HAS_MAGIC :
  1169. mime = magic.from_file(path,mime=True)
  1170. mime2,encoding = mimetypes.guess_type(path,strict=False)
  1171. #If we hesitate between html and xml, takes the xml one
  1172. #because the FeedRendered fallback to HtmlRenderer
  1173. if mime2 and mime != mime2 and "html" in mime and "xml" in mime2:
  1174. mime = "text/xml"
  1175. #Some xml/html document are considered as octet-stream
  1176. if mime == "application/octet-stream":
  1177. mime = "text/xml"
  1178. else:
  1179. mime,encoding = mimetypes.guess_type(path,strict=False)
  1180. #gmi Mimetype is not recognized yet
  1181. if not mime and not _HAS_MAGIC :
  1182. print("Cannot guess the mime type of the file. Install Python-magic")
  1183. if mime.startswith("text") and mime not in _FORMAT_RENDERERS:
  1184. #by default, we consider it’s gemini except for html
  1185. mime = "text/gemini"
  1186. self.mime = mime
  1187. return self.mime
  1188. def set_error(self,err):
  1189. # If we get an error, we want to keep an existing cache
  1190. # but we need to touch it or to create an empty one
  1191. # to avoid hitting the error at each refresh
  1192. cache = self.get_cache_path()
  1193. if self.is_cache_valid():
  1194. os.utime(cache)
  1195. else:
  1196. cache_dir = os.path.dirname(cache)
  1197. root_dir = cache_dir
  1198. while not os.path.exists(root_dir):
  1199. root_dir = os.path.dirname(root_dir)
  1200. if os.path.isfile(root_dir):
  1201. os.remove(root_dir)
  1202. os.makedirs(cache_dir,exist_ok=True)
  1203. if os.path.isdir(cache_dir):
  1204. with open(cache, "w") as cache:
  1205. cache.write(str(datetime.datetime.now())+"\n")
  1206. cache.write("ERROR while caching %s\n\n" %self.url)
  1207. cache.write("*****\n\n")
  1208. cache.write(str(type(err)) + " = " + str(err))
  1209. cache.write("\n" + str(err.with_traceback(None)))
  1210. cache.write("\n*****\n\n")
  1211. cache.write("If you believe this error was temporary, type ""reload"".\n")
  1212. cache.write("The ressource will be tentatively fetched during next sync.\n")
  1213. cache.close()
  1214. def root(self):
  1215. return GeminiItem(self._derive_url("/"))
  1216. def up(self):
  1217. pathbits = list(os.path.split(self.path.rstrip('/')))
  1218. # Don't try to go higher than root or in config
  1219. if self.local or len(pathbits) == 1 :
  1220. return self
  1221. # Get rid of bottom component
  1222. pathbits.pop()
  1223. new_path = os.path.join(*pathbits)
  1224. if self.scheme == "gopher":
  1225. new_path = "/1" + new_path
  1226. return GeminiItem(self._derive_url(new_path))
  1227. def query(self, query):
  1228. query = urllib.parse.quote(query)
  1229. return GeminiItem(self._derive_url(query=query))
  1230. def _derive_url(self, path="", query=""):
  1231. """
  1232. A thin wrapper around urlunparse which avoids inserting standard ports
  1233. into URLs just to keep things clean.
  1234. """
  1235. if not self.port or self.port == standard_ports[self.scheme] :
  1236. host = self.host
  1237. else:
  1238. host = self.host + ":" + str(self.port)
  1239. return urllib.parse.urlunparse((self.scheme,host,path or self.path, "", query, ""))
  1240. def absolutise_url(self, relative_url):
  1241. """
  1242. Convert a relative URL to an absolute URL by using the URL of this
  1243. GeminiItem as a base.
  1244. """
  1245. abs_url = urllib.parse.urljoin(self.url, relative_url)
  1246. return abs_url
  1247. def full_title(self):
  1248. if self.renderer:
  1249. title = self.renderer.get_title()
  1250. elif self.name:
  1251. title = self.name
  1252. else:
  1253. # we take the last component of url as title
  1254. if self.local:
  1255. title = self.url.split("/")[-1]
  1256. else:
  1257. parsed = urllib.parse.urlparse(self.url)
  1258. if parsed.path:
  1259. title = parsed.path.strip("/").split("/")[-1]
  1260. else:
  1261. title = parsed.netloc
  1262. title += " (%s)"%self.get_capsule_title()
  1263. return title
  1264. def to_map_line(self):
  1265. return "=> {} {}\n".format(self.url, self.full_title())
# Carriage return followed by line feed.
CRLF = '\r\n'
  1267. # Cheap and cheerful URL detector
  1268. def looks_like_url(word):
  1269. try:
  1270. if not word.strip():
  1271. return False
  1272. url = fix_ipv6_url(word).strip()
  1273. parsed = urllib.parse.urlparse(url)
  1274. #sometimes, urllib crashed only when requesting the port
  1275. port = parsed.port
  1276. mailto = word.startswith("mailto:")
  1277. start = word.startswith("gemini://") or word.startswith("http://")\
  1278. or word.startswith("https://")
  1279. if not start and not mailto:
  1280. return looks_like_url("gemini://"+word)
  1281. elif mailto:
  1282. return "@" in word
  1283. else:
  1284. return "." in word
  1285. except ValueError:
  1286. return False
  1287. class UserAbortException(Exception):
  1288. pass
  1289. # GeminiClient Decorators
  1290. def needs_gi(inner):
  1291. def outer(self, *args, **kwargs):
  1292. if not self.gi:
  1293. print("You need to 'go' somewhere, first")
  1294. return None
  1295. else:
  1296. return inner(self, *args, **kwargs)
  1297. outer.__doc__ = inner.__doc__
  1298. return outer
  1299. def restricted(inner):
  1300. def outer(self, *args, **kwargs):
  1301. if self.restricted:
  1302. print("Sorry, this command is not available in restricted mode!")
  1303. return None
  1304. else:
  1305. return inner(self, *args, **kwargs)
  1306. outer.__doc__ = inner.__doc__
  1307. return outer
  1308. class GeminiClient(cmd.Cmd):
  1309. def __init__(self, restricted=False, synconly=False):
  1310. cmd.Cmd.__init__(self)
  1311. # Set umask so that nothing we create can be read by anybody else.
  1312. # The certificate cache and TOFU database contain "browser history"
  1313. # type sensitivie information.
  1314. os.umask(0o077)
  1315. self.no_cert_prompt = "\x1b[38;5;76m" + "ON" + "\x1b[38;5;255m" + "> " + "\x1b[0m"
  1316. self.cert_prompt = "\x1b[38;5;202m" + "ON" + "\x1b[38;5;255m"
  1317. self.offline_prompt = "\x1b[38;5;76m" + "OFF" + "\x1b[38;5;255m" + "> " + "\x1b[0m"
  1318. self.prompt = self.no_cert_prompt
  1319. self.gi = None
  1320. self.hist_index = 0
  1321. self.idx_filename = ""
  1322. self.less_histfile = None
  1323. self.index = []
  1324. self.index_index = -1
  1325. self.lookup = self.index
  1326. self.marks = {}
  1327. self.page_index = 0
  1328. self.permanent_redirects = {}
  1329. self.previous_redirectors = set()
  1330. # Sync-only mode is restriced by design
  1331. self.restricted = restricted or synconly
  1332. self.tmp_filename = ""
  1333. self.visited_hosts = set()
  1334. self.offline_only = False
  1335. self.sync_only = False
  1336. self.support_http = _DO_HTTP
  1337. self.automatic_choice = "n"
  1338. self.client_certs = {
  1339. "active": None
  1340. }
  1341. self.active_cert_domains = []
  1342. self.active_is_transient = False
  1343. self.transient_certs_created = []
  1344. self.options = {
  1345. "debug" : False,
  1346. "ipv6" : True,
  1347. "timeout" : 600,
  1348. "short_timeout" : 5,
  1349. "width" : 80,
  1350. "auto_follow_redirects" : True,
  1351. "gopher_proxy" : None,
  1352. "tls_mode" : "tofu",
  1353. "http_proxy": None,
  1354. "https_everywhere": False,
  1355. "archives_size" : 100,
  1356. "history_size" : 100
  1357. }
  1358. global TERM_WIDTH
  1359. TERM_WIDTH = self.options["width"]
  1360. self.log = {
  1361. "start_time": time.time(),
  1362. "requests": 0,
  1363. "ipv4_requests": 0,
  1364. "ipv6_requests": 0,
  1365. "bytes_recvd": 0,
  1366. "ipv4_bytes_recvd": 0,
  1367. "ipv6_bytes_recvd": 0,
  1368. "dns_failures": 0,
  1369. "refused_connections": 0,
  1370. "reset_connections": 0,
  1371. "timeouts": 0,
  1372. "cache_hits": 0,
  1373. }
  1374. self._connect_to_tofu_db()
  1375. def _connect_to_tofu_db(self):
  1376. db_path = os.path.join(_CONFIG_DIR, "tofu.db")
  1377. self.db_conn = sqlite3.connect(db_path)
  1378. self.db_cur = self.db_conn.cursor()
  1379. self.db_cur.execute("""CREATE TABLE IF NOT EXISTS cert_cache
  1380. (hostname text, address text, fingerprint text,
  1381. first_seen date, last_seen date, count integer)""")
  1382. def _go_to_gi(self, gi, update_hist=True, check_cache=True, handle=True,readable=True):
  1383. """This method might be considered "the heart of Offpunk".
  1384. Everything involved in fetching a gemini resource happens here:
  1385. sending the request over the network, parsing the response,
  1386. storing the response in a temporary file, choosing
  1387. and calling a handler program, and updating the history.
  1388. Nothing is returned."""
  1389. if not gi:
  1390. return
  1391. # Don't try to speak to servers running other protocols
  1392. elif gi.scheme == "mailto":
  1393. if handle and not self.sync_only:
  1394. resp = input("Send an email to %s Y/N? " %gi.path)
  1395. self.gi = gi
  1396. if resp.strip().lower() in ("y", "yes"):
  1397. if _HAS_XDGOPEN :
  1398. cmd = "xdg-open mailto:%s" %gi.path
  1399. subprocess.call(shlex.split(cmd))
  1400. else:
  1401. print("Cannot find a mail client to send mail to %s" %gi.path)
  1402. print("Please install xdg-open (usually from xdg-util package)")
  1403. return
  1404. elif gi.scheme not in ("file","gemini", "gopher", "http", "https") and not self.sync_only:
  1405. print("Sorry, no support for {} links.".format(gi.scheme))
  1406. return
  1407. # Obey permanent redirects
  1408. if gi.url in self.permanent_redirects:
  1409. new_gi = GeminiItem(self.permanent_redirects[gi.url], name=gi.name)
  1410. self._go_to_gi(new_gi)
  1411. return
  1412. if gi.scheme == "http" and self.options["https_everywhere"] :
  1413. newurl = "https" + gi.url[4:]
  1414. new_gi = GeminiItem(newurl,name=gi.name)
  1415. self._go_to_gi(new_gi)
  1416. return
  1417. # Use cache or mark as to_fetch if resource is not cached
  1418. # Why is this code useful ? It set the mimetype !
  1419. if self.offline_only:
  1420. if not gi.is_cache_valid():
  1421. self.get_list("to_fetch")
  1422. r = self.list_add_line("to_fetch",gi=gi,verbose=False)
  1423. if r:
  1424. print("%s not available, marked for syncing"%gi.url)
  1425. else:
  1426. print("%s already marked for syncing"%gi.url)
  1427. #self.gi = gi
  1428. return
  1429. # check if local file exists.
  1430. if gi.local and not os.path.exists(gi.path):
  1431. print("Local file %s does not exist!" %gi.path)
  1432. return
  1433. elif not self.offline_only and not gi.local:
  1434. try:
  1435. if gi.scheme in ("http", "https"):
  1436. if self.support_http:
  1437. gi = self._fetch_http(gi)
  1438. elif handle and not self.sync_only:
  1439. if not _DO_HTTP:
  1440. print("Install python3-requests to handle http requests natively")
  1441. webbrowser.open_new_tab(gi.url)
  1442. return
  1443. else:
  1444. return
  1445. elif gi.scheme in ("gopher"):
  1446. gi = self._fetch_gopher(gi,timeout=self.options["short_timeout"])
  1447. else:
  1448. gi = self._fetch_over_network(gi)
  1449. except UserAbortException:
  1450. return
  1451. except Exception as err:
  1452. gi.set_error(err)
  1453. # Print an error message
  1454. # we fail silently when sync_only
  1455. print_error = not self.sync_only
  1456. if isinstance(err, socket.gaierror):
  1457. self.log["dns_failures"] += 1
  1458. if print_error:
  1459. print("ERROR: DNS error!")
  1460. elif isinstance(err, ConnectionRefusedError):
  1461. self.log["refused_connections"] += 1
  1462. if print_error:
  1463. print("ERROR1: Connection refused!")
  1464. elif isinstance(err, ConnectionResetError):
  1465. self.log["reset_connections"] += 1
  1466. if print_error:
  1467. print("ERROR2: Connection reset!")
  1468. elif isinstance(err, (TimeoutError, socket.timeout)):
  1469. self.log["timeouts"] += 1
  1470. if print_error:
  1471. print("""ERROR3: Connection timed out!
  1472. Slow internet connection? Use 'set timeout' to be more patient.""")
  1473. elif isinstance(err, FileExistsError):
  1474. print("""ERROR5: Trying to create a directory which already exists
  1475. in the cache : """)
  1476. print(err)
  1477. else:
  1478. if print_error:
  1479. print("ERROR4: " + str(type(err)) + " : " + str(err))
  1480. print("\n" + str(err.with_traceback(None)))
  1481. return
  1482. # Pass file to handler, unless we were asked not to
  1483. if gi :
  1484. rendered_body = gi.get_rendered_body(readable=readable)
  1485. display = handle and not self.sync_only
  1486. if rendered_body:
  1487. self.index = gi.get_links()
  1488. self.lookup = self.index
  1489. self.page_index = 0
  1490. self.index_index = -1
  1491. if display:
  1492. self._temp_file(rendered_body)
  1493. if self.less_histfile:
  1494. os.unlink(self.less_histfile)
  1495. tmpf = tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False)
  1496. self.less_histfile = tmpf.name
  1497. less_cmd(self.idx_filename,histfile=self.less_histfile,cat=True)
  1498. elif display :
  1499. cmd_str = self._get_handler_cmd(gi.get_mime())
  1500. try:
  1501. # get tmpfile from gi !
  1502. tmpfile = gi.get_body(as_file=True)
  1503. subprocess.call(shlex.split(cmd_str % tmpfile))
  1504. except FileNotFoundError:
  1505. print("Handler program %s not found!" % shlex.split(cmd_str)[0])
  1506. print("You can use the ! command to specify another handler program or pipeline.")
  1507. # Update state
  1508. self.gi = gi
  1509. if update_hist and not self.sync_only:
  1510. self._update_history(gi)
  1511. def _temp_file(self,content):
  1512. # We actually put the body in a tmpfile before giving it to less
  1513. if self.idx_filename:
  1514. os.unlink(self.idx_filename)
  1515. tmpf = tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False)
  1516. tmpf.write(content)
  1517. tmpf.close()
  1518. self.idx_filename = tmpf.name
  1519. return self.idx_filename
  1520. def _fetch_http(self,gi):
  1521. header = {}
  1522. header["User-Agent"] = "Offpunk browser v%s"%_VERSION
  1523. response = requests.get(gi.url,headers=header)
  1524. mime = response.headers['content-type']
  1525. body = response.content
  1526. if "text/" in mime:
  1527. #body = response.text
  1528. body = response.content.decode("UTF-8","replace")
  1529. else:
  1530. body = response.content
  1531. gi.write_body(body,mime)
  1532. return gi
  1533. def _fetch_gopher(self,gi,timeout=10):
  1534. if not looks_like_url(gi.url):
  1535. print("%s is not a valide url" %gi.url)
  1536. parsed =urllib.parse.urlparse(gi.url)
  1537. host = parsed.hostname
  1538. port = parsed.port or 70
  1539. if parsed.path and parsed.path[0] == "/" and len(parsed.path) > 1:
  1540. splitted = parsed.path.split("/")
  1541. # We check if we have well a gopher type
  1542. if len(splitted[1]) == 1:
  1543. itemtype = parsed.path[1]
  1544. selector = parsed.path[2:]
  1545. else:
  1546. itemtype = "1"
  1547. selector = parsed.path
  1548. else:
  1549. itemtype = "1"
  1550. selector = parsed.path
  1551. addresses = socket.getaddrinfo(host, port, family=0,type=socket.SOCK_STREAM)
  1552. s = socket.create_connection((host,port))
  1553. for address in addresses:
  1554. self._debug("Connecting to: " + str(address[4]))
  1555. s = socket.socket(address[0], address[1])
  1556. s.settimeout(timeout)
  1557. try:
  1558. s.connect(address[4])
  1559. break
  1560. except OSError as e:
  1561. err = e
  1562. else:
  1563. # If we couldn't connect to *any* of the addresses, just
  1564. # bubble up the exception from the last attempt and deny
  1565. # knowledge of earlier failures.
  1566. raise err
  1567. if parsed.query:
  1568. request = selector + "\t" + parsed.query
  1569. else:
  1570. request = selector
  1571. request += "\r\n"
  1572. s.sendall(request.encode("UTF-8"))
  1573. response = s.makefile("rb").read()
  1574. # Transcode response into UTF-8
  1575. #if itemtype in ("0","1","h"):
  1576. if not itemtype in ("9","g","I","s"):
  1577. # Try most common encodings
  1578. for encoding in ("UTF-8", "ISO-8859-1"):
  1579. try:
  1580. response = response.decode("UTF-8")
  1581. break
  1582. except UnicodeDecodeError:
  1583. pass
  1584. else:
  1585. # try to find encoding
  1586. #if _HAS_CHARDET:
  1587. detected = chardet.detect(response)
  1588. response = response.decode(detected["encoding"])
  1589. #else:
  1590. #raise UnicodeDecodeError
  1591. if itemtype == "0":
  1592. mime = "text/gemini"
  1593. elif itemtype == "1":
  1594. mime = "text/gopher"
  1595. elif itemtype == "h":
  1596. mime = "text/html"
  1597. elif itemtype in ("9","g","I","s"):
  1598. mime = None
  1599. else:
  1600. # by default, we should consider Gopher
  1601. mime = "text/gopher"
  1602. gi.write_body(response,mime)
  1603. return gi
  1604. # fetch_over_network will modify with gi.write_body(body,mime)
  1605. # before returning the gi
  1606. def _fetch_over_network(self, gi):
  1607. # Be careful with client certificates!
  1608. # Are we crossing a domain boundary?
  1609. if self.active_cert_domains and gi.host not in self.active_cert_domains:
  1610. if self.active_is_transient:
  1611. print("Permanently delete currently active transient certificate?")
  1612. resp = input("Y/N? ")
  1613. if resp.strip().lower() in ("y", "yes"):
  1614. print("Destroying certificate.")
  1615. self._deactivate_client_cert()
  1616. else:
  1617. print("Staying here.")
  1618. raise UserAbortException()
  1619. else:
  1620. print("PRIVACY ALERT: Deactivate client cert before connecting to a new domain?")
  1621. resp = input("Y/N? ")
  1622. if resp.strip().lower() in ("n", "no"):
  1623. print("Keeping certificate active for {}".format(gi.host))
  1624. else:
  1625. print("Deactivating certificate.")
  1626. self._deactivate_client_cert()
  1627. # Suggest reactivating previous certs
  1628. if not self.client_certs["active"] and gi.host in self.client_certs:
  1629. print("PRIVACY ALERT: Reactivate previously used client cert for {}?".format(gi.host))
  1630. resp = input("Y/N? ")
  1631. if resp.strip().lower() in ("y", "yes"):
  1632. self._activate_client_cert(*self.client_certs[gi.host])
  1633. else:
  1634. print("Remaining unidentified.")
  1635. self.client_certs.pop(gi.host)
  1636. # Is this a local file?
  1637. if gi.local:
  1638. address, f = None, open(gi.path, "rb")
  1639. else:
  1640. address, f = self._send_request(gi)
  1641. # Spec dictates <META> should not exceed 1024 bytes,
  1642. # so maximum valid header length is 1027 bytes.
  1643. header = f.readline(1027)
  1644. header = header.decode("UTF-8")
  1645. if not header or header[-1] != '\n':
  1646. raise RuntimeError("Received invalid header from server!")
  1647. header = header.strip()
  1648. self._debug("Response header: %s." % header)
  1649. # Validate header
  1650. status, meta = header.split(maxsplit=1)
  1651. if len(meta) > 1024 or len(status) != 2 or not status.isnumeric():
  1652. f.close()
  1653. raise RuntimeError("Received invalid header from server!")
  1654. # Update redirect loop/maze escaping state
  1655. if not status.startswith("3"):
  1656. self.previous_redirectors = set()
  1657. # Handle non-SUCCESS headers, which don't have a response body
  1658. # Inputs
  1659. if status.startswith("1"):
  1660. if self.sync_only:
  1661. return None
  1662. else:
  1663. print(meta)
  1664. if status == "11":
  1665. user_input = getpass.getpass("> ")
  1666. else:
  1667. user_input = input("> ")
  1668. return self._fetch_over_network(gi.query(user_input))
  1669. # Redirects
  1670. elif status.startswith("3"):
  1671. new_gi = GeminiItem(gi.absolutise_url(meta))
  1672. if new_gi.url == gi.url:
  1673. raise RuntimeError("URL redirects to itself!")
  1674. elif new_gi.url in self.previous_redirectors:
  1675. raise RuntimeError("Caught in redirect loop!")
  1676. elif len(self.previous_redirectors) == _MAX_REDIRECTS:
  1677. raise RuntimeError("Refusing to follow more than %d consecutive redirects!" % _MAX_REDIRECTS)
  1678. elif self.sync_only:
  1679. follow = self.automatic_choice
  1680. # Never follow cross-domain redirects without asking
  1681. elif new_gi.host != gi.host:
  1682. follow = input("Follow cross-domain redirect to %s? (y/n) " % new_gi.url)
  1683. # Never follow cross-protocol redirects without asking
  1684. elif new_gi.scheme != gi.scheme:
  1685. follow = input("Follow cross-protocol redirect to %s? (y/n) " % new_gi.url)
  1686. # Don't follow *any* redirect without asking if auto-follow is off
  1687. elif not self.options["auto_follow_redirects"]:
  1688. follow = input("Follow redirect to %s? (y/n) " % new_gi.url)
  1689. # Otherwise, follow away
  1690. else:
  1691. follow = "yes"
  1692. if follow.strip().lower() not in ("y", "yes"):
  1693. raise UserAbortException()
  1694. self._debug("Following redirect to %s." % new_gi.url)
  1695. self._debug("This is consecutive redirect number %d." % len(self.previous_redirectors))
  1696. self.previous_redirectors.add(gi.url)
  1697. if status == "31":
  1698. # Permanent redirect
  1699. self.permanent_redirects[gi.url] = new_gi.url
  1700. return self._fetch_over_network(new_gi)
  1701. # Errors
  1702. elif status.startswith("4") or status.startswith("5"):
  1703. raise RuntimeError(meta)
  1704. # Client cert
  1705. elif status.startswith("6"):
  1706. self._handle_cert_request(meta)
  1707. return self._fetch_over_network(gi)
  1708. # Invalid status
  1709. elif not status.startswith("2"):
  1710. raise RuntimeError("Server returned undefined status code %s!" % status)
  1711. # If we're here, this must be a success and there's a response body
  1712. assert status.startswith("2")
  1713. mime = meta
  1714. # Read the response body over the network
  1715. fbody = f.read()
  1716. # DEFAULT GEMINI MIME
  1717. if mime == "":
  1718. mime = "text/gemini; charset=utf-8"
  1719. shortmime, mime_options = cgi.parse_header(mime)
  1720. if "charset" in mime_options:
  1721. try:
  1722. codecs.lookup(mime_options["charset"])
  1723. except LookupError:
  1724. raise RuntimeError("Header declared unknown encoding %s" % value)
  1725. if shortmime.startswith("text/"):
  1726. #Get the charset and default to UTF-8 in none
  1727. encoding = mime_options.get("charset", "UTF-8")
  1728. try:
  1729. body = fbody.decode(encoding)
  1730. except UnicodeError:
  1731. raise RuntimeError("Could not decode response body using %s\
  1732. encoding declared in header!" % encoding)
  1733. else:
  1734. body = fbody
  1735. gi.write_body(body,mime)
  1736. return gi
  def _send_request(self, gi):
      """Open a TLS connection for `gi` and send its URL as the request.

      Gemini URLs are contacted directly on the host and port taken from
      `gi`; gopher and http(s) URLs are sent to the proxy configured in the
      "gopher_proxy" / "http_proxy" options ("host:port" strings).

      Returns a tuple `(address, mf)`: the getaddrinfo entry that was
      connected to, and a binary file object wrapping the socket from which
      the reply can be read.

      Raises the OSError of the last connection attempt when no address
      could be reached.  Anything raised after the connection is
      established (for example by `_validate_cert` in TOFU mode) is
      propagated once the socket has been closed.
      """
      if gi.scheme == "gemini":
          # For Gemini requests, connect to the host and port specified in the URL
          host, port = gi.host, gi.port
      elif gi.scheme == "gopher":
          # For Gopher requests, use the configured proxy
          host, port = self.options["gopher_proxy"].rsplit(":", 1)
          self._debug("Using gopher proxy: " + self.options["gopher_proxy"])
      elif gi.scheme in ("http", "https"):
          host, port = self.options["http_proxy"].rsplit(":", 1)
          self._debug("Using http proxy: " + self.options["http_proxy"])
      # Do DNS resolution
      addresses = self._get_addresses(host, port)
      # Prepare TLS context
      protocol = ssl.PROTOCOL_TLS if sys.version_info.minor >= 6 else ssl.PROTOCOL_TLSv1_2
      context = ssl.SSLContext(protocol)
      # Use CAs or TOFU
      if self.options["tls_mode"] == "ca":
          context.verify_mode = ssl.CERT_REQUIRED
          context.check_hostname = True
          context.load_default_certs()
      else:
          context.check_hostname = False
          context.verify_mode = ssl.CERT_NONE
      # Impose minimum TLS version
      ## In 3.7 and above, this is easy...
      if sys.version_info.minor >= 7:
          context.minimum_version = ssl.TLSVersion.TLSv1_2
      ## Otherwise, it seems very hard...
      ## The below is less strict than it ought to be, but trying to disable
      ## TLS v1.1 here using ssl.OP_NO_TLSv1_1 produces unexpected failures
      ## with recent versions of OpenSSL. What a mess...
      else:
          context.options |= ssl.OP_NO_SSLv3
          context.options |= ssl.OP_NO_SSLv2
      # Try to enforce sensible ciphers
      try:
          context.set_ciphers("AESGCM+ECDHE:AESGCM+DHE:CHACHA20+ECDHE:CHACHA20+DHE:!DSS:!SHA1:!MD5:@STRENGTH")
      except ssl.SSLError:
          # Rely on the server to only support sensible things, I guess...
          pass
      # Load client certificate if needed
      if self.client_certs["active"]:
          certfile, keyfile = self.client_certs["active"]
          context.load_cert_chain(certfile, keyfile)
      # The timeout does not depend on the address, so pick it once
      if self.sync_only:
          timeout = self.options["short_timeout"]
      else:
          timeout = self.options["timeout"]
      # Connect to remote host by any address possible
      err = None
      for address in addresses:
          self._debug("Connecting to: " + str(address[4]))
          s = socket.socket(address[0], address[1])
          s.settimeout(timeout)
          s = context.wrap_socket(s, server_hostname = gi.host)
          try:
              s.connect(address[4])
              break
          except OSError as e:
              err = e
              # Do not leak the socket of a failed attempt
              s.close()
      else:
          # If we couldn't connect to *any* of the addresses, just
          # bubble up the exception from the last attempt and deny
          # knowledge of earlier failures.
          if err is None:
              # Empty address list: there is no earlier exception to re-raise
              err = OSError("No address found for %s" % host)
          raise err
      try:
          if sys.version_info.minor >= 5:
              self._debug("Established {} connection.".format(s.version()))
              self._debug("Cipher is: {}.".format(s.cipher()))
          # Do TOFU
          if self.options["tls_mode"] != "ca":
              cert = s.getpeercert(binary_form=True)
              self._validate_cert(address[4][0], host, cert)
          # Remember that we showed the current cert to this domain...
          if self.client_certs["active"]:
              # ...but only once, so each domain can be forgotten exactly once
              if gi.host not in self.active_cert_domains:
                  self.active_cert_domains.append(gi.host)
              self.client_certs[gi.host] = self.client_certs["active"]
          # Send request and wrap response in a file descriptor
          self._debug("Sending %s<CRLF>" % gi.url)
          s.sendall((gi.url + CRLF).encode("UTF-8"))
          mf = s.makefile(mode = "rb")
      except BaseException:
          # Nothing will read from this connection any more: close it
          # before letting the error travel up.
          s.close()
          raise
      return address, mf
  1821. def _get_addresses(self, host, port):
  1822. # DNS lookup - will get IPv4 and IPv6 records if IPv6 is enabled
  1823. if ":" in host:
  1824. # This is likely a literal IPv6 address, so we can *only* ask for
  1825. # IPv6 addresses or getaddrinfo will complain
  1826. family_mask = socket.AF_INET6
  1827. elif socket.has_ipv6 and self.options["ipv6"]:
  1828. # Accept either IPv4 or IPv6 addresses
  1829. family_mask = 0
  1830. else:
  1831. # IPv4 only
  1832. family_mask = socket.AF_INET
  1833. addresses = socket.getaddrinfo(host, port, family=family_mask,
  1834. type=socket.SOCK_STREAM)
  1835. # Sort addresses so IPv6 ones come first
  1836. addresses.sort(key=lambda add: add[0] == socket.AF_INET6, reverse=True)
  1837. return addresses
  def _handle_cert_request(self, meta, status=None, gi=None):
      """Ask the user how to answer a server's client-certificate request.

      `meta` is the text sent by the server and is shown verbatim.
      `status` (optional) is the 6x status code as a string; it only
      selects which explanation is printed.  `gi` (optional) is the item
      being fetched; its host is named in the generic explanation.

      Both optional arguments default to None so that callers passing only
      `meta` keep working: they get the generic explanation without a host
      name instead of a NameError on the formerly undefined names.

      Depending on the answer, a certificate is generated or loaded through
      the matching helper method.  Raises UserAbortException in restricted
      mode, in sync-only mode (nobody is there to answer) and when the
      user gives up.
      """
      # Don't do client cert stuff in restricted mode, as in principle
      # it could be used to fill up the disk by creating a whole lot of
      # certificates
      if self.restricted:
          print("The server is requesting a client certificate.")
          print("These are not supported in restricted mode, sorry.")
          raise UserAbortException()
      print("SERVER SAYS: ", meta)
      # Present different messages for different 6x statuses, but
      # handle them the same.
      # NOTE(review): the codes 63-65 are kept as found; confirm them
      # against the current Gemini specification.
      if status in ("64", "65"):
          print("The server rejected your certificate because it is either expired or not yet valid.")
      elif status == "63":
          print("The server did not accept your certificate.")
          print("You may need to e.g. coordinate with the admin to get your certificate fingerprint whitelisted.")
      else:
          if gi is not None:
              print("The site {} is requesting a client certificate.".format(gi.host))
          else:
              print("The site is requesting a client certificate.")
          print("This will allow the site to recognise you across requests.")
      # Give the user choices
      print("What do you want to do?")
      print("1. Give up.")
      print("2. Generate a new transient certificate.")
      print("3. Generate a new persistent certificate.")
      print("4. Load a previously generated persistent.")
      print("5. Load certificate from an external file.")
      if self.sync_only:
          # Nobody can answer during a sync: always give up.  A string is
          # used so the value has the same type as an interactive answer.
          choice = "1"
      else:
          choice = input("> ").strip()
      if choice == "2":
          self._generate_transient_cert_cert()
      elif choice == "3":
          self._generate_persistent_client_cert()
      elif choice == "4":
          self._choose_client_cert()
      elif choice == "5":
          self._load_client_cert()
      else:
          print("Giving up.")
          raise UserAbortException()
  def _validate_cert(self, address, host, cert):
      """
      Validate a TLS certificate in TOFU (trust on first use) mode.

      The SHA-256 fingerprint of `cert` (the certificate as DER bytes) is
      compared with the fingerprints stored in the `cert_cache` table for
      this `host` and `address`:

      - a known fingerprint is accepted and its usage counter is updated;
      - an unknown fingerprint for a known host triggers a security warning
        and is only accepted after confirmation (in sync-only mode the
        `automatic_choice` attribute is used as the answer);
      - the first certificate ever seen for a host is trusted blindly.

      Newly accepted certificates are also saved as `<fingerprint>.crt` in
      the `cert_cache` directory below the configuration directory.  If the
      cryptography module is installed, the warning additionally tells
      whether the most frequently seen previous certificate has expired.

      Raises Exception("TOFU Failure!") when a new certificate is refused.
      """
      now = datetime.datetime.utcnow()
      # Needed by every branch that stores a certificate, so compute it up
      # front instead of only inside the cryptography-specific code.
      certdir = os.path.join(_CONFIG_DIR, "cert_cache")

      def remember_certificate():
          # Record the fingerprint in the database and keep a copy of the
          # certificate itself on disk.
          self.db_cur.execute("""INSERT INTO cert_cache
                                 VALUES (?, ?, ?, ?, ?, ?)""",
                              (host, address, fingerprint, now, now, 1))
          self.db_conn.commit()
          os.makedirs(certdir, exist_ok=True)
          with open(os.path.join(certdir, fingerprint+".crt"), "wb") as fp:
              fp.write(cert)

      if _HAS_CRYPTOGRAPHY:
          # Using the cryptography module we can get detailed access
          # to the properties of even self-signed certs, unlike in
          # the standard ssl library...
          # (the parsed object is not used any further in this method)
          c = x509.load_der_x509_certificate(cert, _BACKEND)
      fingerprint = hashlib.sha256(cert).hexdigest()
      # Have we been here before?
      self.db_cur.execute("""SELECT fingerprint, first_seen, last_seen, count
                             FROM cert_cache
                             WHERE hostname=? AND address=?""", (host, address))
      cached_certs = self.db_cur.fetchall()
      # If not, cache this cert
      if not cached_certs:
          self._debug("TOFU: Blindly trusting first ever certificate for this host!")
          remember_certificate()
          return
      # If so, check for a match
      max_count = 0
      most_frequent_cert = None
      for cached_fingerprint, first, last, count in cached_certs:
          if count > max_count:
              max_count = count
              most_frequent_cert = cached_fingerprint
          if fingerprint == cached_fingerprint:
              # Matched!
              self._debug("TOFU: Accepting previously seen ({} times) certificate {}".format(count, fingerprint))
              self.db_cur.execute("""UPDATE cert_cache
                                     SET last_seen=?, count=?
                                     WHERE hostname=? AND address=? AND fingerprint=?""",
                                  (now, count+1, host, address, fingerprint))
              self.db_conn.commit()
              return
      # No cached fingerprint matched: this is a certificate change.
      previous_ttl = None
      if _HAS_CRYPTOGRAPHY and most_frequent_cert is not None:
          # Load the most frequently seen certificate to see if it has
          # expired
          try:
              with open(os.path.join(certdir, most_frequent_cert+".crt"), "rb") as fp:
                  previous_der = fp.read()
          except OSError:
              # The cached copy is unavailable: warn without the expiry hint
              # rather than crash in the middle of a security warning.
              self._debug("TOFU: cached copy of certificate {} is unavailable".format(most_frequent_cert))
          else:
              previous_cert = x509.load_der_x509_certificate(previous_der, _BACKEND)
              previous_ttl = previous_cert.not_valid_after - now
              self._debug("TOFU: previous certificate validity left: {}".format(previous_ttl))
      self._debug("TOFU: Unrecognised certificate {}! Raising the alarm...".format(fingerprint))
      print("****************************************")
      print("[SECURITY WARNING] Unrecognised certificate!")
      print("The certificate presented for {} ({}) has never been seen before.".format(host, address))
      print("This MIGHT be a Man-in-the-Middle attack.")
      print("A different certificate has previously been seen {} times.".format(max_count))
      if previous_ttl is not None:
          if previous_ttl < datetime.timedelta():
              print("That certificate has expired, which reduces suspicion somewhat.")
          else:
              print("That certificate is still valid for: {}".format(previous_ttl))
      print("****************************************")
      print("Attempt to verify the new certificate fingerprint out-of-band:")
      print(fingerprint)
      if self.sync_only:
          choice = self.automatic_choice
      else:
          choice = input("Accept this new certificate? Y/N ").strip().lower()
      if choice in ("y", "yes"):
          remember_certificate()
      else:
          raise Exception("TOFU Failure!")
  def _get_handler_cmd(self, mimetype):
      """Return the shell command template used to open `mimetype`.

      The patterns registered in `_MIME_HANDLERS` are matched with fnmatch;
      patterns without a "*" are tried before wildcard patterns.  When
      nothing matches, "xdg-open %s" is used if xdg-open is available,
      otherwise an echo command that reports the failure.
      """
      # Consider exact matches before wildcard matches
      exact, wildcards = [], []
      for pattern, command in _MIME_HANDLERS.items():
          bucket = wildcards if "*" in pattern else exact
          bucket.append((pattern, command))
      unmatched = object()
      cmd_str = next(
          (command for pattern, command in exact + wildcards
           if fnmatch.fnmatch(mimetype, pattern)),
          unmatched,
      )
      if cmd_str is unmatched:
          # Use "xdg-open" as a last resort.
          if _HAS_XDGOPEN:
              cmd_str = "xdg-open %s"
          else:
              cmd_str = "echo Can’t find how to open %s"
              print("Please install xdg-open (usually from xdg-util package)")
      self._debug("Using handler: %s" % cmd_str)
      return cmd_str
  1993. #TODO: remove format_geminiitem
  1994. def _format_geminiitem(self, index, gi, url=False):
  1995. protocol = "" if gi.scheme == "gemini" else " %s" % gi.scheme
  1996. line = "[%d%s] %s" % (index, protocol, gi.name or gi.url)
  1997. if gi.name and url:
  1998. line += " (%s)" % gi.url
  1999. return line
  2000. def _show_lookup(self, offset=0, end=None, url=False):
  2001. for n, gi in enumerate(self.lookup[offset:end]):
  2002. print(self._format_geminiitem(n+offset+1, gi, url))
  2003. def _update_history(self, gi):
  2004. # We never update while in sync_only
  2005. if self.sync_only:
  2006. return
  2007. # We don’t add lists to history
  2008. #if not gi or os.path.join(_DATA_DIR,"lists") in gi.url:
  2009. # return
  2010. histlist = self.get_list("history")
  2011. links = self.list_get_links("history")
  2012. # avoid duplicate
  2013. length = len(links)
  2014. if length > self.options["history_size"]:
  2015. length = self.options["history_size"]
  2016. if length > 0 and links[self.hist_index] == gi:
  2017. return
  2018. self.list_add_top("history",limit=self.options["history_size"],truncate_lines=self.hist_index)
  2019. self.hist_index = 0
  2020. def _log_visit(self, gi, address, size):
  2021. if not address:
  2022. return
  2023. self.log["requests"] += 1
  2024. self.log["bytes_recvd"] += size
  2025. self.visited_hosts.add(address)
  2026. if address[0] == socket.AF_INET:
  2027. self.log["ipv4_requests"] += 1
  2028. self.log["ipv4_bytes_recvd"] += size
  2029. elif address[0] == socket.AF_INET6:
  2030. self.log["ipv6_requests"] += 1
  2031. self.log["ipv6_bytes_recvd"] += size
  def _get_active_tmpfile(self):
      """Return `idx_filename` when the current item's MIME type is listed
      in `_FORMAT_RENDERERS`, and `tmp_filename` otherwise."""
      has_renderer = self.gi.get_mime() in _FORMAT_RENDERERS
      return self.idx_filename if has_renderer else self.tmp_filename
  2037. def _debug(self, debug_text):
  2038. if not self.options["debug"]:
  2039. return
  2040. debug_text = "\x1b[0;32m[DEBUG] " + debug_text + "\x1b[0m"
  2041. print(debug_text)
  def _load_client_cert(self):
      """
      Interactively load a TLS client certificate from the filesystem in PEM
      format.

      The certificate path is asked first, then the private key path.  An
      empty answer or a path that is not an existing file ends the dialogue
      without activating anything.
      """
      questions = (
          ("Loading client certificate file, in PEM format (blank line to cancel)",
           "Certfile path: ",
           "Certificate file {} does not exist."),
          ("Loading private key file, in PEM format (blank line to cancel)",
           "Keyfile path: ",
           "Private key file {} does not exist."),
      )
      chosen = []
      for intro, prompt, missing in questions:
          print(intro)
          answer = input(prompt).strip()
          if not answer:
              print("Aborting.")
              return
          answer = os.path.expanduser(answer)
          if not os.path.isfile(answer):
              print(missing.format(answer))
              return
          chosen.append(answer)
      certfile, keyfile = chosen
      self._activate_client_cert(certfile, keyfile)
  def _generate_transient_cert_cert(self):
      """
      Generate a new transient client certificate under a random UUID name
      in the "transient_certs" directory of the configuration directory.

      The certificate is created through `_generate_client_cert` with
      `transient=True`; afterwards it is flagged as transient and its name
      is recorded in `transient_certs_created`.
      """
      cert_name = str(uuid.uuid4())
      target_dir = os.path.join(_CONFIG_DIR, "transient_certs")
      self._generate_client_cert(target_dir, cert_name, transient=True)
      self.active_is_transient = True
      self.transient_certs_created.append(cert_name)
  def _generate_persistent_client_cert(self):
      """
      Interactively generate a new persistent client certificate in the
      "client_certs" directory of the configuration directory.

      The user is asked for a name; a blank answer aborts, anything else is
      handed to `_generate_client_cert` as the file base name.
      """
      target_dir = os.path.join(_CONFIG_DIR, "client_certs")
      print("What do you want to name this new certificate?")
      print("Answering `mycert` will create `{0}/mycert.crt` and `{0}/mycert.key`".format(target_dir))
      answer = input("> ")
      if answer.strip():
          self._generate_client_cert(target_dir, answer)
      else:
          print("Aborting.")
  2089. def _generate_client_cert(self, certdir, basename, transient=False):
  2090. """
  2091. Use `openssl` binary to generate a client certificate (which may be
  2092. transient or persistent) and save the certificate and private key to the
  2093. specified directory with the specified basename.
  2094. """
  2095. if not os.path.exists(certdir):
  2096. os.makedirs(certdir)
  2097. certfile = os.path.join(certdir, basename+".crt")
  2098. keyfile = os.path.join(certdir, basename+".key")
  2099. cmd = "openssl req -x509 -newkey rsa:2048 -days {} -nodes -keyout {} -out {}".format(1 if transient else 365, keyfile, certfile)
  2100. if transient:
  2101. cmd += " -subj '/CN={}'".format(basename)
  2102. os.system(cmd)
  2103. self._activate_client_cert(certfile, keyfile)
  def _choose_client_cert(self):
      """
      Interactively select a previously generated client certificate and
      activate it.

      The `*.crt` files of the "client_certs" directory are listed with a
      number each; answering with one of these numbers activates that
      certificate together with the `.key` file of the same base name.
      """
      pattern = os.path.join(os.path.join(_CONFIG_DIR, "client_certs"), "*.crt")
      found = glob.glob(pattern)
      if len(found) == 0:
          print("There are no previously generated certificates.")
          return
      # Menu number (as typed by the user) -> (certificate, key) paths
      menu = {}
      for number, crt_path in enumerate(found, start=1):
          menu[str(number)] = (crt_path, os.path.splitext(crt_path)[0] + ".key")
          print("{}. {}".format(number, os.path.splitext(os.path.basename(crt_path))[0]))
      answer = input("> ").strip()
      if answer not in menu:
          print("What?")
          return
      certfile, keyfile = menu[answer]
      self._activate_client_cert(certfile, keyfile)
  2124. def _activate_client_cert(self, certfile, keyfile):
  2125. self.client_certs["active"] = (certfile, keyfile)
  2126. self.active_cert_domains = []
  2127. self.prompt = self.cert_prompt + "+" + os.path.basename(certfile).replace('.crt','') + "> " + "\x1b[0m"
  2128. self._debug("Using ID {} / {}.".format(*self.client_certs["active"]))
  2129. def _deactivate_client_cert(self):
  2130. if self.active_is_transient:
  2131. for filename in self.client_certs["active"]:
  2132. os.remove(filename)
  2133. for domain in self.active_cert_domains:
  2134. self.client_certs.pop(domain)
  2135. self.client_certs["active"] = None
  2136. self.active_cert_domains = []
  2137. self.prompt = self.no_cert_prompt
  2138. self.active_is_transient = False
  2139. # Cmd implementation follows
  def default(self, line):
      """Handle input that is not the name of a command.

      Tried in this order: "EOF" quits, ".." goes up, a leading "/"
      searches the links, an abbreviation from `_ABBREVS` is expanded and
      run, something that looks like a URL is visited, and finally a
      number is taken as a 1-based index into `self.lookup`.

      Indices below 1 are refused: they would otherwise be read as Python
      negative indices and silently pick an item from the end of the list.
      """
      stripped = line.strip()
      if stripped == "EOF":
          return self.onecmd("quit")
      elif stripped == "..":
          return self.do_up()
      elif line.startswith("/"):
          return self.do_find(line[1:])
      words = line.split()
      if not words:
          # Nothing but whitespace: there is no first word to expand
          print("What?")
          return
      # Expand abbreviated commands
      first_word = words[0]
      if first_word in _ABBREVS:
          full_cmd = _ABBREVS[first_word]
          expanded = line.replace(first_word, full_cmd, 1)
          return self.onecmd(expanded)
      # Try to access it like an URL
      if looks_like_url(line):
          return self.do_go(line)
      # Try to parse numerical index for lookup table
      try:
          n = int(stripped)
      except ValueError:
          print("What?")
          return
      if n < 1:
          print("Index too low!")
          return
      try:
          gi = self.lookup[n-1]
      except IndexError:
          print ("Index too high!")
          return
      self.index_index = n
      self._go_to_gi(gi)
  2169. ### Settings
  2170. @restricted
  def do_set(self, line):
      """View or set various options."""
      # Without argument every option is listed, with one word that option
      # is shown, with two or more the first word names the option and the
      # rest is its new value.
      global TERM_WIDTH
      words = line.split()
      if not words:
          # Show all current settings
          for option in sorted(self.options.keys()):
              print("%s %s" % (option, self.options[option]))
          return
      if len(words) == 1:
          # Show current value of one specific setting
          option = words[0]
          if option in self.options:
              print("%s %s" % (option, self.options[option]))
          else:
              print("Unrecognised option %s" % option)
          return
      # Set value of one specific setting.  Splitting on any whitespace
      # also accepts tabs or several spaces between option and value.
      option, value = line.split(None, 1)
      value = value.strip()
      if option not in self.options:
          print("Unrecognised option %s" % option)
          return
      # Validate / convert values
      if option == "gopher_proxy":
          if ":" not in value:
              value += ":1965"
          else:
              host, port = value.rsplit(":",1)
              if not port.isnumeric():
                  print("Invalid proxy port %s" % port)
                  return
      elif option == "tls_mode":
          if value.lower() not in ("ca", "tofu"):
              print("TLS mode must be `ca` or `tofu`!")
              return
          # Store the normalised spelling: the option is compared with the
          # lowercase "ca" elsewhere, so "CA" must not be kept verbatim.
          value = value.lower()
      elif option == "width":
          if not value.isnumeric():
              print("%s is not a valid width (integer required)"%value)
              # Keep the previous width instead of storing the bad value
              return
          value = int(value)
          print("changing width to ",value)
          TERM_WIDTH = value
      elif value.isnumeric():
          value = int(value)
      elif value.lower() == "false":
          value = False
      elif value.lower() == "true":
          value = True
      else:
          try:
              value = float(value)
          except ValueError:
              # Not a number: keep the value as a string
              pass
      self.options[option] = value
  2223. @restricted
  def do_cert(self, line):
      """Manage client certificates"""
      # Shows the active certificate (if any) and a menu, then runs the
      # action matching the answer; any other answer aborts.
      print("Managing client certificates")
      active = self.client_certs["active"]
      if active:
          print("Active certificate: {}".format(active[0]))
      menu = (
          "1. Deactivate client certificate.",
          "2. Generate new certificate.",
          "3. Load previously generated certificate.",
          "4. Load externally created client certificate from file.",
          "Enter blank line to exit certificate manager.",
      )
      for entry in menu:
          print(entry)
      answer = input("> ").strip()
      if answer == "1":
          print("Deactivating client certificate.")
          self._deactivate_client_cert()
          return
      # Menu number -> name of the method carrying out that choice
      actions = {
          "2": "_generate_persistent_client_cert",
          "3": "_choose_client_cert",
          "4": "_load_client_cert",
      }
      if answer in actions:
          getattr(self, actions[answer])()
      else:
          print("Aborting.")
  2246. @restricted
  def do_handler(self, line):
      """View or set handler commands for different MIME types."""
      # No argument lists every handler, one word shows the handler of that
      # MIME type, more words register the rest of the line as its handler.
      words = line.split()
      if not words:
          # Show all current handlers
          for mime in sorted(_MIME_HANDLERS):
              print("%s %s" % (mime, _MIME_HANDLERS[mime]))
          return
      if len(words) == 1:
          mime = words[0]
          if mime not in _MIME_HANDLERS:
              print("No handler set for MIME type %s" % mime)
          else:
              print("%s %s" % (mime, _MIME_HANDLERS[mime]))
          return
      mime, handler = line.split(" ", 1)
      _MIME_HANDLERS[mime] = handler
      if "%s" not in handler:
          print("Are you sure you don't want to pass the filename to the handler?")
  def do_abbrevs(self, *args):
      """Print all Offpunk command abbreviations."""
      # Writes a title, an underline made of the ruler character (when one
      # is set) and one line per entry of `_ABBREVS` to `self.stdout`.
      title = "Command Abbreviations:"
      out = self.stdout
      out.write("\n{}\n".format(title))
      if self.ruler:
          underline = self.ruler * len(title)
          out.write("{}\n".format(underline))
      for short, full in _ABBREVS.items():
          out.write("{:<7} {}\n".format(short, full))
      out.write("\n")
  def do_offline(self, *args):
      """Use Offpunk offline by only accessing cached content"""
      # Already offline: only say so
      if self.offline_only:
          print("Offline and undisturbed.")
          return
      self.offline_only = True
      self.prompt = self.offline_prompt
      print("Offpunk is now offline and will only access cached content")
  def do_online(self, *args):
      """Use Offpunk online with a direct connection"""
      # Already online: only say so
      if not self.offline_only:
          print("Already online. Try offline.")
          return
      self.offline_only = False
      self.prompt = self.no_cert_prompt
      print("Offpunk is online and will access the network")
  def do_copy(self, *args):
      """Copy the content of the last visited page as gemtext in the clipboard.
      Use with "url" as argument to only copy the adress.
      Use with "raw" to copy the content as seen in your terminal (not gemtext)"""
      if not self.gi:
          print("No content to copy, visit a page first")
          return
      if not _HAS_XSEL:
          print("Please install xsel to use copy")
          return
      # xsel is run without a shell and fed through its standard input:
      # URLs come from remote pages, and characters such as "&", ";" or
      # "$(...)" must neither cut the copied text short nor be executed.
      xsel = ["xsel", "-b", "-i"]
      if args and args[0] == "url":
          subprocess.run(xsel, input=self.gi.url.encode("utf-8"))
          return
      if args and args[0] == "raw":
          path = self._get_active_tmpfile()
      else:
          path = self.gi.get_body(as_file=True)
      if not path:
          print("No content to copy, visit a page first")
          return
      try:
          with open(path, "rb") as source:
              subprocess.call(xsel, stdin=source)
      except OSError as err:
          print("Could not copy %s: %s" % (path, err))
  2305. ### Stuff for getting around
  def do_go(self, line):
      """Go to a gemini URL or marked item."""
      line = line.strip()
      if line:
          # First, check for possible marks
          if line in self.marks:
              self._go_to_gi(self.marks[line])
          # or a local file
          elif os.path.exists(os.path.expanduser(line)):
              self._go_to_gi(GeminiItem(line))
          # If this isn't a mark, treat it as a URL
          elif looks_like_url(line):
              self._go_to_gi(GeminiItem(line))
          return
      # No argument: look for URLs in the selections reported by xsel
      if not shutil.which('xsel'):
          print("Go where? (hint: install xsel to go to copied URLs)")
          return
      selections = [subprocess.check_output(['xsel', flag], text=True)
                    for flag in ('-p', '-s', '-b')]
      urls = [text for text in selections if looks_like_url(text)]
      if len(urls) > 1:
          # Several candidates: offer them as the new lookup list
          self.lookup = []
          self.lookup.extend(GeminiItem(u) for u in urls)
          print("Where do you want to go today?")
          self._show_lookup()
      elif len(urls) == 1:
          self.do_go(urls[0])
      else:
          print("Go where? (hint: simply copy an URL)")
  2341. @needs_gi
  def do_reload(self, *args):
      """Reload the current URL."""
      # Online: fetch again, bypassing the cache
      if not self.offline_only:
          self._go_to_gi(self.gi, check_cache=False)
          return
      # Offline: queue the page in the "to_fetch" list instead
      self.get_list("to_fetch")
      added = self.list_add_line("to_fetch",gi=self.gi,verbose=False)
      if added:
          message = "%s marked for syncing"
      else:
          message = "%s already marked for syncing"
      print(message % self.gi.url)
  2353. @needs_gi
  def do_up(self, *args):
      """Go up one directory in the path."""
      parent = self.gi.up()
      self._go_to_gi(parent)
  def do_back(self, *args):
      """Go back to the previous gemini item."""
      # History positions grow towards older entries; stop at the oldest.
      self.get_list("history")
      entries = self.list_get_links("history")
      if self.hist_index < len(entries) - 1:
          self.hist_index += 1
          self._go_to_gi(entries[self.hist_index], update_hist=False)
  def do_forward(self, *args):
      """Go forward to the next gemini item."""
      # Position 0 is the most recent entry; there is nothing beyond it.
      self.get_list("history")
      entries = self.list_get_links("history")
      if self.hist_index > 0:
          self.hist_index -= 1
          self._go_to_gi(entries[self.hist_index], update_hist=False)
  def do_next(self, *args):
      """Go to next item after current in index."""
      # The number is run as a command line of its own
      following = self.index_index + 1
      return self.onecmd(str(following))
  def do_previous(self, *args):
      """Go to previous item before current in index."""
      # The lookup table is reset to the index before the number is run as
      # a command line of its own
      self.lookup = self.index
      preceding = self.index_index - 1
      return self.onecmd(str(preceding))
  2382. @needs_gi
  def do_root(self, *args):
      """Go to root selector of the server hosting current item."""
      server_root = self.gi.root()
      self._go_to_gi(server_root)
  def do_tour(self, line):
      """Add index items as waypoints on a tour, which is basically a FIFO
      queue of gemini items.
      Items can be added with `tour 1 2 3 4` or ranges like `tour 1-4`.
      All items in current menu can be added with `tour *`.
      Current item can be added back to the end of the tour with `tour .`.
      Current tour can be listed with `tour ls` and scrubbed with `tour clear`."""
      # Creating the tour list if needed
      self.get_list("tour")
      line = line.strip()
      if not line:
          # Fly to next waypoint on tour
          if len(self.list_get_links("tour")) < 1:
              print("End of tour.")
          else:
              url = self.list_go_to_line("1","tour")
              if url:
                  self.list_rm_url(url,"tour")
          return
      if line == "ls":
          self.list_show("tour")
          return
      if line == "clear":
          for waypoint in self.list_get_links("tour"):
              self.list_rm_url(waypoint.url,"tour")
          return
      if line == "*":
          for item in self.lookup:
              self.list_add_line("tour",gi=item,verbose=False)
          return
      if line == ".":
          self.list_add_line("tour",verbose=False)
          return
      if looks_like_url(line):
          self.list_add_line("tour",gi=GeminiItem(line))
          return
      # Anything else is a list of indices and index ranges
      for token in line.split():
          try:
              bounds = token.split('-')
              if len(bounds) == 1:
                  # Just a single index
                  n = int(token)
                  self.list_add_line("tour",gi=self.lookup[n-1],verbose=False)
              elif len(bounds) == 2:
                  # Two endpoints for a range of indices, walked upwards
                  # or downwards depending on their order
                  first, last = int(bounds[0]), int(bounds[1])
                  step = 1 if first < last else -1
                  for n in range(first, last + step, step):
                      self.list_add_line("tour",gi=self.lookup[n-1],verbose=False)
              else:
                  # Syntax error
                  print("Invalid use of range syntax %s, skipping" % token)
          except ValueError:
              print("Non-numeric index %s, skipping." % token)
          except IndexError:
              print("Invalid index %d, skipping." % n)
  2442. @needs_gi
  def do_mark(self, line):
      """Mark the current item with a single letter. This letter can then
      be passed to the 'go' command to return to the current item later.
      Think of it like marks in vi: 'mark a'='ma' and 'go a'=''a'."""
      letter = line.strip()
      if not letter:
          # No argument: list the existing marks
          for mark, gi in self.marks.items():
              print("[%s] %s (%s)" % (mark, gi.name, gi.url))
          return
      if len(letter) == 1 and letter.isalpha():
          self.marks[letter] = self.gi
          return
      print("Invalid mark, must be one letter")
  2455. @needs_gi
  def do_info(self,line):
      """Display information about current page."""
      # Prints title, URL, path, MIME type, cache path and renderer of the
      # current item, followed by the lists containing its URL.
      out = self.gi.full_title() + "\n\n"
      out += "URL : " + self.gi.url + "\n"
      out += "Path : " + self.gi.path + "\n"
      out += "Mime : " + self.gi.get_mime() + "\n"
      out += "Cache : " + self.gi.get_cache_path() + "\n"
      if self.gi.renderer :
          # The class name is taken directly: stripping characters from
          # str(class) with lstrip()/rstrip() removes character sets, not a
          # prefix/suffix, and can eat part of the name.
          rend = self.gi.renderer.__class__.__name__
      else:
          rend = "None"
      out += "Renderer : " + rend + "\n\n"
      lists = []
      for l in self.list_lists():
          if self.list_has_url(self.gi.url,l):
              lists.append(l)
      if len(lists) > 0:
          out += "Page appeard in following lists :\n"
          # User lists first, with their kind...
          for l in lists:
              if not self.list_is_system(l):
                  status = "normal list"
                  if self.list_is_subscribed(l):
                      status = "subscription"
                  elif self.list_is_frozen(l):
                      status = "frozen list"
                  out += " * %s\t(%s)\n" %(l,status)
          # ...then system lists
          for l in lists:
              if self.list_is_system(l):
                  out += " * %s\n" %l
      else:
          out += "Page is not save in any list"
      print(out)
  def do_version(self, line):
      """Display version and system information."""
      # The report is assembled from two tables of (label, flag) pairs: the
      # optional dependencies and the features they enable.
      installed = "\t\x1b[1;32mInstalled\x1b[0m\n"
      not_installed = "\t\x1b[1;31mNot Installed\x1b[0m\n"

      def status(flag):
          return installed if flag else not_installed

      dependencies = (
          (" - python-editor : ", _HAS_EDITOR),
          (" - python-ansiwrap : ", _HAS_ANSIWRAP),
          (" - python-pil : ", _HAS_PIL),
          (" - python-cryptography : ", _HAS_CRYPTOGRAPHY),
          (" - python-magic : ", _HAS_MAGIC),
          (" - python-requests : ", _DO_HTTP),
          (" - python-feedparser : ", _DO_FEED),
          (" - python-bs4 : ", _HAS_SOUP),
          (" - python-readability : ", _HAS_READABILITY),
          (" - python-xdg : ", _HAS_XDG),
          (" - python-setproctitle : ", _HAS_SETPROCTITLE),
          (" - xdg-open : ", _HAS_XDGOPEN),
          (" - chafa : ", _HAS_CHAFA),
          (" - xsel : ", _HAS_XSEL),
      )
      features = (
          (" - Render images (ansiwrap,pil,chafa) : ", _RENDER_IMAGE),
          (" - Render HTML (bs4, readability) : ", _DO_HTML),
          (" - Render Atom/RSS feeds (feedparser) : ", _DO_FEED),
          (" - Connect to http/https (requests) : ", _DO_HTTP),
          (" - copy to/from clipboard (xsel) : ", _HAS_XSEL),
          (" - restore last position (less 572+) : ", _LESS_RESTORE_POSITION),
      )
      chunks = ["Offpunk " + _VERSION + "\n", "===========\n"]
      chunks.extend(label + status(flag) for label, flag in dependencies)
      chunks.append("\nFeatures :\n")
      chunks.extend(label + status(flag) for label, flag in features)
      chunks.append("\n")
      chunks.append("Config directory : " + _CONFIG_DIR + "\n")
      chunks.append("User Data directory : " + _DATA_DIR + "\n")
      chunks.append("Cache directoy : " + _CACHE_PATH)
      print("".join(chunks))
  2524. ### Stuff that modifies the lookup table
  2525. def do_ls(self, line):
  2526. """List contents of current index.
  2527. Use 'ls -l' to see URLs."""
  2528. self.lookup = self.index
  2529. self._show_lookup(url = "-l" in line)
  2530. self.page_index = 0
  2531. def do_gus(self, line):
  2532. """Submit a search query to the geminispace.info search engine."""
  2533. gus = GeminiItem("gemini://geminispace.info/search")
  2534. self._go_to_gi(gus.query(line))
  2535. def do_history(self, *args):
  2536. """Display history."""
  2537. self.list_show("history")
  2538. def do_find(self, searchterm):
  2539. """Find in the list of links (case insensitive)."""
  2540. results = [
  2541. gi for gi in self.lookup if searchterm.lower() in gi.name.lower()]
  2542. if results:
  2543. self.lookup = results
  2544. self._show_lookup()
  2545. self.page_index = 0
  2546. else:
  2547. print("No results found.")
  2548. def emptyline(self):
  2549. """Page through index ten lines at a time."""
  2550. i = self.page_index
  2551. if i > len(self.lookup):
  2552. return
  2553. self._show_lookup(offset=i, end=i+10)
  2554. self.page_index += 10
  2555. ### Stuff that does something to most recently viewed item
  2556. @needs_gi
  2557. def do_cat(self, *args):
  2558. """Run most recently visited item through "cat" command."""
  2559. subprocess.call(shlex.split("cat %s" % self._get_active_tmpfile()))
  2560. @needs_gi
  2561. def do_less(self, *args):
  2562. """Run most recently visited item through "less" command, restoring \
  2563. previous position.
  2564. Use "less full" to see a complete html page instead of the article view.
  2565. (the "full" argument has no effect on Gemtext content but doesn’t restore position)."""
  2566. if self.gi and args and args[0] == "full":
  2567. self._go_to_gi(self.gi,readable=False)
  2568. elif self.gi.is_cache_valid():
  2569. less_cmd(self._get_active_tmpfile(),histfile=self.less_histfile)
  2570. else:
  2571. self.do_go(self.gi.url)
  2572. @needs_gi
  2573. def do_open(self, *args):
  2574. """Open current item with the configured handler or xdg-open.
  2575. see "handler" command to set your own."""
  2576. cmd_str = self._get_handler_cmd(self.gi.get_mime())
  2577. file_path = "\"%s\"" %self.gi.get_body(as_file=True)
  2578. cmd_str = cmd_str % file_path
  2579. subprocess.call(cmd_str,shell=True)
  2580. @needs_gi
  2581. def do_fold(self, *args):
  2582. """Run most recently visited item through "fold" command."""
  2583. cmd_str = _DEFAULT_LESS % self._get_active_tmpfile()
  2584. subprocess.call("%s | fold -w 70 -s" % cmd_str, shell=True)
  2585. @restricted
  2586. @needs_gi
  2587. def do_shell(self, line):
  2588. """'cat' most recently visited item through a shell pipeline."""
  2589. subprocess.call(("cat %s |" % self._get_active_tmpfile()) + line, shell=True)
  2590. @restricted
  2591. @needs_gi
  2592. def do_save(self, line):
  2593. """Save an item to the filesystem.
  2594. 'save n filename' saves menu item n to the specified filename.
  2595. 'save filename' saves the last viewed item to the specified filename.
  2596. 'save n' saves menu item n to an automagic filename."""
  2597. args = line.strip().split()
  2598. # First things first, figure out what our arguments are
  2599. if len(args) == 0:
  2600. # No arguments given at all
  2601. # Save current item, if there is one, to a file whose name is
  2602. # inferred from the gemini path
  2603. if not self.gi.is_cache_valid():
  2604. print("You cannot save if not cached!")
  2605. return
  2606. else:
  2607. index = None
  2608. filename = None
  2609. elif len(args) == 1:
  2610. # One argument given
  2611. # If it's numeric, treat it as an index, and infer the filename
  2612. try:
  2613. index = int(args[0])
  2614. filename = None
  2615. # If it's not numeric, treat it as a filename and
  2616. # save the current item
  2617. except ValueError:
  2618. index = None
  2619. filename = os.path.expanduser(args[0])
  2620. elif len(args) == 2:
  2621. # Two arguments given
  2622. # Treat first as an index and second as filename
  2623. index, filename = args
  2624. try:
  2625. index = int(index)
  2626. except ValueError:
  2627. print("First argument is not a valid item index!")
  2628. return
  2629. filename = os.path.expanduser(filename)
  2630. else:
  2631. print("You must provide an index, a filename, or both.")
  2632. return
  2633. # Next, fetch the item to save, if it's not the current one.
  2634. if index:
  2635. last_gi = self.gi
  2636. try:
  2637. gi = self.lookup[index-1]
  2638. self._go_to_gi(gi, update_hist = False, handle = False)
  2639. except IndexError:
  2640. print ("Index too high!")
  2641. self.gi = last_gi
  2642. return
  2643. else:
  2644. gi = self.gi
  2645. # Derive filename from current GI's path, if one hasn't been set
  2646. if not filename:
  2647. filename = gi.get_filename()
  2648. # Check for filename collisions and actually do the save if safe
  2649. if os.path.exists(filename):
  2650. print("File %s already exists!" % filename)
  2651. else:
  2652. # Don't use _get_active_tmpfile() here, because we want to save the
  2653. # "source code" of menus, not the rendered view - this way Offpunk
  2654. # can navigate to it later.
  2655. print("Saved to %s" % filename)
  2656. shutil.copyfile(gi.get_body(as_file=True), filename)
  2657. # Restore gi if necessary
  2658. if index != None:
  2659. self._go_to_gi(last_gi, handle=False)
  2660. @needs_gi
  2661. def do_url(self, *args):
  2662. """Print URL of most recently visited item."""
  2663. print(self.gi.url)
  2664. ### Bookmarking stuff
  2665. @restricted
  2666. @needs_gi
  2667. def do_add(self, line):
  2668. """Add the current URL to the list specied as argument.
  2669. If no argument given, URL is added to Bookmarks."""
  2670. args = line.split()
  2671. if len(args) < 1 :
  2672. self.list_add_line("bookmarks")
  2673. else:
  2674. self.list_add_line(args[0])
  2675. # Get the list file name, creating or migrating it if needed.
  2676. # Migrate bookmarks/tour/to_fetch from XDG_CONFIG to XDG_DATA
  2677. # We migrate only if the file exists in XDG_CONFIG and not XDG_DATA
  2678. def get_list(self,list):
  2679. list_path = self.list_path(list)
  2680. if not list_path:
  2681. old_file_gmi = os.path.join(_CONFIG_DIR,list + ".gmi")
  2682. old_file_nogmi = os.path.join(_CONFIG_DIR,list)
  2683. target = os.path.join(_DATA_DIR,"lists")
  2684. if os.path.exists(old_file_gmi):
  2685. shutil.move(old_file_gmi,target)
  2686. elif os.path.exists(old_file_nogmi):
  2687. targetgmi = os.path.join(target,list+".gmi")
  2688. shutil.move(old_file_nogmi,targetgmi)
  2689. else:
  2690. if list == "subscribed":
  2691. title = "Subscriptions #subscribed (new links in those pages will be added to tour)"
  2692. elif list == "to_fetch":
  2693. title = "Links requested and to be fetched during the next --sync"
  2694. else:
  2695. title = None
  2696. self.list_create(list, title=title)
  2697. list_path = self.list_path(list)
  2698. return list_path
  2699. def do_subscribe(self,line):
  2700. """Subscribe to current page by saving it in the "subscribed" list.
  2701. If a new link is found in the page during a --sync, the new link is automatically
  2702. fetched and added to your next tour.
  2703. To unsubscribe, remove the page from the "subscribed" list."""
  2704. subs = self.gi.get_subscribe_links()
  2705. if len(subs) > 1:
  2706. stri = "Multiple feeds have been found :\n\n"
  2707. counter = 0
  2708. for l in subs:
  2709. stri += "[%s] %s [%s]\n"%(counter+1,l[0],l[1])
  2710. counter += 1
  2711. stri += "\n\n"
  2712. stri += "Which feed do you want to subsribe ? > "
  2713. ans = input(stri)
  2714. if ans.isdigit() and 0 < int(ans) <= len(subs):
  2715. sublink,mime,title = subs[int(ans)-1]
  2716. else:
  2717. sublink,title = None,None
  2718. else:
  2719. sublink,mime,title = subs[0]
  2720. if sublink:
  2721. gi = GeminiItem(sublink,name=title)
  2722. list_path = self.get_list("subscribed")
  2723. added = self.list_add_line("subscribed",gi=gi,verbose=False)
  2724. if added :
  2725. print("Subscribed to %s" %sublink)
  2726. else:
  2727. print("You are already subscribed to %s"%sublink)
  2728. def do_bookmarks(self, line):
  2729. """Show or access the bookmarks menu.
  2730. 'bookmarks' shows all bookmarks.
  2731. 'bookmarks n' navigates immediately to item n in the bookmark menu.
  2732. Bookmarks are stored using the 'add' command."""
  2733. list_path = self.get_list("bookmarks")
  2734. args = line.strip()
  2735. if len(args.split()) > 1 or (args and not args.isnumeric()):
  2736. print("bookmarks command takes a single integer argument!")
  2737. elif args:
  2738. self.list_go_to_line(args,"bookmarks")
  2739. else:
  2740. self.list_show("bookmarks")
  2741. def do_archive(self,args):
  2742. """Archive current page by removing it from every list and adding it to
  2743. archives, which is a special historical list limited in size. It is similar to `move archives`."""
  2744. for li in self.list_lists():
  2745. if li not in ["archives", "history"]:
  2746. deleted = self.list_rm_url(self.gi.url,li)
  2747. if deleted:
  2748. print("Removed from %s"%li)
  2749. self.list_add_top("archives",limit=self.options["archives_size"])
  2750. print("Archiving: %s"%self.gi.full_title())
  2751. print("\x1b[2;34mCurrent maximum size of archives : %s\x1b[0m" %self.options["archives_size"])
  2752. def list_add_line(self,list,gi=None,verbose=True):
  2753. list_path = self.list_path(list)
  2754. if not list_path:
  2755. print("List %s does not exist. Create it with ""list create %s"""%(list,list))
  2756. return False
  2757. else:
  2758. if not gi:
  2759. gi = self.gi
  2760. # first we check if url already exists in the file
  2761. with open(list_path,"r") as l_file:
  2762. lines = l_file.readlines()
  2763. l_file.close()
  2764. for l in lines:
  2765. sp = l.split()
  2766. if gi.url in sp:
  2767. if verbose:
  2768. print("%s already in %s."%(gi.url,list))
  2769. return False
  2770. with open(list_path,"a") as l_file:
  2771. l_file.write(gi.to_map_line())
  2772. l_file.close()
  2773. if verbose:
  2774. print("%s added to %s" %(gi.url,list))
  2775. return True
  2776. def list_add_top(self,list,limit=0,truncate_lines=0):
  2777. if not self.gi:
  2778. return
  2779. stri = self.gi.to_map_line().strip("\n")
  2780. if list == "archives":
  2781. stri += ", archived on "
  2782. elif list == "history":
  2783. stri += ", visited on "
  2784. else:
  2785. stri += ", added to %s on "%list
  2786. stri += time.ctime() + "\n"
  2787. list_path = self.get_list(list)
  2788. with open(list_path,"r") as l_file:
  2789. lines = l_file.readlines()
  2790. l_file.close()
  2791. with open(list_path,"w") as l_file:
  2792. l_file.write(stri)
  2793. counter = 0
  2794. to_truncate = truncate_lines
  2795. for l in lines:
  2796. if to_truncate > 0:
  2797. to_truncate -= 1
  2798. elif limit == 0 or counter < limit:
  2799. l_file.write(l)
  2800. counter += 1
  2801. l_file.close()
  2802. # remove an url from a list.
  2803. # return True if the URL was removed
  2804. # return False if the URL was not found
  2805. def list_rm_url(self,url,list):
  2806. return self.list_has_url(url,list,deletion=True)
  2807. # deletion and has_url are so similar, I made them the same method
  2808. def list_has_url(self,url,list,deletion=False):
  2809. list_path = self.list_path(list)
  2810. if list_path:
  2811. to_return = False
  2812. with open(list_path,"r") as lf:
  2813. lines = lf.readlines()
  2814. lf.close()
  2815. to_write = []
  2816. for l in lines:
  2817. # we separate components of the line
  2818. # to ensure we identify a complete URL, not a part of it
  2819. splitted = l.split()
  2820. if url not in splitted:
  2821. #sometimes, we must remove the ending "/"
  2822. if url.endswith("/") and url[:-1] in splitted:
  2823. to_return = True
  2824. else:
  2825. to_write.append(l)
  2826. else:
  2827. to_return = True
  2828. if deletion :
  2829. with open(list_path,"w") as lf:
  2830. for l in to_write:
  2831. lf.write(l)
  2832. lf.close()
  2833. return to_return
  2834. else:
  2835. return False
  2836. def list_get_links(self,list):
  2837. list_path = self.list_path(list)
  2838. if list_path:
  2839. gi = GeminiItem("file://" + list_path)
  2840. return gi.get_links()
  2841. else:
  2842. return []
  2843. def list_go_to_line(self,line,list):
  2844. list_path = self.list_path(list)
  2845. if not list_path:
  2846. print("List %s does not exist. Create it with ""list create %s"""%(list,list))
  2847. elif not line.isnumeric():
  2848. print("go_to_line requires a number as parameter")
  2849. else:
  2850. gi = GeminiItem("file://" + list_path,list)
  2851. gi = gi.get_link(int(line))
  2852. display = not self.sync_only
  2853. self._go_to_gi(gi,handle=display)
  2854. return gi.url
  2855. def list_show(self,list):
  2856. list_path = self.list_path(list)
  2857. if not list_path:
  2858. print("List %s does not exist. Create it with ""list create %s"""%(list,list))
  2859. else:
  2860. gi = GeminiItem("file://" + list_path,list)
  2861. display = not self.sync_only
  2862. self._go_to_gi(gi,handle=display)
  2863. #return the path of the list file if list exists.
  2864. #return None if the list doesn’t exist.
  2865. def list_path(self,list):
  2866. listdir = os.path.join(_DATA_DIR,"lists")
  2867. list_path = os.path.join(listdir, "%s.gmi"%list)
  2868. if os.path.exists(list_path):
  2869. return list_path
  2870. else:
  2871. return None
  2872. def list_create(self,list,title=None):
  2873. list_path = self.list_path(list)
  2874. if list in ["create","edit","delete","help"]:
  2875. print("%s is not allowed as a name for a list"%list)
  2876. elif not list_path:
  2877. listdir = os.path.join(_DATA_DIR,"lists")
  2878. os.makedirs(listdir,exist_ok=True)
  2879. list_path = os.path.join(listdir, "%s.gmi"%list)
  2880. with open(list_path,"a") as lfile:
  2881. if title:
  2882. lfile.write("# %s\n"%title)
  2883. else:
  2884. lfile.write("# %s\n"%list)
  2885. lfile.close()
  2886. print("list created. Display with `list %s`"%list)
  2887. else:
  2888. print("list %s already exists" %list)
  2889. def do_move(self,arg):
  2890. """move LIST will add the current page to the list LIST.
  2891. With a major twist: current page will be removed from all other lists.
  2892. If current page was not in a list, this command is similar to `add LIST`."""
  2893. if not arg:
  2894. print("LIST argument is required as the target for your move")
  2895. elif arg[0] == "archives":
  2896. self.do_archive()
  2897. else:
  2898. args = arg.split()
  2899. list_path = self.list_path(args[0])
  2900. if not list_path:
  2901. print("%s is not a list, aborting the move" %args[0])
  2902. else:
  2903. lists = self.list_lists()
  2904. for l in lists:
  2905. if l != args[0] and l not in ["archives", "history"]:
  2906. isremoved = self.list_rm_url(self.gi.url,l)
  2907. if isremoved:
  2908. print("Removed from %s"%l)
  2909. self.list_add_line(args[0])
  2910. def list_lists(self):
  2911. listdir = os.path.join(_DATA_DIR,"lists")
  2912. to_return = []
  2913. if os.path.exists(listdir):
  2914. lists = os.listdir(listdir)
  2915. if len(lists) > 0:
  2916. for l in lists:
  2917. #removing the .gmi at the end of the name
  2918. to_return.append(l[:-4])
  2919. return to_return
  2920. def list_has_status(self,list,status):
  2921. path = self.list_path(list)
  2922. toreturn = False
  2923. if path:
  2924. with open(path) as f:
  2925. line = f.readline().strip()
  2926. f.close()
  2927. if line.startswith("#") and status in line:
  2928. toreturn = True
  2929. return toreturn
  2930. def list_is_subscribed(self,list):
  2931. return self.list_has_status(list,"#subscribed")
  2932. def list_is_frozen(self,list):
  2933. return self.list_has_status(list,"#frozen")
  2934. def list_is_system(self,list):
  2935. return list in ["history","to_fetch","archives","tour"]
  2936. # This modify the status of a list to one of :
  2937. # normal, frozen, subscribed
  2938. # action is either #frozen, #subscribed or None
  2939. def list_modify(self,list,action=None):
  2940. path = self.list_path(list)
  2941. with open(path) as f:
  2942. lines = f.readlines()
  2943. f.close()
  2944. if lines[0].strip().startswith("#"):
  2945. first_line = lines.pop(0).strip("\n")
  2946. else:
  2947. first_line = "# %s "%list
  2948. first_line = first_line.replace("#subscribed","").replace("#frozen","")
  2949. if action:
  2950. first_line += " " + action
  2951. print("List %s has been marked as %s"%(list,action))
  2952. else:
  2953. print("List %s is now a normal list" %list)
  2954. first_line += "\n"
  2955. lines.insert(0,first_line)
  2956. with open(path,"w") as f:
  2957. for line in lines:
  2958. f.write(line)
  2959. f.close()
  2960. def do_list(self,arg):
  2961. """Manage list of bookmarked pages.
  2962. - list : display available lists
  2963. - list $LIST : display pages in $LIST
  2964. - list create $NEWLIST : create a new list
  2965. - list edit $LIST : edit the list
  2966. - list subscribe $LIST : during sync, add new links found in listed pages to tour
  2967. - list freeze $LIST : don’t update pages in list during sync if a cache already exists
  2968. - list normal $LIST : update pages in list during sync but don’t add anything to tour
  2969. - list delete $LIST : delete a list permanently (a confirmation is required)
  2970. - list help : print this help
  2971. See also :
  2972. - add $LIST (to add current page to $LIST or, by default, to bookmarks)
  2973. - move $LIST (to add current page to list while removing from all others)
  2974. - archive (to remove current page from all lists while adding to archives)"""
  2975. listdir = os.path.join(_DATA_DIR,"lists")
  2976. os.makedirs(listdir,exist_ok=True)
  2977. if not arg:
  2978. lists = self.list_lists()
  2979. if len(lists) > 0:
  2980. lgi = GeminiItem(listdir, "My lists")
  2981. self._go_to_gi(lgi)
  2982. else:
  2983. print("No lists yet. Use `list create`")
  2984. else:
  2985. args = arg.split()
  2986. if args[0] == "create":
  2987. if len(args) > 2:
  2988. name = " ".join(args[2:])
  2989. self.list_create(args[1].lower(),title=name)
  2990. elif len(args) == 2:
  2991. self.list_create(args[1].lower())
  2992. else:
  2993. print("A name is required to create a new list. Use `list create NAME`")
  2994. elif args[0] == "edit":
  2995. if not _HAS_EDITOR:
  2996. print("Please install python-editor to edit you lists")
  2997. elif len(args) > 1:
  2998. if args[1] in self.list_lists():
  2999. path = os.path.join(listdir,args[1]+".gmi")
  3000. editor.edit(path)
  3001. else:
  3002. print("A valid list name is required to edit a list")
  3003. else:
  3004. print("A valid list name is required to edit a list")
  3005. elif args[0] == "delete":
  3006. if len(args) > 1:
  3007. if self.list_is_system(args[1]):
  3008. print("%s is a system list which cannot be deleted"%args[1])
  3009. elif args[1] in self.list_lists():
  3010. size = len(self.list_get_links(args[1]))
  3011. stri = "Are you sure you want to delete %s ?\n"%args[1]
  3012. confirm = "YES"
  3013. if size > 0:
  3014. stri += "! %s items in the list will be lost !\n"%size
  3015. confirm = "YES DELETE %s" %size
  3016. else :
  3017. stri += "The list is empty, it should be safe to delete it.\n"
  3018. stri += "Type \"%s\" (in capital, without quotes) to confirm :"%confirm
  3019. answer = input(stri)
  3020. if answer == confirm:
  3021. path = os.path.join(listdir,args[1]+".gmi")
  3022. os.remove(path)
  3023. print("* * * %s has been deleted" %args[1])
  3024. else:
  3025. print("A valid list name is required to be deleted")
  3026. else:
  3027. print("A valid list name is required to be deleted")
  3028. elif args[0] in ["subscribe","freeze","normal"]:
  3029. if len(args) > 1:
  3030. if self.list_is_system(args[1]):
  3031. print("You cannot modify %s which is a system list"%args[1])
  3032. elif args[1] in self.list_lists():
  3033. if args[0] == "subscribe":
  3034. action = "#subscribed"
  3035. elif args[0] == "freeze":
  3036. action = "#frozen"
  3037. else:
  3038. action = None
  3039. self.list_modify(args[1],action=action)
  3040. else:
  3041. print("A valid list name is required after %s" %args[0])
  3042. elif args[0] == "help":
  3043. self.onecmd("help list")
  3044. elif len(args) == 1:
  3045. self.list_show(args[0].lower())
  3046. else:
  3047. self.list_go_to_line(args[1],args[0].lower())
  3048. def do_help(self, arg):
  3049. """ALARM! Recursion detected! ALARM! Prepare to eject!"""
  3050. if arg == "!":
  3051. print("! is an alias for 'shell'")
  3052. elif arg == "?":
  3053. print("? is an alias for 'help'")
  3054. elif arg in _ABBREVS:
  3055. full_cmd = _ABBREVS[arg]
  3056. print("%s is aan alias for '%s'" %(arg,full_cmd))
  3057. print("See the list of aliases with 'abbrevs'")
  3058. print("'help %s':"%full_cmd)
  3059. cmd.Cmd.do_help(self, full_cmd)
  3060. else:
  3061. cmd.Cmd.do_help(self, arg)
  3062. ### Flight recorder
  3063. def do_blackbox(self, *args):
  3064. """Display contents of flight recorder, showing statistics for the
  3065. current gemini browsing session."""
  3066. lines = []
  3067. # Compute flight time
  3068. now = time.time()
  3069. delta = now - self.log["start_time"]
  3070. hours, remainder = divmod(delta, 3600)
  3071. minutes, seconds = divmod(remainder, 60)
  3072. # Count hosts
  3073. ipv4_hosts = len([host for host in self.visited_hosts if host[0] == socket.AF_INET])
  3074. ipv6_hosts = len([host for host in self.visited_hosts if host[0] == socket.AF_INET6])
  3075. # Assemble lines
  3076. lines.append(("Patrol duration", "%02d:%02d:%02d" % (hours, minutes, seconds)))
  3077. lines.append(("Requests sent:", self.log["requests"]))
  3078. lines.append((" IPv4 requests:", self.log["ipv4_requests"]))
  3079. lines.append((" IPv6 requests:", self.log["ipv6_requests"]))
  3080. lines.append(("Bytes received:", self.log["bytes_recvd"]))
  3081. lines.append((" IPv4 bytes:", self.log["ipv4_bytes_recvd"]))
  3082. lines.append((" IPv6 bytes:", self.log["ipv6_bytes_recvd"]))
  3083. lines.append(("Unique hosts visited:", len(self.visited_hosts)))
  3084. lines.append((" IPv4 hosts:", ipv4_hosts))
  3085. lines.append((" IPv6 hosts:", ipv6_hosts))
  3086. lines.append(("DNS failures:", self.log["dns_failures"]))
  3087. lines.append(("Timeouts:", self.log["timeouts"]))
  3088. lines.append(("Refused connections:", self.log["refused_connections"]))
  3089. lines.append(("Reset connections:", self.log["reset_connections"]))
  3090. lines.append(("Cache hits:", self.log["cache_hits"]))
  3091. # Print
  3092. for key, value in lines:
  3093. print(key.ljust(24) + str(value).rjust(8))
  3094. def do_sync(self, line):
  3095. """Synchronize all bookmarks lists.
  3096. - New elements in pages in subscribed lists will be added to tour
  3097. - Elements in list to_fetch will be retrieved and added to tour
  3098. - Normal lists will be synchronized and updated
  3099. - Frozen lists will be fetched only if not present.
  3100. Argument : duration of cache validity (in seconds)."""
  3101. if self.offline_only:
  3102. print("Sync can only be achieved online. Change status with `online`.")
  3103. return
  3104. args = line.split()
  3105. if len(args) > 0:
  3106. if not args[0].isdigit():
  3107. print("sync argument should be the cache validity expressed in seconds")
  3108. return
  3109. else:
  3110. validity = int(args[0])
  3111. else:
  3112. validity = 0
  3113. self.call_sync(refresh_time=validity)
  3114. def call_sync(self,refresh_time=0,depth=1):
  3115. # fetch_gitem is the core of the sync algorithm.
  3116. # It takes as input :
  3117. # - a GeminiItem to be fetched
  3118. # - depth : the degree of recursion to build the cache (0 means no recursion)
  3119. # - validity : the age, in seconds, existing caches need to have before
  3120. # being refreshed (0 = never refreshed if it already exists)
  3121. # - savetotour : if True, newly cached items are added to tour
  3122. def add_to_tour(gitem):
  3123. if gitem.is_cache_valid():
  3124. print(" -> adding to tour: %s" %gitem.url)
  3125. self.list_add_line("tour",gi=gitem,verbose=False)
  3126. return True
  3127. else:
  3128. return False
  3129. def fetch_gitem(gitem,depth=0,validity=0,savetotour=False,count=[0,0],strin=""):
  3130. #savetotour = True will save to tour newly cached content
  3131. # else, do not save to tour
  3132. #regardless of valitidy
  3133. if not gitem.is_cache_valid(validity=validity):
  3134. if strin != "":
  3135. endline = '\r'
  3136. else:
  3137. endline = None
  3138. #Did we already had a cache (even an old one) ?
  3139. isnew = not gitem.is_cache_valid()
  3140. print("%s [%s/%s] Fetch "%(strin,count[0],count[1]),gitem.url,end=endline)
  3141. self._go_to_gi(gitem,update_hist=False)
  3142. if savetotour and isnew and gitem.is_cache_valid():
  3143. #we add to the next tour only if we managed to cache
  3144. #the ressource
  3145. add_to_tour(gitem)
  3146. #Now, recursive call, even if we didn’t refresh the cache
  3147. if depth > 0:
  3148. #we only savetotour at the first level of recursion
  3149. if depth > 1:
  3150. savetotour=False
  3151. links = gitem.get_links()
  3152. subcount = [0,len(links)]
  3153. d = depth - 1
  3154. for k in links:
  3155. #recursive call (validity is always 0 in recursion)
  3156. substri = strin + " -->"
  3157. subcount[0] += 1
  3158. fetch_gitem(k,depth=d,validity=0,savetotour=savetotour,\
  3159. count=subcount,strin=substri)
  3160. def fetch_list(list,validity=0,depth=1,tourandremove=False,tourchildren=False):
  3161. links = self.list_get_links(list)
  3162. end = len(links)
  3163. counter = 0
  3164. print(" * * * %s to fetch in %s * * *" %(end,list))
  3165. for l in links:
  3166. counter += 1
  3167. fetch_gitem(l,depth=depth,validity=validity,savetotour=tourchildren,count=[counter,end])
  3168. if tourandremove:
  3169. if add_to_tour(l):
  3170. self.list_rm_url(l.url,list)
  3171. self.sync_only = True
  3172. lists = self.list_lists()
  3173. # We will fetch all the lists except "archives" and "history"
  3174. # We keep tour for the last round
  3175. subscriptions = []
  3176. normal_lists = []
  3177. fridge = []
  3178. for l in lists:
  3179. if not self.list_is_system(l):
  3180. if self.list_is_frozen(l):
  3181. fridge.append(l)
  3182. elif self.list_is_subscribed(l):
  3183. subscriptions.append(l)
  3184. else:
  3185. normal_lists.append(l)
  3186. # We start with the "subscribed" as we need to find new items
  3187. for l in subscriptions:
  3188. fetch_list(l,validity=refresh_time,depth=depth,tourchildren=True)
  3189. #Then the fetch list (item are removed from the list after fetch)
  3190. if "to_fetch" in lists:
  3191. fetch_list("to_fetch",validity=refresh_time,depth=depth,tourandremove=True)
  3192. #then we fetch all the rest (including bookmarks and tour)
  3193. for l in normal_lists:
  3194. fetch_list(l,validity=refresh_time,depth=depth)
  3195. for l in fridge:
  3196. fetch_list(l,validity=0,depth=depth)
  3197. #tour should be the last one as item my be added to it by others
  3198. fetch_list("tour",validity=refresh_time,depth=depth)
  3199. print("End of sync")
  3200. self.sync_only = False
  3201. ### The end!
  3202. def do_quit(self, *args):
  3203. """Exit Offpunk."""
  3204. def unlink(filename):
  3205. if filename and os.path.exists(filename):
  3206. os.unlink(filename)
  3207. # Close TOFU DB
  3208. self.db_conn.commit()
  3209. self.db_conn.close()
  3210. # Clean up after ourself
  3211. unlink(self.tmp_filename)
  3212. unlink(self.idx_filename)
  3213. unlink(self.less_histfile)
  3214. for cert in self.transient_certs_created:
  3215. for ext in (".crt", ".key"):
  3216. certfile = os.path.join(_CONFIG_DIR, "transient_certs", cert+ext)
  3217. if os.path.exists(certfile):
  3218. os.remove(certfile)
  3219. print("You can close your screen!")
  3220. sys.exit()
  3221. do_exit = do_quit
  3222. # Main function
  3223. def main():
  3224. # Parse args
  3225. parser = argparse.ArgumentParser(description='A command line gemini client.')
  3226. parser.add_argument('--bookmarks', action='store_true',
  3227. help='start with your list of bookmarks')
  3228. parser.add_argument('--tls-cert', metavar='FILE', help='TLS client certificate file')
  3229. parser.add_argument('--tls-key', metavar='FILE', help='TLS client certificate private key file')
  3230. parser.add_argument('--restricted', action="store_true", help='Disallow shell, add, and save commands')
  3231. parser.add_argument('--sync', action='store_true',
  3232. help='run non-interactively to build cache by exploring bookmarks')
  3233. parser.add_argument('--assume-yes', action='store_true',
  3234. help='assume-yes when asked questions about certificates/redirections during sync')
  3235. parser.add_argument('--disable-http',action='store_true',
  3236. help='do not try to get http(s) links (but already cached will be displayed)')
  3237. parser.add_argument('--fetch-later', action='store_true',
  3238. help='run non-interactively with an URL as argument to fetch it later')
  3239. parser.add_argument('--depth',
  3240. help='depth of the cache to build. Default is 1. More is crazy. Use at your own risks!')
  3241. parser.add_argument('--cache-validity',
  3242. help='duration for which a cache is valid before sync (seconds)')
  3243. parser.add_argument('--version', action='store_true',
  3244. help='display version information and quit')
  3245. parser.add_argument('url', metavar='URL', nargs='*',
  3246. help='start with this URL')
  3247. args = parser.parse_args()
  3248. # Handle --version
  3249. if args.version:
  3250. print("Offpunk " + _VERSION)
  3251. sys.exit()
  3252. # Instantiate client
  3253. gc = GeminiClient(restricted=args.restricted,synconly=args.sync)
  3254. torun_queue = []
  3255. # Act on args
  3256. if args.tls_cert:
  3257. # If tls_key is None, python will attempt to load the key from tls_cert.
  3258. gc._activate_client_cert(args.tls_cert, args.tls_key)
  3259. if args.bookmarks:
  3260. torun_queue.append("bookmarks")
  3261. elif args.url:
  3262. if len(args.url) == 1:
  3263. torun_queue.append("go %s" % args.url[0])
  3264. else:
  3265. for url in args.url:
  3266. torun_queue.append("tour %s" % url)
  3267. torun_queue.append("tour")
  3268. if args.disable_http:
  3269. gc.support_http = False
  3270. # Endless interpret loop (except while --sync or --fetch-later)
  3271. if args.fetch_later:
  3272. if args.url:
  3273. gc.sync_only = True
  3274. for u in args.url:
  3275. gi = GeminiItem(u)
  3276. if gi and gi.is_cache_valid():
  3277. gc.list_add_line("tour",gi)
  3278. else:
  3279. gc.list_add_line("to_fetch",gi)
  3280. else:
  3281. print("--fetch-later requires an URL (or a list of URLS) as argument")
  3282. elif args.sync:
  3283. if args.assume_yes:
  3284. gc.automatic_choice = "y"
  3285. if args.cache_validity:
  3286. refresh_time = int(args.cache_validity)
  3287. else:
  3288. # if no refresh time, a default of 0 is used (which means "infinite")
  3289. refresh_time = 0
  3290. if args.depth:
  3291. depth = int(args.depth)
  3292. else:
  3293. depth = 1
  3294. gc.call_sync(refresh_time=refresh_time,depth=depth)
  3295. gc.onecmd("blackbox")
  3296. else:
  3297. # We are in the normal mode. First process config file
  3298. rcfile = os.path.join(_CONFIG_DIR, "offpunkrc")
  3299. if os.path.exists(rcfile):
  3300. print("Using config %s" % rcfile)
  3301. with open(rcfile, "r") as fp:
  3302. for line in fp:
  3303. line = line.strip()
  3304. if ((args.bookmarks or args.url) and
  3305. any((line.startswith(x) for x in ("go", "g", "tour", "t")))
  3306. ):
  3307. if args.bookmarks:
  3308. print("Skipping rc command \"%s\" due to --bookmarks option." % line)
  3309. else:
  3310. print("Skipping rc command \"%s\" due to provided URLs." % line)
  3311. continue
  3312. torun_queue.append(line)
  3313. print("Welcome to Offpunk!")
  3314. if args.restricted:
  3315. print("Restricted mode engaged!")
  3316. print("Type `help` to get the list of available command.")
  3317. for line in torun_queue:
  3318. gc.onecmd(line)
  3319. gc.cmdloop()
  3320. if __name__ == '__main__':
  3321. main()