123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566 |
- #!/usr/bin/env python3
- # vim: tabstop=4 shiftwidth=4 expandtab
- import re
- import urllib.parse
# urllib.parse only resolves relative references (urljoin) and emits the
# "//netloc" part for schemes listed in these registries. "gemini" is not
# known to the standard library, so it is registered here.
# The membership checks keep the registration idempotent, so executing the
# module again (e.g. importlib.reload) does not pile up duplicate entries.
if "gemini" not in urllib.parse.uses_relative:
    urllib.parse.uses_relative.append("gemini")
if "gemini" not in urllib.parse.uses_netloc:
    urllib.parse.uses_netloc.append("gemini")
class URLHelper():
    """
    Helper for validating, normalising and block-checking Gemini URLs.

    An instance holds a set of block rules (URL prefixes) loaded from a
    text file; the URL utilities themselves are class methods and can be
    used without an instance.
    """

    def __init__(self, blocklist: str = "blocklist.txt"):
        """
        Load block rules from ``blocklist``, one rule per line.

        Empty lines are ignored. A missing file is not an error: the
        helper then starts with an empty rule set.

        :param blocklist: path of the blocklist file.
        """
        self.blockrules: set = set()
        # Try to open the file and treat "not found" as "no rules", rather
        # than checking for existence first (no pathlib import needed and
        # no window between the check and the open).
        try:
            with open(blocklist, encoding="utf-8") as blockfile:
                self.blockrules = {
                    rule for rule in blockfile.read().split("\n") if rule
                }
        except FileNotFoundError:
            return

    def isBlocked(self, url) -> bool:
        """
        Check whether a URL is blocked by the rules.

        The URL is normalised with :meth:`~URLHelper.resolve` first and is
        considered blocked when it starts with any of the block rules.

        :param url: URL to check.
        :return: ``True`` if a rule is a prefix of the resolved URL.
        """
        url = self.resolve(url)
        return any(url.startswith(rule) for rule in self.blockrules)

    @classmethod
    def mightBeAURL(cls, url: str) -> bool:
        """
        Naive URL validation.

        Accepts anything shaped like ``scheme://host.tld...``; the host
        part must contain at least one dot.

        >>> URLHelper.mightBeAURL("gemini://example.com/feed")
        True
        >>> URLHelper.mightBeAURL("my feed")
        False
        """
        pattern = r'^[\w]+://[^/]+\.[^/]+.*'
        return bool(re.match(pattern, url))

    @classmethod
    def getNetLoc(cls, url: str) -> str:
        """
        Return the network location (host and optional port) of ``url``.

        >>> URLHelper.getNetLoc("gemini://example.com/feed")
        'example.com'
        """
        return urllib.parse.urlparse(url).netloc

    @staticmethod
    def _removeDotSegments(path: str) -> str:
        """Collapse ``.`` and ``..`` segments of an absolute URL path."""
        if not path:
            return path
        segments = path.split("/")
        resolved: list = []
        for segment in segments:
            if segment == "..":
                # Keep the leading empty segment: it stands for the root,
                # and ".." must never climb above it.
                if len(resolved) > 1:
                    resolved.pop()
            elif segment != ".":
                resolved.append(segment)
        # A path ending in "." or ".." refers to a directory, so keep the
        # trailing slash.
        if segments[-1] in (".", ".."):
            resolved.append("")
        return "/".join(resolved)

    @classmethod
    def resolve(cls, url: str, url2: str = "") -> str:
        """
        Resolve relative paths in URLs.

        Both arguments are percent-decoded, ``url2`` is joined onto
        ``url``, ``gemini://`` is prepended when the result does not
        already start with it, and ``.``/``..`` path segments are
        collapsed. Only scheme, network location and path are returned;
        query and fragment are dropped.

        Joining a relative ``url2`` relies on ``gemini`` being present in
        ``urllib.parse.uses_relative``.

        :param url: base URL.
        :param url2: optional reference to resolve against ``url``.
        :return: the normalised ``gemini://`` URL.

        >>> URLHelper.resolve("gemini://example.com/1/../2")
        'gemini://example.com/2'
        """
        url = urllib.parse.unquote(url)
        url2 = urllib.parse.unquote(url2)
        fullUrl = urllib.parse.urljoin(url, url2)
        if not fullUrl.startswith("gemini://"):
            fullUrl = f"gemini://{fullUrl}"
        parseResult = urllib.parse.urlparse(fullUrl)
        # urljoin() returns the base untouched when url2 is empty, so dot
        # segments have to be removed explicitly here.
        path = cls._removeDotSegments(parseResult.path)
        return f"{parseResult.scheme}://{parseResult.netloc}{path}"
|