#!/usr/bin/env python3
# zertbert/gemini-antenna -- forked from tinyrabbit/gemini-antenna
# vim: tabstop=4 shiftwidth=4 expandtab

import re
import urllib.parse
urllib.parse.uses_relative.append("gemini")
urllib.parse.uses_netloc.append("gemini")

class URLHelper():
    """
    Helper for validating, normalising and block-checking gemini:// URLs.

    Block rules are plain URL prefixes, read one per line from the
    blocklist file given to the constructor.
    """

    def __init__(self, blocklist: str = "blocklist.txt"):
        """
        Load the block rules from ``blocklist``.

        :param blocklist: path of a text file holding one URL prefix per
            line; empty lines are ignored. A missing file is not an error
            and simply leaves the rule set empty.
        """
        self.blockrules: set = set()

        # Just try to open the file instead of probing for it first: this
        # needs no extra import and leaves no gap between check and open.
        try:
            blockfile = open(blocklist)
        except FileNotFoundError:
            return

        with blockfile:
            self.blockrules = set(blockfile.read().split("\n")) - {""}

    def isBlocked(self, url: str) -> bool:
        """
        Check whether a URL is blocked by the rules.
        This method calls :meth:`~URLHelper.resolve`, so the comparison is
        made against the normalised form of ``url``.

        :returns: True when the resolved URL starts with any block rule.
        """
        url = self.resolve(url)
        # str.startswith accepts a tuple of prefixes; an empty tuple never
        # matches, so an empty rule set blocks nothing.
        return url.startswith(tuple(self.blockrules))

    @classmethod
    def mightBeAURL(cls, url: str) -> bool:
        """
        Naive URL validation.

        Accepts anything shaped like ``scheme://host.tld...``; it does not
        check that the scheme or the host are meaningful.

        >>> URLHelper.mightBeAURL("gemini://example.com/feed")
        True
        >>> URLHelper.mightBeAURL("my feed")
        False
        """
        pattern = r'^[\w]+://[^/]+\.[^/]+.*'
        return bool(re.match(pattern, url))

    @classmethod
    def getNetLoc(cls, url: str) -> str:
        """
        Return the network location (host, plus port if any) of ``url``.

        >>> URLHelper.getNetLoc("gemini://example.com/feed")
        'example.com'
        """
        return urllib.parse.urlparse(url).netloc

    @staticmethod
    def _removeDotSegments(path: str) -> str:
        """
        Collapse ``.`` and ``..`` segments of an absolute URL path.

        ``..`` never climbs above the root, and a trailing ``.`` or ``..``
        leaves a trailing slash because it names a directory.

        >>> URLHelper._removeDotSegments("/1/../2")
        '/2'
        >>> URLHelper._removeDotSegments("/a/b/..")
        '/a/'
        """
        segments = path.split("/")
        resolved: list = []
        for segment in segments:
            if segment == "..":
                # Index 0 is the empty string in front of the leading
                # slash; keep it so the path stays absolute.
                if len(resolved) > 1:
                    resolved.pop()
            elif segment != ".":
                resolved.append(segment)

        if segments[-1] in (".", ".."):
            resolved.append("")

        return "/".join(resolved)

    @classmethod
    def resolve(cls, url: str, url2: str = "") -> str:
        """
        Resolve relative paths in URLs.

        Both arguments are percent-decoded, ``url2`` is joined onto ``url``,
        a ``gemini://`` prefix is added when the result does not already
        start with one, and the result is reduced to scheme, network
        location and path (query and fragment are dropped).

        :param url: base URL.
        :param url2: optional reference to resolve against ``url``.

        >>> URLHelper.resolve("gemini://example.com/1/../2")
        'gemini://example.com/2'
        >>> URLHelper.resolve("example.com/feed")
        'gemini://example.com/feed'
        """

        url = urllib.parse.unquote(url)
        url2 = urllib.parse.unquote(url2)
        fullUrl = urllib.parse.urljoin(url, url2)
        if not fullUrl.startswith("gemini://"):
            fullUrl = f"gemini://{fullUrl}"

        parseResult = urllib.parse.urlparse(fullUrl)

        # urljoin() hands the base back untouched when the reference is
        # empty, so dot segments of a lone URL survive the join above and
        # have to be removed explicitly.
        path = cls._removeDotSegments(parseResult.path)

        return f"{parseResult.scheme}://{parseResult.netloc}{path}"