Tarry_Dan
/
microblog.py
派生自 likho/microblog.py


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
							
import sys, os, traceback
import dateutil.parser

# returns html-formatted string
def make_buttons(btn_dict, msg_id):
    """Build the html block of action links shown with a post.

    Args:
        btn_dict: mapping of button label -> URL.  A URL that ends in '='
            is treated as an open query string and gets msg_id appended.
        msg_id: identifier of the post; converted with str().

    Returns:
        A '<div class="buttons">' string holding one '[label]' anchor per
        entry, in the mapping's iteration order.
    """
    fmt = "<a href=\"%s\">[%s]</a>"
    anchors = []
    for label, url in btn_dict.items():
        # endswith() copes with an empty URL, where url[-1] would raise
        # IndexError.
        if url.endswith('='):
            url += str(msg_id)
        anchors.append(fmt % (url, label))
    return "<div class=\"buttons\">" + "".join(anchors) + "</div>"

# apply div classes for use with .css
def make_post(num, timestamp, conf, msg):
    """Render one post through the configured template.

    conf["format"] is a str.format template that receives the keyword
    fields __timestamp__, __num__, __msg__ and __btn__.  __btn__ is the
    output of make_buttons() when conf has a "buttons" entry, otherwise
    an empty string.
    """
    template = conf["format"]
    button_html = make_buttons(conf["buttons"], num) if "buttons" in conf else ""
    return template.format(
        __timestamp__=timestamp,
        __num__=num,
        __msg__=msg,
        __btn__=button_html,
    )

def make_gallery(indices, w, conf=None):
    """Pull image words out of a line and wrap them in gallery markup.

    indices -- positions in 'w' that hold image names
    w       -- list of words; the image entries are pop()'d out of it
    conf    -- optional table with "path_to_thumb" and "path_to_fullsize"

    Returns a list of html strings (empty when 'indices' is empty).
    Panels are emitted in the order the words are popped, i.e. last
    image of the line first.
    """
    if indices == []:
        return []
    panel = '''
<div class=\"panel\">
    <a href=\"%s\"><img src=\"%s\" class=\"embed\"></a>
</div>
'''
    html = ["<div class=\"gallery\">"]
    # walk from the back so the remaining indices stay valid after pop()
    for position in reversed(indices):
        name = w.pop(position)
        if name[0] == '.' or name[0] == '/':
            # already a path: link and thumbnail are the word itself
            html.append(panel % (name, name))
            continue
        if conf:
            thumb = "%s/%s" % (conf["path_to_thumb"], name)
            full = "%s/%s" % (conf["path_to_fullsize"], name)
            html.append(panel % (full, thumb))
            continue
        # bare file name and nowhere to look it up: warn, use it verbatim
        print("Warning: no path defined for image %s!" % name, file=sys.stderr)
        html.append(panel % (name, name))
    html.append("</div>")
    return html

def markup(message, config):
    """Turn the raw lines of one post into html.

    message -- list of text lines
    config  -- table read for "tag_paragraphs" and "accepted_images",
               plus the optional "line_separator" and "gallery" entries

    Returns (html, tags): the rendered lines joined by the separator, and
    the hashtags in the order they were met (repeats included).
    """
    def is_image(s, image_formats):
        # The text after the last '.' has to be an accepted format.
        # A trailing dot ("started.") leaves an empty extension, which
        # is rejected before the membership test.
        _stem, dot, extension = s.rpartition('.')
        if not dot or extension == '':
            return False
        return extension in image_formats

    tags = []      # hashtags found, list of strings
    rendered = []  # one entry per line, plus one per gallery
    wrap_paragraphs = config["tag_paragraphs"]
    sep = config["line_separator"] if "line_separator" in config else ""
    for line in message:
        words = line.split()
        image_positions = []  # indices into 'words'
        for i, word in enumerate(words):
            # leave hand-written html attributes alone
            if word.startswith("src=") or word.startswith("href="):
                continue
            if "https://" in word:
                # only https links are made clickable
                safe = escape(word)
                words[i] = "<a href=\"%s\">%s</a>" % (safe, safe)
            elif "#" in word and len(word) > 1:
                # chr(8206) is an invisible separator: the part before it
                # is the tag, the part after is ordinary text
                # (e.g. #fanfic|tion)
                pieces = word.split(chr(8206))
                tags.append(pieces[0])
                marked = "<span class=\"hashtag\">%s</span>" % (pieces[0])
                if len(pieces) > 1:
                    marked += pieces[1]
                words[i] = marked
            elif is_image(word, config["accepted_images"]):
                image_positions.append(i)
        gallery = []
        if image_positions:
            # make_gallery pops the image words out of 'words'
            gallery_conf = config["gallery"] if "gallery" in config else None
            gallery = make_gallery(image_positions, words, gallery_conf)
        if wrap_paragraphs and words:
            words = ["<p>"] + words + ["</p>"]
        rendered.append(" ".join(words))
        # the gallery goes after the paragraph, not inside it
        if gallery:
            rendered.append("".join(gallery))
    return sep.join(rendered), tags

# apply basic HTML formatting - only div class here is gallery
from html import escape
class Post:
    """One microblog entry: a timestamp string plus its message lines."""

    def __init__(self, ts, msg):
        # ts  -- timestamp text; surrounding whitespace is dropped
        # msg -- list of message lines, stored as given
        self.timestamp = ts.strip()
        self.message = msg

    def _moment(self):
        # parsed on every call; nothing is cached
        return dateutil.parser.parse(self.timestamp)

    # format used for sorting
    def get_epoch_time(self):
        """Return the timestamp as whole seconds since the epoch."""
        return int(self._moment().timestamp())

    # format used for display
    def get_short_time(self):
        """Return the timestamp formatted with "%y %b %d"."""
        return self._moment().strftime("%y %b %d")

def parse_txt(filename):
    """Read a microblog text file into a list of Post objects.

    Expected layout: the first line is skipped unconditionally; after
    that each post is one timestamp line, followed by its message lines,
    closed by an empty line.

    The final post is kept even when the file does not end with an empty
    line (it used to be dropped silently).  A timestamp at the very end
    of the file with no message lines after it is still ignored.

    Args:
        filename: path of the text file.

    Returns:
        list of Post, in the same order as the file.
    """
    with open(filename, 'r') as f:
        content = f.readlines()
    posts = []    # list of posts - same order as file
    message = []  # lines of the post being collected
    timestamp = ""
    expecting_timestamp = True
    for line in content[1:]:
        if expecting_timestamp:
            timestamp = line
            expecting_timestamp = False
        elif line.rstrip("\n"):
            # Anything but a bare newline belongs to the message.  Testing
            # the stripped text (rather than len(line) > 1) keeps a
            # one-character last line that has no trailing newline.
            message.append(line)
        else:
            posts.append(Post(timestamp, message))
            message = []
            expecting_timestamp = True
    if not expecting_timestamp and message:
        # File ended in the middle of a post: flush it.  Terminate the
        # last line so every stored message line ends with a newline.
        if not message[-1].endswith("\n"):
            message[-1] += "\n"
        posts.append(Post(timestamp, message))
    return posts

def get_posts(filename, config):
    """Parse a content file and render every post in it.

    Returns (timeline, tagcloud, tagged):
      timeline -- rendered posts, same order as the file
      tagcloud -- tag -> number of occurrences
      tagged   -- tag -> list of post numbers carrying that tag
    """
    posts = parse_txt(filename)
    timeline = []
    tagcloud = {}
    tagged = {}
    # posts are numbered downwards: the first one in the file gets
    # len(posts) - 1, the last one gets 0
    number = len(posts)
    for post in posts:
        html, tags = markup(post.message, config)
        number -= 1
        timeline.append(
            make_post(number, post.get_short_time(), config, html)
        )
        for tag in tags:
            tagcloud[tag] = tagcloud.get(tag, 0) + 1
            tagged.setdefault(tag, []).append(number)
    return timeline, tagcloud, tagged

def make_tagcloud(d, rell):
    """Render the tag cloud as a list of html links, most used tag first.

    Args:
        d: mapping of hashtag (first character is stripped for the link,
            normally the '#') -> usage count.
        rell: %-format string with one %s; it receives the tag without
            its first character and yields the link target.

    Returns:
        list of '<span class="hashtag">' strings ordered by descending
        count.  Tags with equal counts keep the mapping's order.
    """
    fmt = "<span class=\"hashtag\"><a href=\"%s\">%s(%i)</a></span>"
    # The sorted result is what gets rendered; previously it was computed
    # and then ignored.  sorted() is stable, so ties keep their order.
    ranked = sorted(d.items(), key=lambda item: -item[1])
    return [fmt % (rell % tag[1:], tag, count) for tag, count in ranked]

class Paginator:
    """Splits a timeline into numbered html pages inside one directory."""

    def __init__(self, post_count, ppp, loc=None):
        """
        post_count -- number of posts; must be positive
        ppp        -- posts per page
        loc        -- output directory, "pages" when not given; created
                      with os.mkdir when it does not exist yet
        """
        if post_count <= 0:
            raise Exception
        loc = loc or "pages"
        if not os.path.exists(loc):
            os.mkdir(loc)
        self.TOTAL_POSTS = post_count
        self.PPP = ppp
        # only complete pages are counted
        self.TOTAL_PAGES = int(post_count/self.PPP)
        self.SUBDIR = loc
        self.FILENAME = "%i.html"
        self.written = []  # paths of the files written so far

    def toc(self, current_page=None, path=None): #style 1
        """Return the page index as html, highest page number first.

        The entry for 'current_page' is bold text instead of a link.
        'path' is the directory used in the links (defaults to SUBDIR).
        """
        if self.TOTAL_PAGES < 1:
            return "[no pages]"
        base = self.SUBDIR if path is None else path

        def entry(n):
            if n == current_page:
                return "<b>[%i]</b>" % n
            target = base + "/" + (self.FILENAME % n)
            return "<a href=\"%s\">[%i]</a>" % (target, n)  # (filename, page number)

        return "\n".join(entry(n) for n in reversed(range(self.TOTAL_PAGES)))

    # makes one page
    def singlepage(self, template, tagcloud, timeline_, i=None, p=None):
        """Fill 'template' (str.format fields postcount, tags, pages,
        timeline) and return the resulting html."""
        return template.format(
            postcount=self.TOTAL_POSTS,
            tags="\n".join(tagcloud),
            timeline="\n\n".join(timeline_),
            pages=self.toc(i, p),
        )

    def paginate(self, template, tagcloud, timeline, is_tagline=False):
        """Write one html file per complete page into SUBDIR.

        'timeline' is reversed IN PLACE, so the caller's list is in the
        opposite order afterwards.  'is_tagline' is accepted but unused.
        """
        outfile = "%s/%s" % (self.SUBDIR, self.FILENAME)
        timeline.reverse() # reorder from oldest to newest
        for page in range(self.TOTAL_PAGES):
            target = outfile % page
            with open(target, 'w') as f:
                self.written.append(target)
                first = self.PPP * page
                chunk = timeline[first:first + self.PPP]
                # within a page the newest post comes first again
                chunk.reverse()
                f.write(self.singlepage(template, tagcloud, chunk, page, "."))

import argparse
if __name__ == "__main__":
    def sort(filename):
        """Write the posts of 'filename', ordered by time with the newest
        first, to '<filename>.sorted'."""
        def export(new_content, new_filename):
            # the file starts with an empty line; each post is its
            # timestamp followed by the message lines
            with open(new_filename, 'w') as out:
                print(file=out)
                for entry in new_content:
                    print(entry.timestamp, file=out)
                    print("".join(entry.message), file=out)

        by_age = sorted(parse_txt(filename), key=lambda e: e.get_epoch_time())
        outfile = "%s.sorted" % filename
        print("Sorted text written to ", outfile)
        # by_age runs oldest -> newest; flip it for the output file
        export(by_age[::-1], outfile)

    def get_args():
        """Parse the command line.

        Returns (template, content).  With --sort the content file is
        handed to sort() and the program exits instead of returning.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("template", help="an html template file")
        parser.add_argument("content", help="text file for microblog content")
        parser.add_argument(
            "--sort",
            action="store_true",
            help="sorts content from oldest to newest"
                 " (this is a separate operation from page generation)",
        )
        parsed = parser.parse_args()
        if not parsed.sort:
            return parsed.template, parsed.content
        sort(parsed.content)
        exit()

    # assume relative path
    def demote_css(template, css_list,  level=1):
        """Prefix every stylesheet reference found in 'template'.

        level 1 prepends a single '.'; any other level prepends "../"
        that many times.  Each entry of css_list is replaced wherever it
        occurs in the template text.
        """
        prefix = '.' if level == 1 else "../" * level
        adjusted = template
        for css in css_list:
            adjusted = adjusted.replace(css, prefix + css)
        return adjusted

# needs review / clean-up
# ideally relate 'lvl' with sub dir instead of hardcoding
    def writepage(template, timeline, tagcloud, config, subdir = None):
        """Render one timeline (main or per-tag) and its numbered pages.

        template -- path of the html template file
        timeline -- list of rendered posts, newest first
        tagcloud -- tag -> count mapping passed on to make_tagcloud
        config   -- table read for "postsperpage" and "relative_css"
        subdir   -- None for the main timeline; otherwise the directory
                    that receives latest.html and the numbered pages

        For the main timeline the latest page is printed to stdout and
        the numbered pages go to the Paginator's default directory.

        Returns the list of file paths written by the Paginator.
        Calls exit() when the Paginator cannot be set up.
        """
        html   = ""
        with open(template,'r') as f:
            html = f.read()
        try:
            count  = len(timeline)
            p      = config["postsperpage"]
            pagectrl = Paginator(count, p, subdir)
        except ZeroDivisionError as e:
            # postsperpage == 0
            print("error: ",e, ". check 'postsperpage' in config", file=sys.stderr)
            exit()
        except Exception as e:
            # Paginator raises a bare Exception when count <= 0; a missing
            # "postsperpage" key (KeyError) is caught here as well
            print("error: ",e, ("(number of posts = %i)" % count), file=sys.stderr)
            exit()
        # NOTE: when count <= PPP, 'latest' is the very same list object as
        # 'timeline'. paginate() reverses 'timeline' in place, so 'latest'
        # has to be rendered before paginate() is called.
        latest = timeline if count <= pagectrl.PPP else timeline[:pagectrl.PPP]
        if subdir == None: # if top level page
            lvl    = 1
            # links as seen from the top level page
            tcloud = make_tagcloud(tagcloud, "./tags/%s/latest.html")
            print(pagectrl.singlepage(html, tcloud, latest))
            # links as seen from inside the pages directory
            tcloud = make_tagcloud(tagcloud, "../tags/%s/latest.html")
            pagectrl.paginate(
                demote_css(html, config["relative_css"], lvl), 
                tcloud, timeline
            )
        else: # if timelines per tag
            is_tagline = True
            lvl = 2
            newhtml = demote_css(html, config["relative_css"], lvl)
            # links to the sibling tag directories
            tcloud = make_tagcloud(tagcloud, "../%s/latest.html")
            fn = "%s/latest.html" % subdir
            with open(fn, 'w') as f:
                pagectrl.written.append(fn)
                f.write(
                    pagectrl.singlepage(newhtml, tcloud, latest, p=".")
                )
                # runs while latest.html is still open; it writes to
                # separate numbered files
                pagectrl.paginate(newhtml, tcloud, timeline, is_tagline)
        return pagectrl.written

    import toml
    def load_settings():
        """Return the parsed contents of settings.toml from the current
        directory, or None when that file does not exist."""
        filename = "settings.toml"
        if not os.path.exists(filename):
            return None
        with open(filename, 'r') as f:
            return toml.loads(f.read())

    def main():
        """Generate the main timeline and one timeline per tag, then list
        every written file in updatedfiles.txt."""
        tpl, content = get_args()
        cfg = load_settings()
        if cfg is None:
            print("exit: no settings.toml found.", file=sys.stderr)
            return
        for table in ("post", "page"):
            if table not in cfg:
                print("exit: table '%s' absent in settings.toml" % table,
                      file=sys.stderr)
                return
        tl, tc, tg = get_posts(content, cfg["post"])
        if tl == []:
            return
        # main timeline
        # NOTE: this call must come before the per-tag loop. writepage()
        # passes tl to Paginator.paginate(), which reverses it in place;
        # the indices stored in tg are looked up in tl after that.
        updated = list(writepage(tpl, tl, tc, cfg["page"]))
        # timeline per tag
        if tc and tg and not os.path.exists("tags"):
            os.mkdir("tags")
        for key, indices in tg.items():
            tagline = [tl[index] for index in indices]
            # [1:] means to omit hashtag from dir name
            updated += writepage(
                tpl, tagline, tc, cfg["page"], subdir="tags/%s" % key[1:]
            )
        with open("updatedfiles.txt", 'w') as f:
            for filename in updated:
                print(filename, file=f)
            if "latestpage" in cfg:
                print(cfg["latestpage"], file=f)
    # Run the generator and turn the two expected failure modes into a
    # hint for the user; the traceback is still printed in both cases.
    try:
        main()
    except KeyError:
        traceback.print_exc()
        print("\n\tA key may be missing from your settings file.", file=sys.stderr)
    # ParserError is taken from the public dateutil.parser namespace
    # rather than the private dateutil.parser._parser module.
    except dateutil.parser.ParserError:
        traceback.print_exc()
        print("\n\tFailed to interpret a date from string..",
              "\n\tYour file of posts may be malformed.",
              "\n\tCheck if your file starts with a line break.", file=sys.stderr)