#5 video image preview

باز‌کردن
abergasov قصد ادغام 8 تغییر را از abergasov/_image_preview به Black_Triangle/master دارد
9 فایل‌های تغییر یافته به همراه 196 افزوده شده و 87 حذف شده
  1. 3 0
      .gitignore
  2. 0 0
      cache/.gitkeep
  3. 0 0
      cache/video_previews/.gitkeep
  4. BIN
      default.png
  5. 9 0
      helpers/logger.py
  6. 130 0
      helpers/not_youtube.py
  7. 14 0
      helpers/player.py
  8. 16 0
      preview_calc.py
  9. 24 87
      start.py

+ 3 - 0
.gitignore

@@ -0,0 +1,3 @@
+cache/*
+!cache/.gitkeep
+!cache/video_previews/.gitkeep

+ 0 - 0
cache/.gitkeep


+ 0 - 0
cache/video_previews/.gitkeep


BIN
default.png


+ 9 - 0
helpers/logger.py

@@ -0,0 +1,9 @@
+class Logger:
+    """Minimal console logger that prints messages to standard output."""
+
+    @staticmethod
+    def _format(message, args):
+        """Build '<message> <arg1>,<arg2>,...' from a message and extra values."""
+        # Each value goes through str() so that non-string arguments
+        # (exceptions, numbers, None) do not make str.join raise TypeError.
+        return '{0} '.format(message) + ','.join(str(arg) for arg in args)
+
+    @staticmethod
+    def log_error(message, *args):
+        """Print an error line: 'error here: <message> <arg1>,<arg2>,...'."""
+        print('error here: ' + Logger._format(message, args))
+
+    @staticmethod
+    def log_info(message, *args):
+        """Print an info line: '<message> <arg1>,<arg2>,...'."""
+        print(Logger._format(message, args))

+ 130 - 0
helpers/not_youtube.py

@@ -0,0 +1,130 @@
+import time
+
+from helpers.logger import Logger
+import hashlib
+import requests
+from bs4 import BeautifulSoup as bs
+from multiprocessing import Process
+from math import ceil
+import datetime
+import os
+
+
+class NotYoutube:
+    """Scraper for notyoutube.org channel pages with simple caches.
+
+    Channel names are persisted in ``<cache_dir>not_youtube.cache`` as one
+    ``url;name`` record per line. Video thumbnails are stored under
+    ``<cache_dir>video_previews/`` in files named by the MD5 hex digest of
+    the video title. Fetched pages are kept in memory for five minutes.
+    """
+
+    def __init__(self, cache_dir: str):
+        """Set up cache locations and load previously cached channel names.
+
+        ``cache_dir`` is concatenated directly with the file names, so it
+        must already end with a path separator.
+        """
+        self.cache_file = cache_dir + "not_youtube.cache"
+        self.cache_images = cache_dir + "video_previews/"
+        # url -> {"src": response object, "tms": fetch timestamp}
+        self.source_cache = {}
+        # channel url -> channel name
+        self.list = self._restore_from_cache()
+
+    def get_channel_name(self, channel_url: str) -> str:
+        """Return the channel name for ``channel_url``, or "" on any failure.
+
+        The name comes from the on-disk cache when present; otherwise the
+        page is downloaded, the name is read from its second ``<span>`` and
+        the result is cached.
+        """
+        if not NotYoutube._is_not_youtube_url(channel_url):
+            Logger.log_info("link is not for NotYoutube", channel_url)
+            return ""
+
+        Logger.log_info("try get info about channel", channel_url)
+        cache_name = self._check_in_cache(channel_url)
+        if cache_name != "":
+            return cache_name
+
+        page_source = requests.get(channel_url)
+        if page_source.status_code != 200:
+            Logger.log_error("can't load info about channel", channel_url)
+            return ""
+
+        # put request in cache, will reuse later on channels load
+        self.source_cache[channel_url] = {"src": page_source, "tms": datetime.datetime.now().timestamp()}
+        page_source.encoding = 'utf-8'
+        pre_soup = bs(page_source.text, 'html.parser')
+        pre_name = pre_soup.select("span")
+        # The name is taken from the second <span>, so two are required;
+        # checking only for an empty result let a single-span page crash.
+        if len(pre_name) < 2:
+            Logger.log_error("can't get channel name", channel_url)
+            return ""
+        channel_name = pre_name[1].text
+        self._put_in_cache(channel_url, channel_name)
+        return channel_name
+
+    def get_channel_video(self, channel_url: str) -> dict:
+        """Return ``{video title: relative watch link}`` for a channel page.
+
+        Thumbnails found next to the videos are downloaded in the
+        background as a side effect.
+        """
+        source = self._get_page_source(channel_url)
+        source.encoding = 'utf-8'
+        soup = bs(source.text, 'html.parser')
+        video_list = {}
+        prev_download = []
+        for video_box in soup.select("div.h-box"):
+            links = video_box.select("p a")
+            if not links:
+                continue
+            link = links[0]
+            # .get() tolerates anchors without an href attribute
+            href = link.get('href', '')
+            if not href.startswith('/watch?'):
+                continue
+            if link.text.strip() == "":
+                continue
+            thumbnails = video_box.select('img.thumbnail')
+            if thumbnails and thumbnails[0].get('src'):
+                prev_download.append({"url": "https://notyoutube.org" + thumbnails[0]['src'], "txt": link.text})
+            video_list[link.text] = href
+        self._bulk_save_images(prev_download)
+        return video_list
+
+    def _get_page_source(self, url: str):
+        """Return the response for ``url``, reusing a cached one younger than 5 minutes."""
+        current_tms = datetime.datetime.now().timestamp()
+        cached = self.source_cache.get(url)
+        if cached is not None and current_tms - cached['tms'] < 300:
+            return cached['src']
+        page_source = requests.get(url)
+        # Only successful responses are cached; a failed one is still returned
+        if page_source.status_code == 200:
+            self.source_cache[url] = {"src": page_source, "tms": current_tms}
+        return page_source
+
+    def _restore_from_cache(self) -> dict:
+        """Load ``{channel url: channel name}`` from the cache file.
+
+        Reading is best-effort: an unreadable or missing file is logged and
+        whatever was parsed so far is returned.
+        """
+        result = {}
+        try:
+            with open(self.cache_file, 'r') as channels_list_file:
+                for raw_line in channels_list_file:
+                    line = raw_line.rstrip("\n")
+                    # Split on the first ';' only, so a name may contain ';'
+                    url, separator, name = line.partition(";")
+                    if not separator:
+                        Logger.log_error("invalid cache data, skip it", raw_line)
+                        continue
+                    result[url] = name
+        except (OSError, UnicodeDecodeError) as e:
+            Logger.log_error("error read from cache", str(e))
+        return result
+
+    def _check_in_cache(self, channel_url: str) -> str:
+        """Return the cached name for ``channel_url`` or "" when unknown."""
+        cached_name = self.list.get(channel_url)
+        return "" if cached_name is None else cached_name
+
+    def _put_in_cache(self, channel_url: str, channel_name: str):
+        """Remember a channel name in memory and append it to the cache file."""
+        self.list[channel_url] = channel_name
+        with open(self.cache_file, "a") as cache_file:
+            # One record per line; without the terminator consecutive records
+            # ran together and could not be read back.
+            cache_file.write("{0};{1}\n".format(channel_url, channel_name))
+
+    @staticmethod
+    def _is_not_youtube_url(channel_url: str) -> bool:
+        """Tell whether ``channel_url`` mentions the notyoutube.org host."""
+        return "notyoutube.org" in channel_url
+
+    @staticmethod
+    def _split_chunks(items: list, parts: int) -> list:
+        """Split ``items`` into at most ``parts`` contiguous, non-empty chunks."""
+        if not items:
+            return []
+        part_len = ceil(len(items) / parts)
+        return [items[start:start + part_len] for start in range(0, len(items), part_len)]
+
+    def _bulk_save_images(self, images_to_download: list):
+        """Start background processes that download the given thumbnails.
+
+        The processes are started and not waited for.
+        """
+        num_processes = 6
+        # Every image lands in exactly one chunk; no worker is started for
+        # an empty chunk.
+        chunks = self._split_chunks(images_to_download, num_processes)
+        for number, chunk in enumerate(chunks, start=1):
+            Process(target=self._save_images, args=(chunk, number)).start()
+
+    def _save_images(self, chunk: list, j: int):
+        """Download every image of ``chunk`` that is not stored yet.
+
+        ``j`` is the worker number; it is accepted but not used.
+        """
+        for image in chunk:
+            path = self.cache_images + hashlib.md5(image['txt'].encode("utf-8")).hexdigest()
+            if os.path.isfile(path):
+                continue
+            response = requests.get(image['url'])
+            if response.status_code == 200:
+                with open(path, "wb") as image_file:
+                    image_file.write(response.content)

+ 14 - 0
helpers/player.py

@@ -0,0 +1,14 @@
+import os
+from helpers.logger import Logger
+
+class Player:
+    """Launchers for external video players."""
+
+    @staticmethod
+    def play_mpv(link):
+        """Play ``link`` with mpv and wait until the player exits.
+
+        A missing mpv executable is logged instead of raising.
+        """
+        # Local import keeps this block self-contained.
+        import subprocess
+
+        Logger.log_info("start play video from youtube via mpv", link)
+        try:
+            # Argument list instead of a shell command string: the link is
+            # built from scraped page data and must never reach a shell.
+            # "--" stops option parsing so the link cannot be taken as a flag.
+            subprocess.run(["mpv", "--", link])
+        except FileNotFoundError:
+            Logger.log_error("mpv executable not found", link)
+
+    @staticmethod
+    def play_vlc(link):
+        """Placeholder for VLC playback: only logs, no player is started."""
+        Logger.log_info("start play video from youtube via vlc", link)

+ 16 - 0
preview_calc.py

@@ -0,0 +1,16 @@
+import sys
+import os
+import hashlib
+from signal import signal, SIGPIPE, SIG_DFL
+# Restore the default SIGPIPE action for this script.
+signal(SIGPIPE, SIG_DFL)
+argv = sys.argv
+if len(argv) != 2:
+    # Exactly one argument (the video title) is required; print nothing.
+    exit(0)
+
+base_dir = os.getcwd()
+# Cached previews are named by the MD5 hex digest of the UTF-8 title.
+title_digest = hashlib.md5(argv[1].encode("utf-8")).hexdigest()
+preview_path = "{0}/cache/video_previews/{1}".format(base_dir, title_digest)
+# Fall back to the bundled default image when no cached preview file exists.
+if os.path.isfile(preview_path):
+    chosen_image = preview_path
+else:
+    chosen_image = "{0}/default.png".format(base_dir)
+sys.stdout.write(chosen_image)
+sys.exit(0)

+ 24 - 87
start.py

@@ -1,97 +1,34 @@
 # -*- coding: utf-8 -*-
-
-from bs4 import BeautifulSoup as bs
-import requests
-import re
+from helpers.logger import Logger
+from helpers.not_youtube import NotYoutube
+from helpers.player import Player
 from pyfzf.pyfzf import FzfPrompt
 import os
 
-print("Загрузка....")
-
-f = [i.strip('\n').split(',') for i in open('subscribe.txt')]
-
+# Read all subscription URLs; the context manager closes the file right away
+# instead of leaving the handle open for the lifetime of the script.
+with open('subscribe.txt', 'r') as subscriptions_file:
+    subscriptions_list = subscriptions_file.readlines()
 
-chanel_list = []
+channel_list = {}  # channel name -> channel URL
+prev_h = "30"  # preview height
+prev_w = "90"  # preview width
+cache_path = "{0}/cache/".format(os.getcwd())
+n_you = NotYoutube(cache_path)
 
-for k in f:
-    h = (k[0])
-    pre_source = requests.get(h)
-    pre_source.encoding = 'utf-8'
-    pre_soup = bs(pre_source.text, 'html.parser')
-    pre_name = pre_soup.select("span")
-    pre_name1 = pre_name[1]
-    pre_canell = (pre_name1.text)
-    chanel_list.append(pre_canell)
+Logger.log_info("Загрузка каналов....")
+for channel_url in subscriptions_list:
+    channel_url = channel_url.strip()  # drop the line ending left by readlines()
+    name = n_you.get_channel_name(channel_url)
+    if name == "":  # no name could be resolved for this URL, skip it
+        continue
+    Logger.log_info("found info about channel", channel_url, name)
+    channel_list[name] = channel_url
 
 fzf = FzfPrompt()
+target_channel = fzf.prompt(channel_list)
+if not target_channel:
+    # The prompt was aborted without a selection; indexing [0] would raise.
+    raise SystemExit(0)
+Logger.log_info("selected channel:", target_channel[0])
 
+video_list = n_you.get_channel_video(channel_list.get(target_channel[0]))
+# fzf substitutes {} with the highlighted title; preview_calc.py prints the
+# path of the image that viu renders in the preview pane.
+target_video = fzf.prompt(video_list, "--preview 'viu -w "+prev_w+" -h "+prev_h+" $(python3 preview_calc.py {})'")
+if not target_video:
+    # No video was picked, so there is nothing to play.
+    raise SystemExit(0)
+Logger.log_info("selected video from channel:", target_video[0])
+video_url = video_list.get(target_video[0])
 
-
-target_chanel=fzf.prompt(chanel_list)
-print(target_chanel[0])
-nomber_chanel = (chanel_list.index(target_chanel[0]))
-
-
-url = f[nomber_chanel]
-
-end_url = url[0]
-
-
-
-source = requests.get(end_url)
-
-source.encoding = 'utf-8' 
-
-
-soup = bs(source.text, 'html.parser')
-
-
-name = soup.select("span")
-name1 = name[1]
-canell = (name1.text)
-
-
-
-spisokNAME=[]
-z=0
-for title in soup.select("div p"):
-    kkk = title.find('a')
-
-
-
-    
-    if kkk is not None:
-        xxx = kkk
-        ggg=xxx.text
-        if re.search(canell, ggg):
-            a = 1
-        else:
-            z=z+1
-
-            if z >= 2:
-
-                spisokNAME.append(ggg) 
-                
-
-target=fzf.prompt(spisokNAME)
-
-print(target[0])
-nomber = (spisokNAME.index(target[0]))
-
-
-spisok=[]
-
-for a in soup.find_all('a', href=True):
-    link=(a['href'])
-    
-    if re.search(r'com/watch\b', link):   
-        spisok.append(link) 
-
-
-
-end_target = spisok[nomber]
-
-comand = "mpv " + end_target
-
-os.system(comand)           
-                
+Player.play_mpv("https://www.youtube.com" + video_url)