#3 Cache requests

Open
abergasov wants to merge 4 commits from abergasov/_cache_requests into Black_Triangle/master
5 changed files with 137 additions and 86 deletions
  1. 0 0
      cache/.gitkeep
  2. 9 0
      helpers/logger.py
  3. 93 0
      helpers/not_youtube.py
  4. 14 0
      helpers/player.py
  5. 21 86
      start.py
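
Moves the scraping logic out of start.py into helpers: a small Logger, a NotYoutube class that caches resolved channel names on disk (cache/not_youtube.cache) and fetched page sources in memory for five minutes, and a Player wrapper around mpv. start.py now only reads subscribe.txt, resolves channel names through the cache, and hands the selected video to the player.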

+ 0 - 0
cache/.gitkeep


+ 9 - 0
helpers/logger.py

@@ -0,0 +1,9 @@
+class Logger:
+
+    @staticmethod
+    def log_error(message, *args):
+        print('error: {0} '.format(message) + ', '.join(str(a) for a in args))
+
+    @staticmethod
+    def log_info(message, *args):
+        print('{0} '.format(message) + ', '.join(str(a) for a in args))
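
For reference, a minimal sketch of the output these helpers produce (the URL and channel name below are illustrative):

    Logger.log_info("found info about channel", channel_url, name)
    # prints: found info about channel https://notyoutube.org/channel/abc, Some Channel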

+ 93 - 0
helpers/not_youtube.py

@@ -0,0 +1,93 @@
+from helpers.logger import Logger
+import requests
+from bs4 import BeautifulSoup as bs
+import datetime
+
+
+class NotYoutube:
+
+    def __init__(self, cache_dir: str):
+        self.cache_file = cache_dir + "not_youtube.cache"
+        self.source_cache = {}
+        self.list = self._restore_from_cache()
+
+    def get_channel_name(self, channel_url: str) -> str:
+        if not NotYoutube._is_not_youtube_url(channel_url):
+            Logger.log_info("link is not for NotYoutube", channel_url)
+            return ""
+
+        Logger.log_info("try get info about channel", channel_url)
+        cache_name = self._check_in_cache(channel_url)
+        if cache_name != "":
+            return cache_name
+
+        page_source = requests.get(channel_url)
+        if page_source.status_code != 200:
+            Logger.log_error("can't load info about channel", channel_url)
+            return ""
+
+        # cache the response; it is reused later when loading this channel's videos
+        self.source_cache[channel_url] = {"src": page_source, "tms": datetime.datetime.now().timestamp()}
+        page_source.encoding = 'utf-8'
+        pre_soup = bs(page_source.text, 'html.parser')
+        pre_name = pre_soup.select("span")
+        if len(pre_name) < 2:  # the channel name lives in the second <span>
+            Logger.log_error("can't get channel name", channel_url)
+            return ""
+        channel_name = pre_name[1].text
+        self._put_in_cache(channel_url, channel_name)
+        return channel_name
+
+    def get_channel_video(self, channel_url: str) -> dict:
+        source = self._get_page_source(channel_url)
+        source.encoding = 'utf-8'
+        soup = bs(source.text, 'html.parser')
+        video_list = {}
+        for link in soup.select("div.h-box p a"):
+            if not link['href'].startswith('/watch?'):
+                continue
+            if link.text.strip() == "":
+                continue
+            video_list[link.text] = link['href']
+        return video_list
+
+    def _get_page_source(self, url: str):
+        current_tms = datetime.datetime.now().timestamp()
+        cached = self.source_cache.get(url)
+        # reuse the cached page while it is younger than 5 minutes
+        if cached is not None and current_tms - cached['tms'] < 300:
+            return cached['src']
+        page_source = requests.get(url)
+        if page_source.status_code == 200:
+            self.source_cache[url] = {"src": page_source, "tms": current_tms}
+        return page_source
+
+    def _restore_from_cache(self) -> dict:
+        result = {}
+        try:
+            with open(self.cache_file, 'r') as channels_list_file:
+                for i in channels_list_file.readlines():
+                    # each cache line holds "channel_url;channel_name"
+                    ch_data = i.strip().split(";")
+                    if len(ch_data) != 2:
+                        Logger.log_error("invalid cache data, skip it", i)
+                        continue
+                    result[ch_data[0]] = ch_data[1]
+            return result
+        except Exception as e:
+            Logger.log_error("error read from cache", str(e))
+        return result
+
+    def _check_in_cache(self, channel_url: str) -> str:
+        if self.list.get(channel_url) is not None:
+            return self.list.get(channel_url)
+        return ""
+
+    def _put_in_cache(self, channel_url: str, channel_name: str):
+        self.list[channel_url] = channel_name
+        with open(self.cache_file, "a") as cache_file:
+            cache_file.write("{0};{1}\n".format(channel_url, channel_name))
+
+    @staticmethod
+    def _is_not_youtube_url(channel_url: str) -> bool:
+        return "notyoutube.org" in channel_url

+ 14 - 0
helpers/player.py

@@ -0,0 +1,14 @@
+import os
+from helpers.logger import Logger
+
+class Player:
+
+    @staticmethod
+    def play_mpv(link):
+        Logger.log_info("start play video from youtube via mpv", link)
+        os.system("mpv " + link)
+
+    @staticmethod
+    def play_vlc(link):
+        Logger.log_info("start play video from youtube via vlc", link)
+        pass

+ 21 - 86
start.py

@@ -1,97 +1,32 @@
 # -*- coding: utf-8 -*-
-
-from bs4 import BeautifulSoup as bs
-import requests
+from helpers.logger import Logger
+from helpers.not_youtube import NotYoutube
+from helpers.player import Player
 import re
 from pyfzf.pyfzf import FzfPrompt
 import os
 
-print("Загрузка....")
-
-f = [i.strip('\n').split(',') for i in open('subscribe.txt')]
-
+subscriptions_list = open('subscribe.txt', 'r').readlines()
 
-chanel_list = []
+channel_list = {}
+n_you = NotYoutube("{0}/cache/".format(os.getcwd()))
 
-for k in f:
-    h = (k[0])
-    pre_source = requests.get(h)
-    pre_source.encoding = 'utf-8'
-    pre_soup = bs(pre_source.text, 'html.parser')
-    pre_name = pre_soup.select("span")
-    pre_name1 = pre_name[1]
-    pre_canell = (pre_name1.text)
-    chanel_list.append(pre_canell)
+Logger.log_info("Загрузка каналов....")
+for channel_url in subscriptions_list:
+    channel_url = channel_url.strip()
+    name = n_you.get_channel_name(channel_url)
+    if name == "":
+        continue
+    Logger.log_info("found info about channel", channel_url, name)
+    channel_list[name] = channel_url
 
 fzf = FzfPrompt()
+target_channel = fzf.prompt(channel_list)
+Logger.log_info("selected channel:", target_channel[0])
 
+video_list = n_you.get_channel_video(channel_list.get(target_channel[0]))
+target_video = fzf.prompt(video_list)
+Logger.log_info("selected video from channel:", target_video[0])
+video_url = video_list.get(target_video[0])
 
-
-target_chanel=fzf.prompt(chanel_list)
-print(target_chanel[0])
-nomber_chanel = (chanel_list.index(target_chanel[0]))
-
-
-url = f[nomber_chanel]
-
-end_url = url[0]
-
-
-
-source = requests.get(end_url)
-
-source.encoding = 'utf-8' 
-
-
-soup = bs(source.text, 'html.parser')
-
-
-name = soup.select("span")
-name1 = name[1]
-canell = (name1.text)
-
-
-
-spisokNAME=[]
-z=0
-for title in soup.select("div p"):
-    kkk = title.find('a')
-
-
-
-    
-    if kkk is not None:
-        xxx = kkk
-        ggg=xxx.text
-        if re.search(canell, ggg):
-            a = 1
-        else:
-            z=z+1
-
-            if z >= 2:
-
-                spisokNAME.append(ggg) 
-                
-
-target=fzf.prompt(spisokNAME)
-
-print(target[0])
-nomber = (spisokNAME.index(target[0]))
-
-
-spisok=[]
-
-for a in soup.find_all('a', href=True):
-    link=(a['href'])
-    
-    if re.search(r'com/watch\b', link):   
-        spisok.append(link) 
-
-
-
-end_target = spisok[nomber]
-
-comand = "mpv " + end_target
-
-os.system(comand)           
-                
+Player.play_mpv("https://www.youtube.com" + video_url)
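
For context, the rewritten start.py treats each line of subscribe.txt as a bare channel URL, whereas the old code split each line on commas and took the first field. A minimal subscribe.txt would look like this (the addresses are hypothetical):

    https://notyoutube.org/channel/abc
    https://notyoutube.org/channel/def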