import requests
from bs4 import BeautifulSoup as bs
from datetime import datetime
import logging

# Write the results to a log file, overwriting it on every run
logfile = 'data.log'
logging.basicConfig(filename=logfile,
                    format='%(asctime)s %(levelname)-2s %(message)s',
                    filemode='w',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.INFO)


def get_data(url):
    """Fetch a channel's preview page; return its title and the date of the newest post."""
    r = requests.get(url)
    html = bs(r.text, 'html.parser')
    title_channel = html.title.string
    try:
        # The last <time> element on the page belongs to the newest post
        last_post_date = html.find_all('time')[-1]['datetime']
    except IndexError:
        return
    else:
        # Example value: 2021-12-24T09:42:43+00:00
        last_post_date = datetime.strptime(
            last_post_date, "%Y-%m-%dT%H:%M:%S%z")
        title_channel = title_channel.split(' – ')[0]
        return {
            'title_channel': title_channel,
            'last_post_date': datetime.strftime(last_post_date, '%Y-%m-%d %H:%M')
        }


print("START. Collecting the information. Please wait a moment...")

with open('channels.txt', 'r') as file:
    links = file.readlines()
    links = [link.rstrip() for link in links]

for link in links:
    data = get_data(link)
    if data:
        logging.info(
            f"{data['title_channel']} | Newest post: {data['last_post_date']}")
    else:
        logging.error(f'Failed to retrieve data for the link {link}')

print(f"====\nDONE. See the results in the file {logfile}")