123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081 |
- import asyncio
- from utils import json, os, wget
- from bs4 import BeautifulSoup
async def _html_parse(url) -> BeautifulSoup:
    """Download *url* and return its content as a parsed BeautifulSoup tree.

    The page body is obtained by awaiting ``wget(url)`` and is parsed with
    the ``"html.parser"`` backend.
    """
    markup = await wget(url)
    soup = BeautifulSoup(markup, "html.parser")
    return soup
- def _text(element):
- try:
- return element.text.strip()
- except:
- pass
async def song(url, number):
    """Scrape one song page and return its metadata and lyrics.

    Args:
        url: Address of the song page to download and parse.
        number: Track number; copied unchanged into the result.

    Returns:
        A dict with the keys ``"title"``, ``"lyrics"``, ``"artist"``,
        ``"url"`` and ``"number"``. ``"title"`` and ``"artist"`` are ``None``
        when the corresponding element is not found on the page.

    Raises:
        ValueError: If the page still uses the old ``div.lyrics p`` markup,
            which this scraper does not handle.
    """
    html = await _html_parse(url)

    if html.select_one("div.lyrics p") is not None:
        # The original did ``raise f"..."``; raising a str is itself a
        # TypeError, so the intended message never reached the caller.
        raise ValueError(f"Old version {url}")

    sections = []
    for container in html.select('[class^="Lyrics__Container"]'):
        # Drop absolutely-positioned spans so their text does not end up in
        # the lyrics (presumably overlay/annotation markup -- not verified).
        for span in container.select('span[style^="position:absolute;"]'):
            span.extract()
        # Turn <br> tags into real newlines so line breaks survive ``.text``.
        for br in container.select("br"):
            br.replace_with("\n")
        sections.append(_text(container))

    artist_element = html.select_one('[class*="SongHeaderVariantdesktop__Artist"]')
    title_element = html.select_one("h1")

    return {
        "title": _text(title_element),
        # Containers are separated by a blank line; the outer strip removes
        # padding left by empty leading/trailing containers.
        "lyrics": "\n\n".join(sections).strip(),
        "artist": _text(artist_element),
        "url": url,
        "number": number,
    }
async def album(url: str, path=None):
    """Scrape an album page and all of its songs, caching the result as JSON.

    If ``album.json`` (inside *path* when given) already exists, its content
    is loaded and returned without fetching anything. Otherwise the album
    page is downloaded, every linked song is scraped concurrently, and the
    combined result is written to that file before being returned.

    Note: ``os`` and ``json`` here are the helpers imported from the
    project's ``utils`` module, not the standard-library modules.

    Args:
        url: Address of the album page.
        path: Optional directory in which ``album.json`` is read/written.
            When ``None`` the bare filename ``album.json`` is used.

    Returns:
        A dict with the keys ``"title"``, ``"artist"``, ``"released"``,
        ``"cover_url"`` and ``"songs"`` (one ``song()`` result per track, in
        page order). Fields whose element is missing from the page are
        ``None``.
    """
    filename = "album.json"
    if path is not None:
        filename = f"{path}/{filename}"

    # A previously written file short-circuits the whole scrape.
    if os.path_exists(filename):
        return await json.load(filename)

    html = await _html_parse(url)
    cover = html.select_one('meta[property="og:image"]')

    info = {
        "title": _text(html.select_one("h1")),
        "artist": _text(html.select_one("h2")),
        "released": _text(
            html.select_one(".header_with_cover_art-primary_info .metadata_unit")
        ),
        # Tolerate a missing og:image tag the same way the text fields
        # tolerate missing elements, instead of raising AttributeError.
        "cover_url": cover.get("content") if cover is not None else None,
    }

    # One task per track link; track numbers are 1-based in page order.
    tasks = [
        asyncio.create_task(song(row["href"], index))
        for index, row in enumerate(html.select("a.u-display_block"), start=1)
    ]
    info["songs"] = await asyncio.gather(*tasks)

    await json.dump(info, filename)
    return info
|