__init__.py 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. import asyncio
  2. from utils import json, os, wget
  3. from bs4 import BeautifulSoup
  4. async def _html_parse(url) -> BeautifulSoup:
  5. content = await wget(url)
  6. return BeautifulSoup(content, "html.parser")
  7. def _text(element):
  8. try:
  9. return element.text.strip()
  10. except:
  11. pass
  12. async def song(url, number):
  13. html = await _html_parse(url)
  14. lyrics = ""
  15. lyrics_element = html.select_one("div.lyrics p")
  16. if (lyrics_element is None):
  17. lyrics_elements = html.select('[class^="Lyrics__Container"]')
  18. for element in lyrics_elements:
  19. for span in element.select('span[style^="position:absolute;"]'):
  20. if span:
  21. span.extract()
  22. for br in element.select('br'):
  23. if br:
  24. br.replace_with(BeautifulSoup("\n", "html.parser"))
  25. lyrics += _text(element) + "\n\n"
  26. else:
  27. raise f"Old version {url}"
  28. artist_element = html.select_one('[class*="SongHeaderVariantdesktop__Artist"]')
  29. title = html.select_one("h1")
  30. artist = _text(artist_element)
  31. return {
  32. "title": _text(title),
  33. "lyrics": lyrics.strip(),
  34. "artist": artist,
  35. "url": url,
  36. "number": number
  37. }
  38. async def album(url: str, path=None):
  39. filename = "album.json"
  40. if path is not None:
  41. filename = f"{path}/{filename}"
  42. if os.path_exists(filename):
  43. return await json.load(filename)
  44. html = await _html_parse(url)
  45. title = html.select_one("h1")
  46. artist = html.select_one("h2")
  47. released = html.select_one(".header_with_cover_art-primary_info .metadata_unit")
  48. cover = html.select_one('meta[property="og:image"]')
  49. album = {
  50. "title": _text(title),
  51. "artist": _text(artist),
  52. "released": _text(released),
  53. "cover_url": cover.get("content")
  54. }
  55. tasks = []
  56. for i, row in enumerate(html.select("a.u-display_block")):
  57. task = asyncio.create_task(song(row["href"], i+1))
  58. tasks.append(task)
  59. album["songs"] = await asyncio.gather(*tasks)
  60. await json.dump(album, filename)
  61. return album