trailerdl.py 3.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. #!/usr/bin/env python
  2. import requests
  3. import re
  4. import json
  5. from urllib.parse import urljoin, urlparse
  6. import sys
  7. import mimetypes
  8. import time
  9. import argparse
  10. def get_arguments():
  11. parser = argparse.ArgumentParser(description='IMDB trailer downloader')
  12. parser.add_argument('link', type=str, metavar='URL', help='IMDB link')
  13. #parser.add_argument('-r', '--random', action="store_true", default=False, help='Copy the link to the pasted file')
  14. args = parser.parse_args()
  15. return args
  16. def get_data(imdb_link):
  17. # Clean out ?ref_=nv_sr_srsg_0 nonsense if it's there
  18. imdb_link = urljoin(imdb_link, urlparse(imdb_link).path)
  19. # Found form over here: https://regex101.com/library/uO6fZ6
  20. # somehow in the future I want to detect imdb id's and libremdb
  21. # links
  22. pattern = re.compile("^(?:http:\/\/|https:\/\/)?(?:www\.)?(?:imdb.com\/title\/)?(tt[0-9]*)(.+?)")
  23. if not re.fullmatch(pattern, imdb_link):
  24. sys.exit("Invalid imdb link")
  25. request = requests.get(imdb_link)
  26. #with open("index.html", "w", encoding="utf-8") as f:
  27. # f.write(request.text)
  28. if request.status_code != 200:
  29. sys.exit(f"Could not connect to {imdb_link}")
  30. # This is a json data goldmine I found when looking for a
  31. # trailer imdb_link... Could be used for a lot of other things.
  32. data = json.loads(re.findall('<script id="__NEXT_DATA__" type="application/json">(.*?)</script>', request.text)[0])
  33. try:
  34. trailer_imdb_link = data["props"]["pageProps"]["aboveTheFoldData"]["primaryVideos"]["edges"][0]["node"]["playbackURLs"][0]["url"]
  35. title = data["props"]["pageProps"]["aboveTheFoldData"]["titleText"]["text"]
  36. imdb_id = data["query"]["tconst"]
  37. except Exception:
  38. sys.exit("Something went wrong... Couldn't find the trailer.")
  39. #print(data)
  40. return {"link": trailer_imdb_link, "title": title, "id": imdb_id}
  41. def download(link):
  42. data = get_data(link)
  43. response = requests.get(data["link"], stream= True)
  44. extension = mimetypes.guess_all_extensions(response.headers['Content-Type'], strict=False)[0]
  45. title = data["title"]
  46. invalid = '<>:"/\|?*'
  47. for char in invalid:
  48. title = title.replace(char, '')
  49. filename = f'{title} [{data["id"]}]{extension}'
  50. print(f"[download] Destination: {filename}")
  51. with open(filename, 'wb') as f:
  52. total = int(response.headers.get('content-length'))
  53. megabytes = f"{round(float(total) / 1024, 1)}MiB"
  54. if total is None:
  55. f.write(response.content)
  56. return
  57. downloaded = 0
  58. st = time.time()
  59. for data in response.iter_content(chunk_size=max(int(total/1000), 1024*1024)):
  60. downloaded += len(data)
  61. f.write(data)
  62. et = str(time.strftime("%H:%M:%S", time.gmtime(time.time() - st)))
  63. sys.stdout.write(f'\r[download] {round((downloaded / total) * 100)}% of {megabytes} in {et}')
  64. sys.stdout.flush()
  65. sys.stdout.write('\n')
  66. def main():
  67. args = get_arguments()
  68. if args.link:
  69. download(args.link)
# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
    main()