TrueAuraCoral
/
Python-Projects
tükrözi: https://codeberg.org/zortazert/Python-Projects


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
							#!/usr/bin/env python

import requests
import re
import json
from urllib.parse import urljoin, urlparse
import sys
import mimetypes
import time
import argparse

def get_arguments():
 
    parser = argparse.ArgumentParser(description='IMDB trailer downloader')
 
    parser.add_argument('link', type=str, metavar='URL', help='IMDB link')
    #parser.add_argument('-r', '--random', action="store_true", default=False, help='Copy the link to the pasted file')
 
    args = parser.parse_args()
 
    return args

def get_data(imdb_link):
    # Clean out ?ref_=nv_sr_srsg_0 nonsense if it's there
    imdb_link = urljoin(imdb_link, urlparse(imdb_link).path)
    # Found form over here: https://regex101.com/library/uO6fZ6
    # somehow in the future I want to detect imdb id's and libremdb
    # links
    pattern = re.compile("^(?:http:\/\/|https:\/\/)?(?:www\.)?(?:imdb.com\/title\/)?(tt[0-9]*)(.+?)")
    if not re.fullmatch(pattern, imdb_link):
        sys.exit("Invalid imdb link")
    request = requests.get(imdb_link)
    #with open("index.html", "w", encoding="utf-8") as f:
    #    f.write(request.text)
    if request.status_code != 200:
        sys.exit(f"Could not connect to {imdb_link}")
    # This is a json data goldmine I found when looking for a
    # trailer imdb_link... Could be used for a lot of other things.
    data = json.loads(re.findall('<script id="__NEXT_DATA__" type="application/json">(.*?)</script>', request.text)[0])
    try:
        trailer_imdb_link = data["props"]["pageProps"]["aboveTheFoldData"]["primaryVideos"]["edges"][0]["node"]["playbackURLs"][0]["url"]
        title = data["props"]["pageProps"]["aboveTheFoldData"]["titleText"]["text"]
        imdb_id = data["query"]["tconst"]
    except Exception:
        sys.exit("Something went wrong... Couldn't find the trailer.")
    #print(data)
    return {"link": trailer_imdb_link, "title": title, "id": imdb_id}


def download(link):
    data = get_data(link)
    response = requests.get(data["link"], stream= True)
    extension = mimetypes.guess_all_extensions(response.headers['Content-Type'], strict=False)[0]
    title = data["title"]
    invalid = '<>:"/\|?*'
    for char in invalid:
        title = title.replace(char, '')
    filename = f'{title} [{data["id"]}]{extension}'
    print(f"[download] Destination: {filename}")
    with open(filename, 'wb') as f:
        total = int(response.headers.get('content-length'))
        megabytes = f"{round(float(total) / 1024, 1)}MiB"

        if total is None:
            f.write(response.content)
            return

        downloaded = 0
        st = time.time()
        for data in response.iter_content(chunk_size=max(int(total/1000), 1024*1024)):
            downloaded += len(data)
            f.write(data)
            et = str(time.strftime("%H:%M:%S", time.gmtime(time.time() - st)))
            sys.stdout.write(f'\r[download] {round((downloaded / total) * 100)}% of {megabytes} in {et}')
            sys.stdout.flush()
    sys.stdout.write('\n')
 

def main():
    args = get_arguments()
    if args.link:
        download(args.link)

if __name__ == "__main__":
    main()