TrueAuraCoral
/
Python-Projects
mirror of https://codeberg.org/zortazert/Python-Projects


			
				
					
						
						
							12345678910111213141516171819202122232425262728293031323334
							#!/usr/bin/env python
# FORK OF: https://github.com/vaginessa/Scribd-Downloader/
# Ported from Python 2 to Python 3 and adapted to my use case.
from bs4 import BeautifulSoup
import requests
import sys
import re
import os

# Matches the page descriptors embedded in the document's inline JavaScript,
# e.g. `contentUrl: "https://.../pages/1-abc.jsonp"`, capturing the URL.
CONTENT_URL_RE = re.compile(r'contentUrl: "(.*?)"')

# Seconds to wait for the server before giving up on the page request.
REQUEST_TIMEOUT = 30


def extract_image_urls(script_text):
    """Return the page-image URLs referenced in a chunk of JavaScript text.

    Every ``contentUrl: "<url>"`` occurrence in *script_text* is rewritten
    from its JSONP form to the matching image form by replacing ``/pages/``
    with ``/images/`` and ``jsonp`` with ``jpg``.

    Args:
        script_text: Text content of a ``<script>`` element (may be empty).

    Returns:
        A list of rewritten URLs in order of appearance; empty if none match.
    """
    return [
        url.replace('/pages/', '/images/').replace('jsonp', 'jpg')
        for url in CONTENT_URL_RE.findall(script_text)
    ]


def main(argv=None):
    """Fetch the document page given on the command line and list its pages.

    The raw HTML is saved to ``index.html`` in the current directory, then
    every ``text/javascript`` script element is scanned for page URLs. Each
    rewritten image URL is printed, followed by a progress line.

    Args:
        argv: Argument vector to use instead of ``sys.argv``; ``argv[1]``
            must be the document URL.

    Raises:
        SystemExit: If no URL argument was supplied.
        requests.HTTPError: If the server answers with an error status.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        sys.exit('usage: {} <document-url>'.format(args[0] if args else 'script'))

    response = requests.get(args[1], timeout=REQUEST_TIMEOUT)
    # Fail loudly rather than parsing an HTTP error page as the document.
    response.raise_for_status()

    with open("index.html", "w", encoding="utf-8") as f:
        f.write(response.text)

    soup = BeautifulSoup(response.text, 'html.parser')

    train = 0
    # find_all (not find): the page descriptors are not guaranteed to live in
    # the first script element, and iterating a single tag's string would
    # walk it character by character so the pattern could never match.
    for script in soup.find_all('script', type='text/javascript'):
        for child in script.children:
            for replacement in extract_image_urls(str(child)):
                train += 1
                print(replacement)
                print('Downloading page ' + str(train))
                # NOTE: the actual image download is currently disabled; only
                # the URLs are listed.
                #response = requests.get(replacement, stream=True)
                #with open(str(train) + '.jpg', 'wb') as out_file:
                #    out_file.write(response.content)


if __name__ == "__main__":
    main()