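# Scraper for the shopolog.ru company catalogue: downloads listing pages,
# extracts name, link, image, group, description and date for every entry,
# and can append the rows to catalogue.csv.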
import requests
from bs4 import BeautifulSoup
import csv

def get_normalise_str(name, string):  # normalise a scraped field
    if name == "IMG":
        # image URLs arrive protocol-relative ('//www...'); add the scheme
        return string.replace('//www.', 'https://www.')
    else:  # name == "DATE"
        # keep only the part of the date string after the '- ' separator
        result = string.split('- ')
        return result[-1]  # [-1] is the last element

def write_csv(data):  # append one record to the CSV file
    # mode 'a' appends rows instead of overwriting the file
    with open('catalogue.csv', 'a', newline='', encoding='utf-8') as file_csv:
        writer_file = csv.writer(file_csv)
        writer_file.writerow([
            data['name'],
            data['link'],
            data['img'],
            data['group'],
            data['discr'],
            data['date']
        ])
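
# Illustrative addition (not in the original script): since write_csv() opens
# the file in append mode, no header row is ever written. Below is a sketch of
# a helper that creates the file with a header once, before scraping starts;
# the name write_csv_header is a hypothetical choice.
def write_csv_header():
    with open('catalogue.csv', 'w', newline='', encoding='utf-8') as file_csv:
        csv.writer(file_csv).writerow(
            ['name', 'link', 'img', 'group', 'discr', 'date'])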

def get_html(url):  # download the page and return its HTML
    response = requests.get(url)
    if response.ok:  # True for status codes below 400
        return response.text
    print(response.status_code)  # e.g. 403 or 404
    return None
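
# Illustrative variant (an assumption, not from the source): the same fetch
# with an explicit timeout and error handling, so a hung or failed request
# cannot stall the whole run. get_html_safe is a hypothetical name.
def get_html_safe(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # raises HTTPError on 4xx/5xx
        return response.text
    except requests.RequestException as exc:
        print(f'Request failed for {url}: {exc}')
        return None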

def get_page_data(html):  # extract catalogue entries from the HTML
    soup = BeautifulSoup(html, 'lxml')
    divs = soup.find_all('div', class_='b-catalog__feed__box')
    for item in divs:
        elem1 = item.find('div', class_="b-catalog__feed__image")
        try:
            link = elem1.find('a').get('href')
        except AttributeError:
            link = 'None'
        try:
            img = elem1.find('a').find('img').get('src')
        except AttributeError:
            img = 'None'
        elem2 = item.find('div', class_="b-catalog__feed__text")
        try:
            header = elem2.find('h3').find('a').text.strip()
        except AttributeError:
            header = 'None'
        try:
            date = elem2.find('h3').find('small').text.strip()
        except AttributeError:
            date = 'None'
        try:
            group = elem2.find('h6').find('a').text.strip()
        except AttributeError:
            group = 'None'
        cls3 = "b-catalog__feed__text__description"
        try:
            discr = elem2.find('div', class_=cls3).text.strip()
        except AttributeError:
            discr = 'None'
        data = {
            "name": header,
            # hrefs come site-relative, so prepend the domain
            "link": 'https://www.shopolog.ru' + link,
            "img": get_normalise_str("IMG", img),
            "group": group,
            "discr": discr,
            "date": get_normalise_str("DATE", date)
        }
        # write_csv(data)  # uncomment to save each row to catalogue.csv
        print(data)  # show the extracted row
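
# Illustrative refactor (not in the original): the repeated try/except blocks
# in get_page_data() can be collapsed into one helper. safe_extract is a
# hypothetical name; it returns the string 'None' to match the original
# fallback values.
def safe_extract(getter):
    try:
        return getter()
    except AttributeError:
        return 'None'

# Example usage inside the loop:
#   link = safe_extract(lambda: elem1.find('a').get('href'))
#   img = safe_extract(lambda: elem1.find('a').find('img').get('src'))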

def main():
    # url = 'https://yandex.ru/yaca/cat/Entertainment/'
    for count in range(1, 4):  # catalogue pages 1 to 3
        url = f'https://www.shopolog.ru/company/section/pc/?page={count}'
        html = get_html(url)
        if html:  # get_html() returns None on a failed request
            get_page_data(html)
        # print(url)


if __name__ == '__main__':
    main()
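
# Dependencies: pip install requests beautifulsoup4 lxml
# ('lxml' is the parser passed to BeautifulSoup). Python 3.6+ is required
# because of the f-string in main().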