123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159 |
- ########################################################################
- # Wiizard - A Wii games manager
- # Copyright (C) 2023 CYBERDEViL
- #
- # This file is part of Wiizard.
- #
- # Wiizard is free software: you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # Wiizard is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <https://www.gnu.org/licenses/>.
- #
- ########################################################################
- import requests
- from bs4 import BeautifulSoup
- from urllib.parse import urlparse
- from wiizard.scrapers.common import (
- ScraperBase,
- GameImageLink,
- IMG_FRONT_COVER,
- IMG_FRONT_3D_COVER,
- IMG_FULL,
- IMG_DISC,
- ScraperError,
- ScraperNotFoundError
- )
# Maps the image-type segment of a gametdb art url (the path part after the
# console name) to the image type constant used by the scrapers. Image types
# that are not listed here are ignored by the scraper.
IMG_MAP = {
    "cover": IMG_FRONT_COVER,
    "cover3D": IMG_FRONT_3D_COVER,
    "coverfullHQ": IMG_FULL,
    "disc": IMG_DISC,
}
class GameTdbScraper(ScraperBase):
    """ Scraper that collects image links from a game page on gametdb.com.
    """
    __BASE_URL = "https://www.gametdb.com/Wii/{id6}"

    def __init__(self, requestHandler):
        """ Pass the request handler on to ScraperBase.

        NOTE(review): getGameImageLinks() reads the handler back through
        'self.request'; presumably ScraperBase stores it there - confirm.
        """
        super().__init__(requestHandler)

    @staticmethod
    def _pickImageLink(imageElem, gameId6Str):
        """ Return the link that belongs to an 'img' element, or None when the
        element should be skipped.

        When the 'img' is wrapped in an 'a' element the 'href' of that 'a' is
        used (we might find larger images there), else the 'src' of the 'img'
        itself is used (like disc and front cover images). In both cases the
        link is only accepted when it contains the game id.
        """
        parent = imageElem.parent

        # The 'img' element doesn't have a parent element.
        if parent is None:
            return None

        if parent.name == "a":
            link = parent.get("href", "")
        else:
            link = imageElem.get("src", "")

        return link if gameId6Str in link else None

    def getGameImageLinks(self, gameId6Str):
        """ Extract the links to downloadable images from gametdb html

        Parameters:
            gameId6Str: id of the game; it is filled into the page url and
                        used to recognise the links that belong to the game.

        Returns:
            A list with a GameImageLink for every accepted link whose image
            type is present in IMG_MAP. The list is empty when the page has
            no content element.

        Raises:
            ScraperError:         the request itself failed.
            ScraperNotFoundError: the response status code is not 200.
        """
        images = []

        # Do the request
        url = self.__BASE_URL.format(id6=gameId6Str)
        try:
            response = self.request.get(url)
        except requests.RequestException as err:
            # RequestException is the base of ConnectionError and also covers
            # timeouts, too many redirects, ...; so callers only have to deal
            # with ScraperError. Chain the cause to keep the traceback.
            raise ScraperError(err) from err

        if response.status_code != 200:
            raise ScraperNotFoundError(
                "Game page with id6 '{}' not found".format(gameId6Str))

        html = response.text
        del response

        # Parse the html
        soup = BeautifulSoup(html, "lxml")
        contentElem = soup.find("div", {"id": "wikitext"})
        if contentElem is None:
            print("Content element not found")
            # TODO error passing?
            return images

        # Get all 'img' elements that link to an image of our game.
        for imageElem in contentElem.find_all("img"):
            link = self._pickImageLink(imageElem, gameId6Str)
            if link is None:
                continue

            # Parse values from the url
            # =========================
            # Example url:
            #   https://art.gametdb.com/wii/cover3D/NL/RMCP01.png?1234
            #
            # The path is '/<console>/<image type>/<language>/<file name>'.
            # We will parse the image type and language, the game id is
            # already known by us.
            #
            # Consoles
            # --------
            #   wii (this includes gamecube)
            #
            # Image types (only those present in IMG_MAP are used)
            # -----------
            #   cover3D, cover3D2, cover3DB, cover3DB2,
            #   coverfullHQ, coverfullHQ2, coverfullHQB, coverfullHQB2
            pathSegments = urlparse(link).path.split("/")

            # The leading '/' gives an empty first segment, so a valid path
            # splits into exactly five segments.
            if len(pathSegments) != 5:
                print("Error on link", link)
                # TODO error passing?
                continue

            _, _console, imgType, language, _fileName = pathSegments

            if imgType not in IMG_MAP:
                continue

            images.append(GameImageLink(link, IMG_MAP[imgType], language))

        return images
|