########################################################################
#  Wiizard - A Wii games manager
#  Copyright (C) 2023  CYBERDEViL
#
#  This file is part of Wiizard.
#
#  Wiizard is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  Wiizard is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
########################################################################


import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse

from wiizard.scrapers.common import (
  ScraperBase,
  GameImageLink,
  IMG_FRONT_COVER,
  IMG_FRONT_3D_COVER,
  IMG_FULL,
  IMG_DISC,
  ScraperError,
  ScraperNotFoundError
)


# Translates the image type segment found in a gametdb image url into the
# image type constant imported from wiizard.scrapers.common.
IMG_MAP = {
  "cover": IMG_FRONT_COVER,
  "cover3D": IMG_FRONT_3D_COVER,
  "coverfullHQ": IMG_FULL,
  "disc": IMG_DISC,
}


class GameTdbScraper(ScraperBase):
  """ Scraper that collects image links from a game's page on gametdb.com
  """
  __BASE_URL = "https://www.gametdb.com/Wii/{id6}"

  def __init__(self, requestHandler):
    ScraperBase.__init__(self, requestHandler)

  def getGameImageLinks(self, gameId6Str):
    """ Extract the links to downloadable images from gametdb html

    Parameters:
      gameId6Str: The id6 of the game. It is used to build the page url and
                  to filter the image links found on the page.

    Returns:
      A list of GameImageLink objects, one for every image link that contains
      the game id and whose image type is a key of IMG_MAP. The list is empty
      when the page has no 'wikitext' content element or no usable links.

    Raises:
      ScraperError:         The request raised a requests.ConnectionError.
      ScraperNotFoundError: The response status code is not 200.
    """
    images = []

    # Do the request
    # NOTE(review): 'self.request' is presumably set up by ScraperBase from
    #               the given requestHandler; confirm in scrapers.common.
    url = self.__BASE_URL.format(id6=gameId6Str)

    try:
      response = self.request.get(url)
    except requests.ConnectionError as err:
      # Chain explicitly so the original traceback stays attached as cause.
      raise ScraperError(err) from err

    if response.status_code != 200:
      raise ScraperNotFoundError("Game page with id6 '{}' not found".format(gameId6Str))

    html = response.text
    del response

    # Parse the html
    soup = BeautifulSoup(html, "lxml")

    contentElem = soup.find("div", {"id": "wikitext"})
    if not contentElem:
      print("Content element not found")
      # TODO error passing?
      return images

    # Get all 'img' elements and keep the ones that lead us to a link with
    # our game id in it.
    for imageElem in contentElem.find_all("img"):
      parent = imageElem.parent

      # The 'img' element doesn't have a parent element.
      if parent is None:
        continue

      if parent.name == "a":
        # Parent is a 'a' element, we might find larger images here
        link = parent.get("href", "")
      else:
        # The parent isn't a 'a' element, the image itself might still be
        # one we want (like disc and front cover images)
        link = imageElem.get("src", "")

      if gameId6Str not in link:
        continue

      # Parse values from the url
      # =========================
      #
      #   Example url: https://art.gametdb.com/wii/cover3D/NL/RMCP01.png?1234
      #
      #   The path of the example splits on "/" into 5 segments:
      #     [0] ""            (the path starts with a "/")
      #     [1] "wii"         console (this includes gamecube)
      #     [2] "cover3D"     image type
      #     [3] "NL"          language
      #     [4] "RMCP01.png"  game id, already known by us
      #
      #   Image types listed for these urls (only the keys of IMG_MAP are
      #   used, links with any other image type are skipped):
      #     cover3D, cover3D2, cover3DB, cover3DB2,
      #     coverfullHQ, coverfullHQ2, coverfullHQB, coverfullHQB2
      pathSegments = urlparse(link).path.split("/")
      if len(pathSegments) != 5:
        print("Error on link", link)
        # TODO error passing?
        continue

      #console = pathSegments[1]
      imgType  = pathSegments[2]
      language = pathSegments[3]

      if imgType not in IMG_MAP:
        continue

      images.append(GameImageLink(link, IMG_MAP[imgType], language))

    return images