gametdb.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. ########################################################################
  2. # Wiizard - A Wii games manager
  3. # Copyright (C) 2023 CYBERDEViL
  4. #
  5. # This file is part of Wiizard.
  6. #
  7. # Wiizard is free software: you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, either version 3 of the License, or
  10. # (at your option) any later version.
  11. #
  12. # Wiizard is distributed in the hope that it will be useful,
  13. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. # GNU General Public License for more details.
  16. #
  17. # You should have received a copy of the GNU General Public License
  18. # along with this program. If not, see <https://www.gnu.org/licenses/>.
  19. #
  20. ########################################################################
  21. import requests
  22. from bs4 import BeautifulSoup
  23. from urllib.parse import urlparse
  24. from wiizard.scrapers.common import (
  25. ScraperBase,
  26. GameImageLink,
  27. IMG_FRONT_COVER,
  28. IMG_FRONT_3D_COVER,
  29. IMG_FULL,
  30. IMG_DISC,
  31. ScraperError,
  32. ScraperNotFoundError
  33. )
  34. IMG_MAP = {
  35. "cover" : IMG_FRONT_COVER,
  36. "cover3D" : IMG_FRONT_3D_COVER,
  37. "coverfullHQ": IMG_FULL,
  38. "disc" : IMG_DISC
  39. }
  40. class GameTdbScraper(ScraperBase):
  41. __BASE_URL = "https://www.gametdb.com/Wii/{id6}"
  42. def __init__(self, requestHandler):
  43. ScraperBase.__init__(self, requestHandler)
  44. def getGameImageLinks(self, gameId6Str):
  45. """ Extract the links to downloadable images from gametdb html
  46. """
  47. images = []
  48. # Do the request
  49. url = self.__BASE_URL.format(id6=gameId6Str)
  50. try:
  51. response = self.request.get(url)
  52. except requests.ConnectionError as err:
  53. raise ScraperError(err)
  54. if response.status_code != 200:
  55. raise ScraperNotFoundError("Game page with id6 '{}' not found".format(gameId6Str))
  56. html = response.text
  57. del response
  58. # Parse the html
  59. soup = BeautifulSoup(html, "lxml")
  60. contentElem = soup.find("div", {"id": "wikitext"})
  61. if not contentElem:
  62. print("Content element not found")
  63. # TODO error passing?
  64. return images
  65. # Get all 'img' elements with a 'a' element as parent and our game id is
  66. # present in the 'href'.
  67. imageElems = contentElem.find_all("img")
  68. for imageElem in imageElems:
  69. link = None
  70. parent = imageElem.parent
  71. # The 'img' element doesn't have a parent element.
  72. if parent is None:
  73. continue
  74. # Parent a 'a' element, we might find larger images here
  75. if parent.name == "a":
  76. link = parent.get("href", "")
  77. if gameId6Str not in link:
  78. continue
  79. # The parent isn't a 'a' element
  80. # Check if it is still contains a image we want (like disc and front cover images)
  81. else:
  82. imgHref = imageElem.get("src", "")
  83. if gameId6Str in imgHref:
  84. link = imgHref
  85. else:
  86. continue
  87. """
  88. Parse values from the url
  89. =========================
  90. Example url: https://art.gametdb.com/wii/cover3D/NL/RMCP01.png?1234
  91. ___________/ / /
  92. / __________/ /
  93. / / /
  94. / / /
  95. We will parse the console, image type and language, the gameid is already
  96. known by us.
  97. Consoles
  98. --------
  99. wii (this includes gamecube)
  100. Image types
  101. -----------
  102. cover3D
  103. cover3D2
  104. cover3DB
  105. cover3DB2
  106. coverfullHQ
  107. coverfullHQ2
  108. coverfullHQB
  109. coverfullHQB2
  110. """
  111. parsedLink = urlparse(link)
  112. splittedPath = parsedLink.path.split("/")
  113. if len(splittedPath) != 5:
  114. print("Error on link", link)
  115. # TODO error passing?
  116. continue
  117. parsedLinkSegments = parsedLink.path.split("/")
  118. #console = parsedLinkSegments[1]
  119. imgType = parsedLinkSegments[2]
  120. language = parsedLinkSegments[3]
  121. if imgType not in IMG_MAP:
  122. continue
  123. gameImgLink = GameImageLink(link, IMG_MAP[imgType], language)
  124. images.append(gameImgLink)
  125. return images