adobe_stock.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """`Adobe Stock`_ is a service that gives access to millions of royalty-free
  3. assets. Assets types include photos, vectors, illustrations, templates, 3D
  4. assets, videos, motion graphics templates and audio tracks.
  5. .. _Adobe Stock: https://stock.adobe.com/
  6. Configuration
  7. =============
  8. The engine has the following mandatory settings:
  9. - SearXNG's :ref:`engine categories`
  10. - Adobe-Stock's :py:obj:`adobe_order`
  11. - Adobe-Stock's :py:obj:`adobe_content_types`
  12. .. code:: yaml
  13. - name: adobe stock
  14. engine: adobe_stock
  15. shortcut: asi
  16. categories: [images]
  17. adobe_order: relevance
  18. adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"]
  19. - name: adobe stock video
  20. engine: adobe_stock
  21. network: adobe stock
  22. shortcut: asv
  23. categories: [videos]
  24. adobe_order: relevance
  25. adobe_content_types: ["video"]
  26. Implementation
  27. ==============
  28. """
  29. from __future__ import annotations
  30. from typing import TYPE_CHECKING
  31. from datetime import datetime, timedelta
  32. from urllib.parse import urlencode
  33. import isodate
  34. if TYPE_CHECKING:
  35. import logging
  36. logger: logging.Logger
  # Engine metadata: the service's website and Wikidata entry, and how results
  # are obtained (no official API, no API key, JSON responses).
  about = {
      "website": "https://stock.adobe.com/",
      "wikidata_id": "Q5977430",
      "official_api_documentation": None,
      "use_official_api": False,
      "require_api_key": False,
      "results": "JSON",
  }

  # Must be set in the engine configuration; init() raises when it is empty.
  categories = []
  # request() forwards the page number as the ``search_page`` argument.
  paging = True
  # NOTE(review): presumably asks the framework to send an Accept-Language
  # header; request() sets one itself only when the locale is "all" -- confirm.
  send_accept_language_header = True
  # Sent as the ``limit`` argument of every search request.
  results_per_page = 10
  # Prefix of the search URL built in request().
  base_url = "https://stock.adobe.com"

  # Must be set in the engine configuration; init() rejects an empty or
  # unsupported value.
  adobe_order: str = ""
  51. """Sort order, can be one of:
  52. - ``relevance`` or
  53. - ``featured`` or
  54. - ``creation`` (most recent) or
  55. - ``nb_downloads`` (number of downloads)
  56. """
  # All content types known to this engine: request() sends one filter flag per
  # entry and init() validates the configured types against this list.
  ADOBE_VALID_TYPES = ["photo", "illustration", "zip_vector", "video", "template", "3d", "audio", "image"]
  # Must be set to a non-empty list in the engine configuration (checked by init()).
  adobe_content_types: list = []
  59. """A list of content types. The following content types are offered:
  60. - Images: ``image``
  61. - Videos: ``video``
  62. - Templates: ``template``
  63. - 3D: ``3d``
  64. - Audio: ``audio``
  65. Additional subcategories:
  66. - Photos: ``photo``
  67. - Illustrations: ``illustration``
  68. - Vectors: ``zip_vector``
  69. """
  70. # Do we need support for "free_collection" and "include_stock_enterprise"?
  def init(_):
      """Validate the engine settings and fail on a broken configuration.

      The module-level ``categories``, ``adobe_order`` and
      ``adobe_content_types`` are checked in that order and the first problem
      found is reported.  The single argument is ignored.

      :raises ValueError: if one of the settings is unset, ``adobe_order`` is
          not a supported sort order, ``adobe_content_types`` is not a list, or
          one of its entries is not in ``ADOBE_VALID_TYPES``.
      """
      if not categories:
          raise ValueError("adobe_stock engine: categories is unset")

      # sort order: must be set and must be one of the supported values
      if not adobe_order:
          raise ValueError("adobe_stock engine: adobe_order is unset")
      supported_orders = ("relevance", "featured", "creation", "nb_downloads")
      if adobe_order not in supported_orders:
          raise ValueError(f"unsupported adobe_order: {adobe_order}")

      # content types: must be a non-empty list made of known type names
      if not adobe_content_types:
          raise ValueError("adobe_stock engine: adobe_content_types is unset")
      if not isinstance(adobe_content_types, list):
          raise ValueError(
              "adobe_stock engine: adobe_content_types must be a list of strings not %s" % type(adobe_content_types)
          )
      for type_name in adobe_content_types:
          if type_name in ADOBE_VALID_TYPES:
              continue
          raise ValueError("adobe_stock engine: adobe_content_types: '%s' is invalid" % type_name)
  def request(query, params):
      """Fill ``params`` with the URL (and, if needed, headers) of the search request.

      The URL points to the ``/de/Ajax/Search`` endpoint below ``base_url`` and
      carries the query, the page size, the sort order, the page number and one
      ``filters[content_type:<name>]`` flag for every entry of
      ``ADOBE_VALID_TYPES``.

      :param query: the search terms.
      :param params: request parameters; ``pageno``, ``searxng_locale`` and
          ``headers`` are read, ``url`` (and possibly the ``Accept-Language``
          header) is written.
      :returns: the same ``params`` object.
      """
      search_args = {
          "k": query,
          "limit": results_per_page,
          "order": adobe_order,
          "search_page": params["pageno"],
          "search_type": "pagination",
      }
      # every known content type gets an explicit flag: 1 = selected, 0 = not selected
      search_args.update(
          (f"filters[content_type:{type_name}]", int(type_name in adobe_content_types))
          for type_name in ADOBE_VALID_TYPES
      )
      params["url"] = f"{base_url}/de/Ajax/Search?{urlencode(search_args)}"

      # headers required to bypass bot-detection
      no_locale_selected = params["searxng_locale"] == "all"
      if no_locale_selected:
          params["headers"]["Accept-Language"] = "en-US,en;q=0.5"

      return params
  def parse_image_item(item):
      """Convert one image item of the search response into an ``images.html`` result.

      :param item: mapping with the keys ``content_url``, ``title``,
          ``asset_type``, ``content_thumb_extra_large_url``, ``thumbnail_url``,
          ``content_original_width``, ``content_original_height``, ``format``
          and ``author``.
      :returns: the result dict; ``resolution`` is ``"<width>x<height>"``.
      :raises KeyError: if one of the keys is missing.
      """
      # fields that are copied over unchanged: (result key, item key)
      copied_fields = (
          ("url", "content_url"),
          ("title", "title"),
          ("content", "asset_type"),
          ("img_src", "content_thumb_extra_large_url"),
          ("thumbnail_src", "thumbnail_url"),
      )

      result = {"template": "images.html"}
      for result_key, item_key in copied_fields:
          result[result_key] = item[item_key]
      result["resolution"] = f"{item['content_original_width']}x{item['content_original_height']}"
      result["img_format"] = item["format"]
      result["author"] = item["author"]
      return result
  def parse_video_item(item):
      """Convert one video item of the search response into a ``videos.html`` result.

      In video items the title is more or less a content description, so it is
      shortened: it is cut at the first ``.`` (a ``.`` that is the very last
      character does not count), otherwise at the first ``,``, otherwise -- if
      it is longer than 50 characters -- after the word that pushes it past 50
      characters, followed by an ellipsis.  Whenever the title is shortened the
      full text becomes the result's ``content``; otherwise ``content`` is empty.

      :param item: mapping with the keys ``title``, ``content_url``,
          ``time_duration`` (ISO 8601 duration), ``creation_date``
          (``YYYY-MM-DD``), ``thumbnail_url``, ``video_small_preview_url`` and
          ``asset_type``.
      :returns: the result dict.
      """
      full_text = item["title"]
      headline = full_text
      description = ""

      if "." in full_text.strip()[:-1]:
          headline, description = full_text.partition(".")[0], full_text
      elif "," in full_text:
          headline, description = full_text.partition(",")[0], full_text
      elif len(full_text) > 50:
          description = full_text
          headline = ""
          # add word by word and stop right after the 50 character mark is passed
          for word in full_text.split(" "):
              headline = f"{headline} {word}"
              if len(headline) > 50:
                  headline = headline.strip() + "\u2026"
                  break

      result = {"template": "videos.html"}
      result["url"] = item["content_url"]
      result["title"] = headline
      result["content"] = description
      # https://en.wikipedia.org/wiki/ISO_8601#Durations
      result["length"] = isodate.parse_duration(item["time_duration"])
      result["publishedDate"] = datetime.strptime(item["creation_date"], "%Y-%m-%d")
      result["thumbnail"] = item["thumbnail_url"]
      result["iframe_src"] = item["video_small_preview_url"]
      result["metadata"] = item["asset_type"]
      return result
  def parse_audio_item(item):
      """Convert one audio item of the search response into a result dict.

      ``content`` is ``"<album> - <description>"``; when only one of the two is
      available it is used alone, without a dangling separator.  The optional
      ``release_date`` and ``duration`` fields of ``audio_data`` may be empty or
      missing, in which case ``publishedDate`` / ``length`` are ``None``.

      :param item: mapping with the keys ``audio_data``, ``content_url`` and
          ``title`` and optionally ``artist_name``.  ``audio_data`` must contain
          ``preview`` (with a ``url``).
      :returns: the result dict.
      :raises KeyError: if one of the required keys is missing.
      """
      audio_data = item["audio_data"]

      # join only the parts that are present, so that an album without a
      # description does not end in " - "
      content = " - ".join(part for part in (audio_data.get("album"), audio_data.get("description")) if part)

      release_date = audio_data.get("release_date")
      # the value is divided by 1000 to get seconds, i.e. it is treated as milliseconds
      duration_ms = audio_data.get("duration")

      return {
          "url": item["content_url"],
          "title": item["title"],
          "content": content,
          # "thumbnail": base_url + item["thumbnail_url"],
          "iframe_src": audio_data["preview"]["url"],
          "publishedDate": datetime.fromisoformat(release_date) if release_date else None,
          "length": timedelta(seconds=round(duration_ms / 1000)) if duration_ms else None,
          "author": item.get("artist_name"),
      }
  def response(resp):
      """Parse the JSON answer of the search endpoint into a list of results.

      ``items`` of the answer is expected to be a mapping whose values are the
      individual assets.  Each asset is converted according to its
      ``asset_type`` (compared case-insensitively): image-like types by
      ``parse_image_item``, ``video`` by ``parse_video_item`` and ``audio`` by
      ``parse_audio_item``.  Assets of any other type are logged and skipped.

      :param resp: response object offering a ``json()`` method.
      :returns: list of result dicts; an empty list when ``items`` is a list
          rather than a mapping, so that callers always get a list back.
      """
      results = []
      items = resp.json()["items"]

      # NOTE(review): presumably the endpoint sends a (empty) list instead of a
      # mapping when nothing was found -- there is nothing to convert then.
      if isinstance(items, list):
          return results

      for item in items.values():
          asset_type = item["asset_type"].lower()
          if asset_type in ("image", "premium-image", "illustration", "vector"):
              result = parse_image_item(item)
          elif asset_type == "video":
              result = parse_video_item(item)
          elif asset_type == "audio":
              result = parse_audio_item(item)
          else:
              logger.error("no handle for %s --> %s", item["asset_type"], item)
              continue
          results.append(result)

      return results
  179. return results