123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147 |
- # SPDX-License-Identifier: AGPL-3.0-or-later
- """SoundCloud is a German audio streaming service."""
- import re
- from urllib.parse import quote_plus, urlencode
- import datetime
- from dateutil import parser
- from lxml import html
- from searx.network import get as http_get
# Engine metadata.
about = {
    "website": "https://soundcloud.com",
    "wikidata_id": "Q568769",
    "official_api_documentation": "https://developers.soundcloud.com/docs/api/guide",
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

# Engine configuration.
categories = ["music"]
paging = True

search_url = "https://api-v2.soundcloud.com/search"
"""This is not the official (developer) URL; it is the API that is used by the
HTML frontend of the regular web site.
"""

# Pattern that extracts the value of a ``client_id:"..."`` assignment.
cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)

# Client id sent with every search request; empty until ``init()`` assigns it.
guest_client_id = ""

# Number of results requested per page (also used to compute the offset).
results_per_page = 10

# Value sent as the ``facet`` query argument.
soundcloud_facet = "model"

# Maps the primary language subtag of the request language to the value sent
# as ``app_locale``; languages not listed here fall back to "en" in request().
app_locale_map = {
    "de": "de",
    "en": "en",
    "es": "es",
    "fr": "fr",
    "oc": "fr",
    "it": "it",
    "nl": "nl",
    "pl": "pl",
    "szl": "pl",
    "pt": "pt_BR",
    "pap": "pt_BR",
    "sv": "sv",
}
def request(query, params):
    """Build the search URL for *query* and store it in ``params['url']``.

    The offset is derived from ``params['pageno']`` and the module-level
    ``results_per_page``.  The ``app_locale`` argument is looked up from the
    part of ``params['language']`` before the first ``-``; unknown languages
    fall back to ``"en"``.

    :param query: search terms, sent as the ``q`` argument.
    :param params: request parameters; ``pageno`` and ``language`` are read,
        ``url`` is written.
    :return: the same *params* dict, with ``url`` set.
    """
    # missing attributes: user_id, app_version
    # - user_id=451561-497874-703312-310156
    # - app_version=1740727428

    page_index = params['pageno'] - 1
    lang_code = params["language"].split("-")[0]

    args = {
        "q": query,
        "offset": page_index * results_per_page,
        "limit": results_per_page,
        "facet": soundcloud_facet,
        "client_id": guest_client_id,
        "app_locale": app_locale_map.get(lang_code, "en"),
    }

    params['url'] = search_url + "?" + urlencode(args)
    return params
def response(resp):
    """Convert a SoundCloud search reply into a list of result dicts.

    Only items of the reply's ``collection`` whose ``kind`` is ``track`` or
    ``playlist`` and that carry a ``permalink_url`` become results; all other
    items are skipped.  ``title`` and ``last_modified`` are read as mandatory
    fields (a missing one still raises ``KeyError``); every other field may be
    absent or ``null``.

    :param resp: response object whose ``json()`` method returns the decoded
        reply.
    :return: list of result dicts with the keys ``url``, ``title``,
        ``content``, ``publishedDate``, ``thumbnail``, ``views`` and
        ``author``, plus ``iframe_src`` and ``length`` when the item provides
        a ``uri`` / a non-zero ``duration``.
    """
    results = []
    data = resp.json()

    # "collection" may be absent or null
    for result in data.get("collection") or []:
        if result.get("kind") not in ("track", "playlist"):
            continue

        url = result.get("permalink_url")
        if not url:
            continue

        content = [
            result.get("description"),
            result.get("label_name"),
        ]
        # "user" may be absent or null
        user = result.get("user") or {}

        res = {
            "url": url,
            "title": result["title"],
            "content": " / ".join(c for c in content if c),
            "publishedDate": parser.parse(result["last_modified"]),
            # prefer the item's artwork, fall back to the uploader's avatar
            "thumbnail": result.get("artwork_url") or user.get("avatar_url") or None,
            "views": result.get("playback_count") or None,
            "author": user.get("full_name") or None,
        }

        # the embedded player needs the item's API uri; without one there is
        # nothing to embed
        uri = result.get("uri")
        if uri:
            res["iframe_src"] = "https://w.soundcloud.com/player/?url=" + quote_plus(uri)

        # the duration is divided by 1000 to get whole seconds
        length = int((result.get("duration") or 0) / 1000)
        if length:
            res["length"] = datetime.timedelta(seconds=length)

        results.append(res)

    return results
def init(engine_settings=None):  # pylint: disable=unused-argument
    """Initialise the engine by storing the result of ``get_client_id()`` in
    the module-level ``guest_client_id``.

    :param engine_settings: accepted but not used.
    """
    global guest_client_id  # pylint: disable=global-statement

    guest_client_id = get_client_id()
def get_client_id() -> str:
    """Scrape a guest ``client_id`` from the SoundCloud web site.

    Fetches https://soundcloud.com, collects the ``<script>`` tags whose
    ``src`` contains ``/assets/`` and searches the referenced scripts, last
    one first, for a ``client_id:"..."`` assignment (``cid_re``).

    NOTE(review): ``logger`` is not defined in the visible source; presumably
    it is provided by the engine loader -- confirm.

    :return: the first non-empty client_id found, or an empty string when the
        start page can't be fetched or no script contains a client_id.
    """
    start_url = "https://soundcloud.com"
    resp = http_get(start_url, timeout=10)
    if not resp.ok:
        logger.error("init: GET %s failed", start_url)
        return ""

    # extracts valid app_js urls from soundcloud.com content
    tree = html.fromstring(resp.content)
    script_tags = tree.xpath("//script[contains(@src, '/assets/')]")
    app_js_urls = [tag.get("src") for tag in script_tags]

    client_id = ""
    for app_js_url in reversed(app_js_urls):
        # gets app_js and search for the client_id
        resp = http_get(app_js_url)
        if not resp.ok:
            logger.error("init: app_js GET %s failed", app_js_url)
            continue

        # a stray undecodable byte must not abort the whole lookup
        match = cid_re.search(resp.content.decode("utf-8", errors="replace"))
        # an empty match (client_id:"") is useless, keep looking
        if match and match.group(1):
            client_id = match.group(1)
            break

    if client_id:
        logger.info("using client_id '%s' for soundcloud queries", client_id)
    else:
        logger.warning("missing valid client_id for soundcloud queries")
    return client_id
|