12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758 |
- # SPDX-License-Identifier: AGPL-3.0-or-later
- """Goodreads (books)
- """
- from urllib.parse import urlencode
- from lxml import html
- from searx.utils import extract_text, eval_xpath, eval_xpath_list
# Engine metadata: where the data comes from and how it is obtained.
about = {
    "website": "https://www.goodreads.com",
    "wikidata_id": "Q2359213",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

# Engine settings. No categories are assigned here; `paging` is enabled and
# request() forwards params['pageno'] as the `page` query argument.
categories = []
paging = True

# Root of the site; used to build the search URL and prepended to result links.
base_url = "https://www.goodreads.com"

# XPath expressions used by response(). `results_xpath` selects the candidate
# rows; the remaining expressions are evaluated relative to each row.
results_xpath = "//table//tr"
thumbnail_xpath = ".//img[contains(@class, 'bookCover')]/@src"
url_xpath = ".//a[contains(@class, 'bookTitle')]/@href"
title_xpath = ".//a[contains(@class, 'bookTitle')]"
author_xpath = ".//a[contains(@class, 'authorName')]"
info_text_xpath = ".//span[contains(@class, 'uitext')]"
def request(query, params):
    """Fill in the Goodreads search URL for *query*.

    The query text and the page number taken from ``params['pageno']`` are
    URL-encoded as the ``q`` and ``page`` arguments of ``{base_url}/search``.
    The resulting URL is stored in ``params['url']`` and the same ``params``
    mapping is returned.
    """
    query_string = urlencode({'q': query, 'page': params['pageno']})
    params['url'] = f"{base_url}/search?{query_string}"
    return params
def response(resp):
    """Turn a Goodreads search page into a list of result dicts.

    ``resp.text`` is parsed as HTML and every node matched by
    ``results_xpath`` yields one dict with the keys ``url``, ``title``,
    ``thumbnail``, ``content`` and ``metadata``. Each value is the text
    extracted from the corresponding row-relative XPath; the ``url`` value is
    additionally prefixed with ``base_url``.
    """
    tree = html.fromstring(resp.text)

    def text_at(node, xpath):
        # Evaluate `xpath` relative to `node` and reduce the match to text.
        return extract_text(eval_xpath(node, xpath))

    return [
        {
            'url': base_url + text_at(row, url_xpath),
            'title': text_at(row, title_xpath),
            'thumbnail': text_at(row, thumbnail_xpath),
            'content': text_at(row, info_text_xpath),
            'metadata': text_at(row, author_xpath),
        }
        for row in eval_xpath_list(tree, results_xpath)
    ]
|