utils.py

"""Scraping helpers: fetch page source and BeautifulSoup objects through a shared requests session."""

import re

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
headers = {
    "Accept": "*/*",
    "Connection": "keep-alive",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
}

# Shared session so connections are reused across requests.
session = requests.Session()


def get_source_requests(url, proxy=None, timeout=30):
    """
    Get the page source by requests.
    """
    # Use the same proxy for HTTP and HTTPS; fall back to a direct connection when none is given.
    proxies = {"http": proxy, "https": proxy} if proxy else None
    # Rotate the User-Agent on every request.
    ua = UserAgent()
    headers["User-Agent"] = ua.random
    response = session.get(url, headers=headers, proxies=proxies, timeout=timeout)
    # Strip HTML comments so commented-out markup is not picked up by later parsing.
    source = re.sub(
        r"<!--.*?-->",
        "",
        response.text,
        flags=re.DOTALL,
    )
    return source


def get_soup_requests(url, proxy=None, timeout=30):
    """
    Get a BeautifulSoup object by requests.
    """
    source = get_source_requests(url, proxy, timeout)
    soup = BeautifulSoup(source, "html.parser")
    return soup


def close_session():
    """
    Close the requests session.
    """
    session.close()
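

# Example usage: a minimal sketch, not part of the original module.
# "https://example.com" and the proxy address are placeholder values.
if __name__ == "__main__":
    # Fetch and parse a page; pass proxy="http://127.0.0.1:8080" to route through a proxy.
    soup = get_soup_requests("https://example.com", timeout=30)
    print(soup.title.string if soup.title else "no <title> found")
    # Release pooled connections when done.
    close_session()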