driver_spider.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  1. #!/usr/bin/env python
  2. import scrapy
  3. import re
  4. from time import sleep
  5. class LoginSpider(scrapy.Spider):
  6. name = 'uk-driver-spider'
  7. login_url = 'https://driverpracticaltest.direct.gov.uk/login'
  8. core_url = 'https://driverpracticaltest.direct.gov.uk'
  9. start_urls = [login_url]
  10. def parse(self, response):
  11. # Get username and password.
  12. #
  13. with open('./username', 'r') as f:
  14. username = f.read()
  15. with open('./password', 'r') as f:
  16. password = f.read()
  17. self.logger.info('\nUSER {}PASS {}'.format(username, password))
  18. return scrapy.FormRequest.from_response(
  19. response,
  20. formdata={'username': username, 'password': password},
  21. callback=self.after_login
  22. )
  23. def after_login(self, response):
  24. # Simulate stupid human.
  25. #
  26. sleep(5)
  27. # Check login succeed before going on.
  28. #
  29. if 'authentication failed' in response.body:
  30. self.logger.error('Login failed')
  31. return
  32. # Obtain the csrftoken.
  33. #
  34. href_w_token = response.xpath('//a[@id="date-time-change"]').extract()
  35. token = re.search(r'csrftoken=[\w]+', str(href_w_token)).group(0).split('=')[1]
  36. self.logger.info(token)
  37. url = self.core_url + response.xpath('//a[@id="date-time-change"]/@href').extract_first()
  38. self.logger.info(url)
  39. yield scrapy.Request(url, callback=self.after_change)
  40. def after_change(self, response):
  41. # Simulate stupid human.
  42. #
  43. sleep(4)
  44. return scrapy.FormRequest.from_response(
  45. response,
  46. callback=self.parse_calendar
  47. )
  48. def parse_calendar(self, response):
  49. # Simulate stupid human.
  50. #
  51. sleep(6)
  52. earliest_date = response.xpath('//td[contains(@class, "bookable")]//a/@href').extract_first()[6:]
  53. yield{
  54. 'date': earliest_date
  55. }
  56. # EOF
  57. # vim: set tabstop=4 shiftwidth=4 expandtab :