123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172 |
- #!/usr/bin/env python
- import scrapy
- import re
- from time import sleep
class LoginSpider(scrapy.Spider):
    """Log into the UK driving-practical-test booking site and scrape
    the earliest bookable test date.

    Credentials are read from local files ``./username`` and
    ``./password`` in the working directory.
    """

    name = 'uk-driver-spider'
    login_url = 'https://driverpracticaltest.direct.gov.uk/login'
    core_url = 'https://driverpracticaltest.direct.gov.uk'
    start_urls = [login_url]

    def parse(self, response):
        """Read credentials from disk and submit the login form.

        Returns a FormRequest whose response is handled by after_login.
        """
        # strip() drops the trailing newline most editors append to the
        # credential files -- otherwise it is submitted as part of the
        # username/password and authentication fails.
        with open('./username', 'r') as f:
            username = f.read().strip()
        with open('./password', 'r') as f:
            password = f.read().strip()
        # Log only the username: never write the password to the log.
        self.logger.info('Logging in as %s', username)
        return scrapy.FormRequest.from_response(
            response,
            formdata={'username': username, 'password': password},
            callback=self.after_login,
        )

    def after_login(self, response):
        """Verify login succeeded, then follow the change-date link."""
        # Simulate a (slow) human.
        # NOTE(review): time.sleep() blocks the whole Twisted reactor;
        # prefer the DOWNLOAD_DELAY setting -- kept for now.
        sleep(5)
        # Check login succeeded before going on. response.body is bytes
        # on Python 3, so a str-in-bytes test would raise TypeError --
        # use response.text (decoded) instead.
        if 'authentication failed' in response.text:
            self.logger.error('Login failed')
            return
        # Obtain the csrftoken from the change-date link (logged for
        # debugging only; the followed URL already carries it).
        href_w_token = response.xpath('//a[@id="date-time-change"]').extract()
        match = re.search(r'csrftoken=[\w]+', str(href_w_token))
        if match is None:
            self.logger.error('csrftoken not found on post-login page')
            return
        token = match.group(0).split('=')[1]
        self.logger.info(token)
        href = response.xpath(
            '//a[@id="date-time-change"]/@href').extract_first()
        if href is None:
            self.logger.error('date-time-change link not found')
            return
        url = self.core_url + href
        self.logger.info(url)
        yield scrapy.Request(url, callback=self.after_change)

    def after_change(self, response):
        """Submit the date-change form; calendar handled by parse_calendar."""
        # Simulate a (slow) human (see reactor-blocking note above).
        sleep(4)
        return scrapy.FormRequest.from_response(
            response,
            callback=self.parse_calendar,
        )

    def parse_calendar(self, response):
        """Yield the earliest bookable date found on the calendar page."""
        # Simulate a (slow) human (see reactor-blocking note above).
        sleep(6)
        href = response.xpath(
            '//td[contains(@class, "bookable")]//a/@href').extract_first()
        # extract_first() returns None when no slot is bookable; the
        # original slicing would then raise TypeError.
        if href is None:
            self.logger.error('No bookable slot found')
            return
        # The first 6 characters are a query prefix (e.g. '?date=');
        # the remainder is the date itself. TODO(review): confirm the
        # prefix length against a live page.
        earliest_date = href[6:]
        yield {'date': earliest_date}
- # EOF
- # vim: set tabstop=4 shiftwidth=4 expandtab :
|