|
@@ -0,0 +1,61 @@
|
|
|
+#!/usr/bin/env python
|
|
|
+
|
|
|
+import scrapy
|
|
|
+import re
|
|
|
+
|
|
|
+
|
|
|
class LoginSpider(scrapy.Spider):
    """Log in to driverpracticaltest.direct.gov.uk and scrape a bookable date.

    Request flow:

    1. ``parse`` submits the login form with credentials read from the
       files ``./username`` and ``./password`` (relative to the current
       working directory).
    2. ``after_login`` follows the link with id ``date-time-change``.
    3. ``after_change`` submits the form found on that page unchanged.
    4. ``parse_calendar`` yields the first bookable calendar entry.
    """

    name = 'uk-driver-spider'
    login_url = 'https://driverpracticaltest.direct.gov.uk/login'
    # Site root. No longer needed to build URLs (see ``after_login``) but
    # kept as a public attribute for anything that still reads it.
    core_url = 'https://driverpracticaltest.direct.gov.uk'
    start_urls = [login_url]

    @staticmethod
    def _read_credential(path):
        """Return the contents of *path* without trailing line terminators.

        Editors usually end a file with a newline; sending it as part of
        the username or password would corrupt the submitted value.
        """
        with open(path, 'r') as f:
            return f.read().rstrip('\r\n')

    def parse(self, response):
        """Submit the login form found in *response*.

        Returns a ``FormRequest`` whose response is handled by
        ``after_login``. Raises ``IOError``/``OSError`` if a credential
        file cannot be read.
        """
        username = self._read_credential('./username')
        password = self._read_credential('./password')

        # Never write the password to the log.
        self.logger.info('Logging in as user %s', username)
        return scrapy.FormRequest.from_response(
            response,
            formdata={'username': username, 'password': password},
            callback=self.after_login,
        )

    def after_login(self, response):
        """Verify the login succeeded and follow the "date-time-change" link.

        Yields a single ``Request`` handled by ``after_change``, or nothing
        when the login failed or the link is missing.
        """
        # ``response.body`` is bytes, so compare against a bytes literal
        # (a str literal raises TypeError on Python 3).
        if b'authentication failed' in response.body:
            self.logger.error('Login failed')
            return

        href = response.xpath('//a[@id="date-time-change"]/@href').extract_first()
        if href is None:
            self.logger.error('Link "date-time-change" not found after login')
            return

        # The CSRF token travels inside the href; it is only logged here.
        token_match = re.search(r'csrftoken=(\w+)', href)
        if token_match is None:
            self.logger.warning('No csrftoken found in %s', href)
        else:
            self.logger.info(token_match.group(1))

        # urljoin copes with both relative and absolute hrefs.
        url = response.urljoin(href)
        self.logger.info(url)

        yield scrapy.Request(url, callback=self.after_change)

    def after_change(self, response):
        """Submit the form on the change page without modifying its fields.

        Returns a ``FormRequest`` whose response is handled by
        ``parse_calendar``.
        """
        return scrapy.FormRequest.from_response(
            response,
            callback=self.parse_calendar,
        )

    def parse_calendar(self, response):
        """Yield ``{'date': ...}`` for the first bookable calendar cell.

        Yields nothing (and logs a warning) when the page contains no
        ``td`` whose class includes ``bookable``.
        """
        href = response.xpath(
            '//td[contains(@class, "bookable")]//a/@href'
        ).extract_first()
        if href is None:
            self.logger.warning('No bookable date found in calendar')
            return

        # The first six characters of the href are dropped; presumably a
        # fixed prefix in front of the date -- TODO confirm against the
        # page markup.
        earliest_date = href[6:]

        yield {
            'date': earliest_date,
        }
|
|
|
+
|
|
|
+# EOF
|
|
|
+# vim: set tabstop=4 shiftwidth=4 expandtab :
|