123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172 |
- #!/usr/bin/env python
- import scrapy
- import re
- from time import sleep
class LoginSpider(scrapy.Spider):
    """Log into the UK driving-practical-test booking site and scrape
    the earliest bookable test date.

    Credentials are read from local files ``./username`` and
    ``./password`` in the working directory.
    """

    name = 'uk-driver-spider'
    login_url = 'https://driverpracticaltest.direct.gov.uk/login'
    core_url = 'https://driverpracticaltest.direct.gov.uk'
    start_urls = [login_url]

    def parse(self, response):
        """Read credentials from disk and submit the login form.

        Returns a FormRequest whose response is handled by after_login.
        """
        # strip() drops the trailing newline most editors append to the
        # credential files -- otherwise it is submitted as part of the
        # username/password and authentication fails.
        with open('./username', 'r') as f:
            username = f.read().strip()
        with open('./password', 'r') as f:
            password = f.read().strip()
        # Log only the username: never write the password to the log.
        self.logger.info('Logging in as %s', username)
        return scrapy.FormRequest.from_response(
            response,
            formdata={'username': username, 'password': password},
            callback=self.after_login,
        )

    def after_login(self, response):
        """Verify login succeeded, then follow the change-date link."""
        # Simulate a (slow) human.
        # NOTE(review): time.sleep() blocks the whole Twisted reactor;
        # prefer the DOWNLOAD_DELAY setting -- kept for now.
        sleep(5)
        # Check login succeeded before going on. response.body is bytes
        # on Python 3, so a str-in-bytes test would raise TypeError --
        # use response.text (decoded) instead.
        if 'authentication failed' in response.text:
            self.logger.error('Login failed')
            return
        # Obtain the csrftoken from the change-date link (logged for
        # debugging only; the followed URL already carries it).
        href_w_token = response.xpath('//a[@id="date-time-change"]').extract()
        match = re.search(r'csrftoken=[\w]+', str(href_w_token))
        if match is None:
            self.logger.error('csrftoken not found on post-login page')
            return
        token = match.group(0).split('=')[1]
        self.logger.info(token)
        href = response.xpath(
            '//a[@id="date-time-change"]/@href').extract_first()
        if href is None:
            self.logger.error('date-time-change link not found')
            return
        url = self.core_url + href
        self.logger.info(url)
        yield scrapy.Request(url, callback=self.after_change)

    def after_change(self, response):
        """Submit the date-change form; calendar handled by parse_calendar."""
        # Simulate a (slow) human (see reactor-blocking note above).
        sleep(4)
        return scrapy.FormRequest.from_response(
            response,
            callback=self.parse_calendar,
        )

    def parse_calendar(self, response):
        """Yield the earliest bookable date found on the calendar page."""
        # Simulate a (slow) human (see reactor-blocking note above).
        sleep(6)
        href = response.xpath(
            '//td[contains(@class, "bookable")]//a/@href').extract_first()
        # extract_first() returns None when no slot is bookable; the
        # original slicing would then raise TypeError.
        if href is None:
            self.logger.error('No bookable slot found')
            return
        # The first 6 characters are a query prefix (e.g. '?date=');
        # the remainder is the date itself. TODO(review): confirm the
        # prefix length against a live page.
        earliest_date = href[6:]
        yield {'date': earliest_date}
- # EOF
- # vim: set tabstop=4 shiftwidth=4 expandtab :
|