5 Commits fd6ce12c82 ... dad2688deb

Author SHA1 Message Date
  Kirill Kupriyanov dad2688deb Use calendar to retrieve the earliest date. 6 years ago
  Kirill Kupriyanov 718393b0a5 Edit gitignore to ignore __init__.py. 6 years ago
  Kirill Kupriyanov 99bcfc3873 Web spider for retrieving earliest date. 6 years ago
  Kirill Kupriyanov 1aa2433409 Email login and password security files. 6 years ago
  Kirill Kupriyanov 5d7af2a08a Username, password security files. 6 years ago
10 changed files with 129 additions and 0 deletions
  1. 2 0
      .gitignore
  2. 33 0
      daemon_process.py
  3. 3 0
      date.json
  4. 61 0
      driver_spider.py
  5. 0 0
      email_login
  6. 0 0
      email_password
  7. 22 0
      notify.py
  8. 0 0
      password
  9. 8 0
      run_spider.sh
  10. 0 0
      send_to

+ 2 - 0
.gitignore

@@ -58,3 +58,5 @@ docs/_build/
 # PyBuilder
 target/
 
+__init__.py
+

+ 33 - 0
daemon_process.py

@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+
+import json
+from notify import notify
+from time import sleep
+import os
+
+os.system('./run_spider.sh')
+
+with open('./date.json') as json_data:
+    file_content = json.load(json_data)
+
+curr_date = file_content[0]['date']
+
+
+while True:
+    os.system('./run_spider.sh')
+    with open('./date.json') as json_data:
+        file_content = json.load(json_data)
+
+    new_date = file_content[0]['date']
+
+    if new_date != curr_date:
+        notify(new_date)
+        curr_date = new_date
+
+    # Sleep for 30 min.
+    #
+    sleep(1800)
+
+
+# EOF
+# vim: set tabstop=4 shiftwidth=4 expandtab :

+ 3 - 0
date.json

@@ -0,0 +1,3 @@
+[
+{"date": "2018-06-04"}
+]

+ 61 - 0
driver_spider.py

@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+
+import scrapy
+import re
+
+
+class LoginSpider(scrapy.Spider):
+    name = 'uk-driver-spider'
+    login_url = 'https://driverpracticaltest.direct.gov.uk/login'
+    core_url = 'https://driverpracticaltest.direct.gov.uk'
+    start_urls = [login_url]
+
+    def parse(self, response):
+
+        # Get username and password.
+        #
+        with open('./username', 'r') as f:
+            username = f.read()
+        with open('./password', 'r') as f:
+            password = f.read()
+
+        self.logger.info('\nUSER {}PASS {}'.format(username, password))
+        return scrapy.FormRequest.from_response(
+            response,
+            formdata={'username': username, 'password': password},
+            callback=self.after_login
+        )
+
+    def after_login(self, response):
+        # Check login succeed before going on.
+        #
+        if 'authentication failed' in response.body:
+            self.logger.error('Login failed')
+            return
+
+        # Obtain the csrftoken.
+        #
+        href_w_token = response.xpath('//a[@id="date-time-change"]').extract()
+        token = re.search(r'csrftoken=[\w]+', str(href_w_token)).group(0).split('=')[1]
+        self.logger.info(token)
+
+        url = self.core_url + response.xpath('//a[@id="date-time-change"]/@href').extract_first()
+        self.logger.info(url)
+
+        yield scrapy.Request(url, callback=self.after_change)
+
+    def after_change(self, response):
+        return scrapy.FormRequest.from_response(
+            response,
+            callback=self.parse_calendar
+        )
+
+    def parse_calendar(self, response):
+        earliest_date = response.xpath('//td[contains(@class, "bookable")]//a/@href').extract_first()[6:]
+
+        yield{
+            'date': earliest_date
+        }
+
+# EOF
+# vim: set tabstop=4 shiftwidth=4 expandtab :

+ 0 - 0
email_login


+ 0 - 0
email_password


+ 22 - 0
notify.py

@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+
+
+def notify(message):
+
+    import smtplib
+
+    with open('./email_login', 'r') as f:
+        username = f.read()
+    with open('./email_password', 'r') as f:
+        password = f.read()
+    with open('./send_to', 'r') as f:
+        send_to = f.read()
+
+    server = smtplib.SMTP('smtp.gmail.com', 587)
+    server.starttls()
+    server.login(username, password)
+    server.sendmail(username, send_to, message)
+    server.quit()
+
+# EOF
+# vim: set tabstop=4 shiftwidth=4 expandtab :

+ 0 - 0
password


+ 8 - 0
run_spider.sh

@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+rm ./date.json
+scrapy runspider ./driver_spider.py -o ./date.json
+
+
+# EOF
+# vim: set tabstop=4 shiftwidth=4 expandtab :

+ 0 - 0
send_to


Some files were not shown because too many files changed in this diff