parse-crawled.scm 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. #!/bin/sh
  2. # -*- scheme -*-
  3. exec guile -e main -s "$0" "$@"
  4. !#
  5. ;; Parser to turn a set of downloaded WoT files into a standard graph format.
  6. (use-modules (web request)
  7. (web client)
  8. (web response)
  9. (web uri)
  10. (web http)
  11. (ice-9 threads)
  12. (ice-9 vlist)
  13. (ice-9 rdelim)
  14. (rnrs io ports)
  15. (ice-9 match)
  16. (srfi srfi-42)
  17. (srfi srfi-1)
  18. (rnrs bytevectors)
  19. (sxml simple)
  20. (sxml match)
  21. (ice-9 ftw))
  22. (define (non-breaking-sxml-reader xml-port)
  23. (catch #t
  24. (lambda () (xml->sxml xml-port))
  25. (lambda (key . args) (format #t "~A: ~A" key args)(newline) '())))
  26. (define (wot-uri-key uri)
  27. (let ((index (string-index uri #\/)))
  28. (if index
  29. (string-take uri index)
  30. uri))) ;; no / in uri, so it is already a key.
  31. (define (wot-file-key filename)
  32. (let* ((pubkey-identifier ",AQACAAE")
  33. (index (string-contains filename pubkey-identifier)))
  34. (if index
  35. (string-take filename (+ index (string-length pubkey-identifier)))
  36. filename)))
  37. (define (parse-trust-values filename)
  38. (let* ((port (open-input-file filename))
  39. (sxml (non-breaking-sxml-reader port))
  40. (closed (close-port port))
  41. (trust '()))
  42. (let extract-trust ((sxml sxml))
  43. (match sxml
  44. (('Trust ('@ ('Value value) ('Identity uri) rest ...))
  45. (set! trust
  46. (cons (cons (wot-uri-key uri)
  47. (string->number value))
  48. trust)))
  49. ((a b ...)
  50. (map extract-trust sxml))
  51. (else '())))
  52. (cons (wot-file-key filename) trust)))
  53. (define* (trust-lists->csv trusts #:key (target-filename #f))
  54. "Format the list of trust lists as csv file.
  55. See https://gephi.org/users/supported-graph-formats/csv-format/
  56. steps:
  57. - trusting ids: map car trusts
  58. - additional ids: every ID not in trusting ids.
  59. Source;Target;Weight
  60. A;B;0
  61. B;A;1
  62. "
  63. (let ((port (if target-filename
  64. (open-output-file target-filename)
  65. (current-output-port)))
  66. (ids (map car trusts)))
  67. (display "Source;Target;Weight" port)
  68. (newline port)
  69. (let write-edges ((trusts trusts))
  70. (cond ((null? trusts) #t)
  71. (else
  72. (let* ((id (car (car trusts)))
  73. (trusted (cdr (car trusts))))
  74. (let write-trust ((trusted trusted))
  75. (cond ((null? trusted) #t)
  76. (else
  77. (let* ((edge (car trusted))
  78. (trustee (car edge))
  79. (weight (cdr edge)))
  80. (when weight ; avoid stumbling over incorrectly formatted trust values
  81. (format port "~A;~A;~f\n" id trustee weight))
  82. (write-trust (cdr trusted)))))))
  83. (write-edges (cdr trusts)))))
  84. ; (write (car trusts))
  85. ; (newline)
  86. (when target-filename (close-port port))))
  87. (define (main args)
  88. (let ((dir (if (null? (cdr args))
  89. "."
  90. (car (cdr args)))))
  91. (let* ((select? (lambda (x) (or (equal? x ".") (string-prefix? "USK@" x))))
  92. (files (cdr (scandir dir select?))))
  93. (trust-lists->csv
  94. (par-map parse-trust-values files)
  95. #:target-filename "trust.csv"))))