url-queue.el 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. ;;; url-queue.el --- Fetching web pages in parallel
  2. ;; Copyright (C) 2011-2012 Free Software Foundation, Inc.
  3. ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
  4. ;; Keywords: comm
  5. ;; This file is part of GNU Emacs.
  6. ;; GNU Emacs is free software: you can redistribute it and/or modify
  7. ;; it under the terms of the GNU General Public License as published by
  8. ;; the Free Software Foundation, either version 3 of the License, or
  9. ;; (at your option) any later version.
  10. ;; GNU Emacs is distributed in the hope that it will be useful,
  11. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. ;; GNU General Public License for more details.
  14. ;; You should have received a copy of the GNU General Public License
  15. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  16. ;;; Commentary:
  17. ;; The point of this package is to allow fetching web pages in
  18. ;; parallel -- but control the level of parallelism to avoid DoS-ing
  19. ;; web servers and Emacs.
  20. ;;; Code:
  21. (eval-when-compile (require 'cl))
  22. (require 'browse-url)
  23. (require 'url-parse)
  (defcustom url-queue-parallel-processes 6
    "Maximum number of queued retrievals allowed to run at the same time.
Jobs added with `url-queue-retrieve' beyond this limit stay in the
queue until `url-queue-run-queue' finds a free slot for them."
    :group 'url
    :type 'integer
    :version "24.1")
  (defcustom url-queue-timeout 5
    "Number of seconds a job may run, counted from the moment it is started.
Jobs that have been running longer than this are killed by
`url-queue-prune-old-entries', and their callback is called with an
error status."
    :group 'url
    :type 'integer
    :version "24.1")
  34. ;;; Internal variables.
  (defvar url-queue '()
    "List of `url-queue' job structures, in the order they were queued.
It holds both jobs that are waiting and jobs that are running.")

  ;; One queued retrieval.  Slot order is kept as-is.
  (defstruct url-queue
    ;; Arguments exactly as passed to `url-queue-retrieve'.
    url
    callback
    cbargs
    silentp
    ;; Value returned by `url-retrieve' for this job (nil if it failed
    ;; to start).
    buffer
    ;; `float-time' at which the retrieval was started; nil while waiting.
    start-time
    ;; Non-nil once an idle timer has been scheduled on behalf of this job.
    pre-triggered
    ;; Passed through to `url-retrieve' as its INHIBIT-COOKIES argument.
    inhibit-cookiesp)
  40. ;;;###autoload
  (defun url-queue-retrieve (url callback &optional cbargs silent inhibit-cookies)
    "Queue URL for asynchronous retrieval, then call CALLBACK with CBARGS.
The arguments have the same meaning as for `url-retrieve' (which see),
but the number of retrievals running at once is limited by the
variable `url-queue-parallel-processes'.  The variable
`url-queue-timeout' limits how long a started retrieval may run."
    (let ((job (make-url-queue :url url
                               :callback callback
                               :cbargs cbargs
                               :silentp silent
                               :inhibit-cookiesp inhibit-cookies)))
      ;; New jobs go at the tail, so the queue is served in arrival order.
      (setq url-queue (append url-queue (list job)))
      (url-queue-setup-runners)))
  55. ;; To ensure asynch behaviour, we start the required number of queue
  56. ;; runners from `run-with-idle-timer'. So we're basically going
  57. ;; through the queue in two ways: 1) synchronously when a program
  58. ;; calls `url-queue-retrieve' (which will then start the required
  59. ;; number of queue runners), and 2) at the exit of each job, which
  60. ;; will then not start any further threads, but just reuse the
  61. ;; previous "slot".
  (defun url-queue-setup-runners ()
    "Schedule one queue runner if a job is waiting and a slot is free.
A job occupies a slot once it has a start time or has been marked
pre-triggered.  The first job that is neither is the candidate; if
fewer than `url-queue-parallel-processes' slots are taken, mark it
pre-triggered and run `url-queue-run-queue' from an idle timer."
    (let ((busy 0)
          (candidate nil))
      (dolist (job url-queue)
        (if (or (url-queue-start-time job)
                (url-queue-pre-triggered job))
            (setq busy (1+ busy))
          ;; Only the first idle job is remembered.
          (unless candidate
            (setq candidate job))))
      (when (and candidate
                 (< busy url-queue-parallel-processes))
        (setf (url-queue-pre-triggered candidate) t)
        (run-with-idle-timer 0.01 nil 'url-queue-run-queue))))
  (defun url-queue-run-queue ()
    "Prune timed-out jobs, then start the first waiting job if a slot is free.
A job counts as running when it has a start time.  At most one job is
started per call; its start time is set to the current `float-time'
before the retrieval is launched."
    (url-queue-prune-old-entries)
    (let ((active 0)
          (next nil))
      (dolist (job url-queue)
        (if (url-queue-start-time job)
            (setq active (1+ active))
          ;; Only the first not-yet-started job is remembered.
          (unless next
            (setq next job))))
      (when (and next
                 (< active url-queue-parallel-processes))
        (setf (url-queue-start-time next) (float-time))
        (url-queue-start-retrieve next))))
  (defun url-queue-callback-function (status job)
    "Finish JOB: dequeue it, refill its slot, and call its callback.
STATUS is the status list handed over by `url-retrieve'.  It is passed
on to JOB's callback, followed by JOB's callback arguments."
    (setq url-queue (delq job url-queue))
    (let ((err (cadr status)))
      (when (and (eq (car status) :error)
                 (eq (cadr err) 'connection-failed))
        ;; A connection error flushes every other job for the same host
        ;; from the queue.  This particularly makes sense if the error
        ;; really is a DNS resolver issue, which happens synchronously
        ;; and totally halts Emacs.
        (url-queue-remove-jobs-from-host
         (plist-get (nthcdr 3 err) :host))))
    ;; Hand the freed slot to the next waiting job before running the
    ;; caller's callback.
    (url-queue-run-queue)
    (apply (url-queue-callback job) status (url-queue-cbargs job)))
  (defun url-queue-remove-jobs-from-host (host)
    "Kill and dequeue every job in `url-queue' whose URL host equals HOST.
Each matching job is removed from the queue and then passed to
`url-queue-kill-job', which calls the job's callback with an error
status.  Always returns nil."
    (let ((doomed nil))
      (dolist (job url-queue)
        (let* ((url (url-queue-url job))
               ;; `url-queue-retrieve' accepts whatever `url-retrieve'
               ;; accepts, so the URL may already be a parsed URL object;
               ;; only strings need parsing.
               (parsed (if (stringp url)
                           (url-generic-parse-url url)
                         url)))
          (when (equal (url-host parsed) host)
            (push job doomed))))
      (dolist (job doomed)
        ;; Dequeue before killing: `url-queue-kill-job' runs the caller's
        ;; callback, and if that signals an error the job must not be
        ;; left behind in the queue.
        (setq url-queue (delq job url-queue))
        (url-queue-kill-job job))))
  (defun url-queue-start-retrieve (job)
    "Start retrieving JOB's URL and store the result in JOB's buffer slot.
The retrieval is done by `url-retrieve', which is given
`url-queue-callback-function' as its callback and JOB as the callback
argument.  Errors signaled while starting the retrieval are ignored,
in which case the buffer slot is set to nil."
    (let ((buffer (ignore-errors
                    (url-retrieve (url-queue-url job)
                                  #'url-queue-callback-function (list job)
                                  (url-queue-silentp job)
                                  (url-queue-inhibit-cookiesp job)))))
      (setf (url-queue-buffer job) buffer)))
  (defun url-queue-prune-old-entries ()
    "Kill and dequeue jobs that have run longer than `url-queue-timeout'.
Only jobs with a start time are considered.  Each expired job is
removed from `url-queue' and then passed to `url-queue-kill-job',
which calls the job's callback with an error status.  Always returns
nil."
    (let ((now (float-time))
          (expired nil))
      (dolist (job url-queue)
        (let ((started (url-queue-start-time job)))
          (when (and started
                     (> (- now started) url-queue-timeout))
            (push job expired))))
      (dolist (job expired)
        ;; Dequeue before killing: `url-queue-kill-job' runs the caller's
        ;; callback.  If that signaled while the job was still queued,
        ;; the job would be pruned (and the callback re-run) on every
        ;; later call, and the queue would stop advancing.
        (setq url-queue (delq job url-queue))
        (url-queue-kill-job job))))
  (defun url-queue-kill-job (job)
    "Kill the processes of JOB and report the failure to JOB's callback.
The callback is called with an error status whose error data is
`url-queue-timeout', followed by JOB's callback arguments.  It runs
with JOB's buffer current if that buffer is still live, and otherwise
in a freshly generated buffer."
    (let ((buffer (url-queue-buffer job)))
      (when (bufferp buffer)
        ;; Delete every process attached to the buffer.  The sentinel is
        ;; replaced first so that deleting the process triggers nothing.
        (let ((proc (get-buffer-process buffer)))
          (while proc
            (set-process-sentinel proc 'ignore)
            (ignore-errors
              (delete-process proc))
            (setq proc (get-buffer-process buffer)))))
      ;; Call the callback with an error status so the caller learns
      ;; that the job has failed.
      (with-current-buffer (if (buffer-live-p buffer)
                               ;; Use the (partially filled) process
                               ;; buffer if it exists.
                               buffer
                             ;; Otherwise create a new buffer, which will
                             ;; probably be killed again by the caller.
                             (generate-new-buffer " *temp*"))
        (apply (url-queue-callback job)
               (list :error
                     (list 'error 'url-queue-timeout "Queue timeout exceeded"))
               (url-queue-cbargs job)))))
  150. (provide 'url-queue)
  151. ;;; url-queue.el ends here