# url-check.yml
#
# Pipeline configuration with a single step that runs the
# processors/url_check module (a hecat processor, judging by the notes below).
---
steps:
  - name: check URLs
    module: processors/url_check
    module_options:
      # NOTE(review): presumably the data directories whose entries are scanned
      # for URLs - confirm against the url_check module documentation.
      source_directories:
        - software
        - tags
      # NOTE(review): presumably individual data files scanned in addition to
      # the directories above - confirm.
      source_files:
        - licenses.yml
      # NOTE(review): presumably makes any failed URL check fail the whole run - confirm.
      errors_are_fatal: true
      # URLs matching any of these regular expressions are not checked.
      # Each entry carries the reason it was excluded. Patterns are single-quoted
      # so that backslashes reach the regex engine unchanged.
      exclude_regex:
        - '^https://github.com/[\w\.\-]+/[\w\.\-]+$'  # don't check URLs that will be processed by the github_metadata module
        - '^https://retrospring.net/$'  # DDoS protection page, always returns 403
        - '^https://www.taiga.io/$'  # always returns 403 Request forbidden by administrative rules
        - '^https://docs.paperless-ngx.com/$'  # DDoS protection page, always returns 403
        - '^https://demo.paperless-ngx.com/$'  # DDoS protection page, always returns 403
        - '^https://git.dotclear.org/dev/dotclear$'  # DDoS protection page, always returns 403
        - '^https://word-mastermind.glitch.me/$'  # the demo instance takes a long time to spin up, times out with the default 10s timeout
        - '^https://getgrist.com/$'  # hecat/python-requests bug? 'Received response with content-encoding: gzip,br, but failed to decode it.'
        - '^https://www.uvdesk.com/$'  # DDoS protection page, always returns 403
        - '^https://demo.uvdesk.com/$'  # DDoS protection page, always returns 403
        - '^https://notes.orga.cat/$'  # DDoS protection page, always returns 403
        - '^https://cytu.be$'  # DDoS protection page, always returns 403
        - '^https://demo.reservo.co/$'  # hecat/python-requests bug? always returns 404 but the website works in a browser
        - '^https://crates.io/crates/vigil-server$'  # hecat/python-requests bug? always returns 404 but the website works in a browser
        - '^https://nitter.net$'  # always times out from github actions but the website works in a browser
        - '^https://demo.memories.gallery/apps/memories/$'  # always returns 401
        # The `?` is escaped so it matches the literal query-string separator;
        # unescaped it would only make the preceding `e` optional and the
        # pattern would never match this URL.
        - '^https://tuleap.net/plugins/git/tuleap/tuleap/stable\?p=tuleap%2Fstable.git&a=tree'  # site takes a while to load, times out with the default 10s timeout
        - '^https://www.admidio.org/'  # always times out from github actions but the website works in a browser
        - '^https://demo.aimeos.org/'  # always times out from github actions but the website works in a browser
        - '^https://appflowy.io/'  # Cannot get certificate but loads fine in browser and shows as secure
        - '^https://www.openwebanalytics.com/'  # Always returns 403 but loads in browser
        - '^https://new-ux.spreecommerce.org/'  # always times out from github actions but the website works in a browser
        - '^https://www.zenphoto.org/'  # always times out from github actions but the website works in a browser
        - '^https://alextselegidis.com/try/plainpad/'  # shows 200 when curling, loads fine in browser
        - '^https://alextselegidis.com/get/plainpad/'  # shows 200 when curling, loads fine in browser
        - '^http://www.openwebanalytics.com/'  # shows 200 when curling, loads fine in browser
        - '^https://moodle.org/'  # shows 200 when curling, loads fine in browser