#!/bin/sh
# hostsupdate
#
# hostsupdate will download lists of domains from various sources
# and convert them to a format suitable for an /etc/hosts file,
# ultimately overwriting said file.
# If you want to keep your hosts file's header intact, create /etc/hosts.head,
# which will be prepended to the resulting hosts file;
# otherwise a sane default version will be used.
  # --- Configuration ---------------------------------------------------------
  # Minimum number of unique entries the merged list must contain; if the end
  # result has fewer, /etc/hosts is not overwritten.
  minlines=100000
  # Address every listed domain is mapped to (some people prefer 127.0.0.1).
  prepend="0.0.0.0"
  # Tarball of the uBlock Origin uAssets repository (master branch).
  uassets="https://github.com/uBlockOrigin/uAssets/archive/refs/heads/master.tar.gz"
  # URL whose response is saved as meta.list and read as one list URL per line.
  metalist="https://v.firebog.net/hosts/lists.php?type=tick"
  # Additional list URLs, one per line. The continuation lines are part of the
  # quoted value, so they must stay unindented.
  extra_urls="https://malware-filter.gitlab.io/malware-filter/phishing-filter-hosts.txt
https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-hosts.txt
https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/fakenews-gambling-porn-social/hosts"
  # Optional user-supplied header that is prepended to the generated file.
  head="/etc/hosts.head"
  # File that is ultimately overwritten.
  hosts="/etc/hosts"
  # Abort early if any external tool the script relies on is missing.
  # find and tr are used further down as well, so they are checked too.
  for dep in mktemp tar grep sed sort curl find tr; do
    command -v "$dep" >/dev/null 2>&1 || {
      printf '%s: missing required command: %s\n' "$0" "$dep" >&2
      exit 1
    }
  done
  # Private scratch directory for all intermediate files; stop if it cannot
  # be created instead of continuing with an empty path.
  tmpdir="$(mktemp -d)" || exit 1
  # cleanup: leave and remove the temporary working directory.
  # Globals: tmpdir (read)
  # Returns: 0; a missing or empty tmpdir is silently ignored.
  cleanup() {
    # Step out of the directory before deleting it. "cd -" prints the target
    # directory and fails when OLDPWD is unset, so silence it and fall back
    # to /.
    cd - >/dev/null 2>&1 || cd /
    # Never hand an empty path to rm.
    if [ -n "${tmpdir:-}" ] && [ -d "$tmpdir" ]; then
      rm -rf -- "$tmpdir"
    fi
  }
  # Run cleanup however the script ends.
  trap cleanup EXIT
  # All intermediate files are created relative to the scratch directory.
  cd "$tmpdir" || exit 1
  # Header of the generated file: a copy of $head when it is readable and the
  # copy succeeds, otherwise a minimal default with the localhost entries.
  if ! { [ -r "$head" ] && cp "$head" head; }; then
    printf '%s\n' "# $0 $(date)" "127.0.0.1 localhost.localdomain localhost" "::1 localhost6.localdomain6 localhost6 ip6-localhost" > head
  fi
  # Options shared by every curl call; expanded unquoted on purpose so the
  # words are split into separate arguments.
  curlopts="--silent --retry 10 --retry-connrefused --retry-delay 5 --location"

  # Glob for the top-level directory of the extracted archive. It is passed
  # quoted to tar (as a member pattern) and expanded unquoted by the shell
  # everywhere else.
  dir='*u?ssets*'

  # uassets_extract KIND PATH...
  # Recursively search PATH... and print one host name per matching line.
  #   adblock  lines of the form ||host^        (the | and ^ are removed)
  #   plain    lines holding a single host name
  #   hosts    lines of the form N.0.0.0 host or N.0.0.1 host (address removed)
  # Inside a bracket expression a backslash is literal, so "-" goes last in
  # the set to be matched; with grep -E "+" is the repetition operator while
  # "\+" would match a literal plus sign.
  # Returns: status of the pipeline, or 2 for an unknown KIND.
  uassets_extract() {
    _kind=$1
    shift
    case $_kind in
      adblock)
        grep -rhE '^\|\|[a-zA-Z0-9.-]*\^[[:space:]]*$' "$@" | tr -d '|^'
        ;;
      plain)
        grep -rhE '^[[:space:]]*[a-zA-Z0-9][a-zA-Z0-9.-]*[a-zA-Z0-9][[:space:]]*$' "$@"
        ;;
      hosts)
        grep -rhE '^[[:space:]]*[0-9]+\.0\.0\.[01][[:space:]]+[a-zA-Z0-9][a-zA-Z0-9.-]*[a-zA-Z0-9][[:space:]]*$' "$@" |
          sed -E 's/^[[:space:]]*[0-9]+\.0\.0\.[01][[:space:]]+//'
        ;;
      *)
        return 2
        ;;
    esac
  }

  # Download & extract only what we need.
  # NOTE(review): GNU tar only treats member arguments as patterns when
  # --wildcards is given, while bsdtar matches them by default - confirm
  # which tar this script targets.
  echo "Downloading $uassets"
  curl ${curlopts} "$uassets" -o - | tar xzf - "$dir/thirdparties/" "$dir/filters/" --exclude "$dir/thirdparties/*publicsuffix*"
  find $dir -type f -name '*\.md' -delete
  echo "uAssets: search for AdBlock formatted entries (hosts only)"
  uassets_extract adblock $dir/ > list-uassets
  echo "uAssets: search for domain only entries, 1 per line"
  uassets_extract plain $dir/ >> list-uassets
  echo "uAssets: search for hosts file syntax entries"
  uassets_extract hosts $dir/ >> list-uassets
  rm -r $dir/
  #########################################
  # is_web_url URL
  # Succeeds when the part of URL before the first ":" is exactly "http" or
  # "https"; blank lines, comments and other schemes are rejected.
  is_web_url() {
    case ${1%%:*} in
      http | https) return 0 ;;
      *) return 1 ;;
    esac
  }

  curl ${curlopts} "$metalist" -o meta.list
  # Append the extra lists. The leading newline keeps the first extra URL on
  # its own line even when the downloaded file lacks a final newline; the
  # resulting blank line is skipped by the scheme check below.
  printf '\n%s\n' "$extra_urls" >> meta.list
  while read -r url; do
    is_web_url "$url" || continue
    echo "Downloading $url"
    curl ${curlopts} "$url" >> list-meta
    # Terminate every download so a list without a final newline cannot fuse
    # its last entry with the first entry of the next list.
    echo >> list-meta
  done < meta.list
  # clean_list
  # Filter: reads downloaded list lines on stdin, writes cleaned lines to
  # stdout. In order, it
  #   - drops lines starting with "#",
  #   - removes a leading 127.0.0.1 and any 0.0.0.0 or ::1, with the
  #     whitespace that follows,
  #   - drops lines that are only 0.0.0.0, 127.0.0.1 or localhost, and the
  #     "Malvertising list" / "malware-check.disconnect.me" lines,
  #   - cuts everything from the first "#" or "^" to the end of the line,
  #   - removes "||" and all remaining whitespace,
  #   - drops lines that ended up empty.
  clean_list() {
    sed '
      /^#.*/d
      s/^127\.0\.0\.1[[:space:]]*//g
      s/0\.0\.0\.0[[:space:]]*//g
      s/::1[[:space:]]*//g
      /^0\.0\.0\.0[[:space:]]*$/d
      /^127\.0\.0\.1[[:space:]]*$/d
      /^localhost[[:space:]]*$/d
      /^Malvertising list/d
      /^malware-check.disconnect.me/d
      s/#.*//g
      s/\^.*//g
      s/||//g
      s/[[:space:]]*//g
      /^$/d
    '
  }

  echo "list-meta: removing unwanted stuff"
  # Filter into a new file and move it into place: "sed -i" without a suffix
  # argument is not portable across sed implementations.
  clean_list < list-meta > list-meta.clean && mv -f list-meta.clean list-meta
  # count_lines FILE
  # Print the number of lines in FILE. An empty or unreadable file yields 0
  # rather than an empty string, so the result is always usable in a numeric
  # test.
  count_lines() {
    _n=$(sed -n '$=' "$1" 2>/dev/null)
    printf '%s\n' "${_n:-0}"
  }

  echo "Unique-sorting all lists"
  sort -bfiuo hosts.new list-meta list-uassets
  rm list-meta list-uassets meta.list
  lines="$(count_lines hosts.new)"
  # Safety net: refuse to install a suspiciously short list, for example
  # after failed downloads. This includes the case of no entries at all.
  if [ "$lines" -lt "$minlines" ]; then
    echo "Not overwriting $hosts with less than $minlines unique entries ($lines)"
    exit 1
  fi
  echo "Prepending $prepend..."
  # Build the complete file in the working directory first and only replace
  # the target once header and entries have both been written successfully.
  { cat head && sed "s/^/$prepend /" hosts.new; } > hosts && mv -f hosts "$hosts"