#!/bin/sh
# hostsupdate: download lists of domains from various sources, convert them
# to a format suitable for an /etc/hosts file and finally overwrite that file.
#
# To keep your hosts file's header intact, create /etc/hosts.head; it is
# prepended to the resulting hosts file. Otherwise a sane default header
# is used.

# Safety threshold: if the end result has fewer unique entries than this,
# /etc/hosts is not overwritten.
minlines=100000
# Address placed in front of every domain (some people prefer 127.0.0.1).
prepend="0.0.0.0"
# Tarball of the uAssets repository.
uassets="https://github.com/uBlockOrigin/uAssets/archive/refs/heads/master.tar.gz"
# URL whose response is read as a list of further list URLs, one per line.
metalist="https://v.firebog.net/hosts/lists.php?type=tick"
# Additional list URLs, one per line.
extra_urls="https://malware-filter.gitlab.io/malware-filter/phishing-filter-hosts.txt
https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-hosts.txt
https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/fakenews-gambling-porn-social/hosts"
# Optional user-supplied header, and the file that ultimately gets replaced.
head="/etc/hosts.head"
hosts="/etc/hosts"
# Abort early, naming the culprit, when a required external tool is missing.
for dep in mktemp tar grep sed sort curl find tr; do
  command -v "$dep" >/dev/null 2>&1 || {
    echo "$0: missing dependency: $dep" >&2
    exit 1
  }
done

# All intermediate files live in a private scratch directory. Stop right away
# if it cannot be created, so nothing below runs with an empty $tmpdir.
tmpdir="$(mktemp -d)" || exit 1

# EXIT handler: step out of the scratch directory, then remove it.
cleanup() {
  cd /
  rm -rf -- "$tmpdir"
}
trap cleanup EXIT
cd "$tmpdir" || exit 1

# Header of the generated file: the user's $head if it is readable and can be
# copied, otherwise a default with a timestamp and the localhost entries.
if ! { [ -r "$head" ] && cp "$head" head; }; then
  printf '%s\n' "# $0 $(date)" "127.0.0.1 localhost.localdomain localhost" "::1 localhost6.localdomain6 localhost6 ip6-localhost" > head
fi
# Options shared by every curl call; expanded unquoted on purpose so the
# string splits into separate arguments.
curlopts="--silent --retry 10 --retry-connrefused --retry-delay 5 --location"

# Download the uAssets tarball and extract only the thirdparties/ and filters/
# trees, skipping the publicsuffix files.
# $dir is a glob for the tarball's top-level directory; it is passed quoted
# to tar (as a pattern) and unquoted to find/grep/rm (expanded by the shell).
# NOTE(review): GNU tar only treats member arguments as patterns when given
# --wildcards, whereas bsdtar does so by default -- confirm which tar this
# script is meant to run with.
dir='*u?ssets*'
echo "Downloading $uassets"
curl ${curlopts} "$uassets" -o - | tar xzf - "$dir/thirdparties/" "$dir/filters/" --exclude "$dir/thirdparties/*publicsuffix*"
find $dir -type f -name '*\.md' -delete

# In the bracket expressions below the hyphen is placed last so it is taken
# literally; a backslash inside [...] would itself be a literal character.
echo "uAssets: search for AdBlock formatted entries (hosts only)"
regex='^\|\|[a-zA-Z0-9.-]*\^[[:space:]]*$'
grep -rhE "$regex" $dir/ | tr -d '|^' > list-uassets

echo "uAssets: search for domain only entries, 1 per line"
regex='^[[:space:]]*[a-zA-Z0-9][a-zA-Z0-9.-]*[a-zA-Z0-9][[:space:]]*$'
grep -rhE "$regex" $dir/ >> list-uassets

# grep -E needs a bare "+" for "one or more"; the sed expression that strips
# the leading address is a basic regex, where GNU sed spells it "\+".
echo "uAssets: search for hosts file syntax entries"
regex='^[[:space:]]*[0-9]+\.0\.0\.[01][[:space:]]+[a-zA-Z0-9][a-zA-Z0-9.-]*[a-zA-Z0-9][[:space:]]*$'
grep -rhE "$regex" $dir/ | sed 's/^[[:space:]]*[0-9]\+\.0\.0\.[01][[:space:]]\+//g' >> list-uassets
rm -r $dir/
#########################################
# Fetch the list of list-URLs, append our own extra URLs, then download every
# http(s) URL into one combined file.
curl ${curlopts} "$metalist" -o meta.list
# The leading newline keeps the first extra URL on its own line even when the
# downloaded list does not end with a newline; blank lines are skipped below.
printf '\n%s\n' "$extra_urls" >> meta.list
while read -r url; do
  # Only lines whose scheme is http or https are treated as URLs.
  [ "${url%%:*}" = http ] || [ "${url%%:*}" = https ] || continue
  echo "Downloading $url"
  curl ${curlopts} "$url" >>list-meta
  # Guard against a list without a trailing newline fusing its last entry
  # with the first line of the next list; empty lines are deleted by sed.
  echo >>list-meta
done <meta.list

# Reduce every downloaded line to a bare domain: drop comments, leading
# addresses, localhost placeholders, AdBlock decorations and all whitespace.
echo "list-meta: removing unwanted stuff"
sed -i '
/^#.*/d
s/^127\.0\.0\.1[[:space:]]*//g
s/0\.0\.0\.0[[:space:]]*//g
s/::1[[:space:]]*//g
/^0\.0\.0\.0[[:space:]]*$/d
/^127\.0\.0\.1[[:space:]]*$/d
/^localhost[[:space:]]*$/d
/^Malvertising list/d
/^malware-check.disconnect.me/d
s/#.*//g
s/\^.*//g
s/||//g
s/[[:space:]]*//g
/^$/d
' list-meta
# Merge both lists into hosts.new, sorted and de-duplicated
# (-b ignore leading blanks, -f fold case, -i ignore non-printing characters).
echo "Unique-sorting all lists"
sort -bfiu -o hosts.new list-meta list-uassets
rm list-meta list-uassets meta.list

# Count the entries. sed prints nothing for an empty or missing file, so fall
# back to 0; otherwise the numeric test below would error out instead of
# stopping the script, and an empty list would replace the hosts file.
lines="$(sed -n '$=' hosts.new)"
lines="${lines:-0}"
if [ "$lines" -lt "$minlines" ]; then
  echo "Not overwriting $hosts with less than $minlines unique entries ($lines)"
  exit 1
fi

# Turn every domain into a hosts entry and install header + entries.
echo "Prepending $prepend..."
sed -i "s/^/$prepend /g" hosts.new
cat head hosts.new > hosts && mv -f hosts "$hosts"