From 1c83d822d6a1433677d59ded3ff009ff874bdeec Mon Sep 17 00:00:00 2001
From: Ben Busby
Date: Fri, 9 Dec 2022 10:57:39 -0700
Subject: Filter against cloudflare proxy (not nameservers)

The previous approach to filtering cloudflare instances was checking if
cloudflare nameservers were assigned to that domain. I believe this was
too harsh of a filter, as quite a few instances were removed even though
they were not using cloudflare's proxying feature.

To solve this, the filtering script has been updated to manually fetch
each IP associated with a domain and see if it returns the 1003 error
that cloudflare proxied sites return when queried directly.
---
 tools/un-cloudflare.sh | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/un-cloudflare.sh b/tools/un-cloudflare.sh
index dd5c716..dd2f726 100755
--- a/tools/un-cloudflare.sh
+++ b/tools/un-cloudflare.sh
@@ -7,12 +7,27 @@ file="services-full.json"
 while read -r line; do
     if [[ "$line" == "\"https://"* ]]; then
         domain=$(echo "$line" | sed -e "s/^\"https:\/\///" -e "s/\",//" -e "s/\"//")
-        ns=$(dig ns "$domain" || true)
-        if [[ "$ns" == *"cloudflare"* ]]; then
-            echo "\"$domain\" using cloudflare, skipping..."
-        elif [[ "$ns" != *"NOERROR"* ]]; then
-            echo "Unable to verify records for \"$domain\", skipping..."
-        else
+        ips=$(dig "$domain" +short || true)
+        cf=0
+        echo "$domain"
+
+        for ip in $ips
+        do
+            echo " - $ip"
+            resp=$(curl -s "$ip")
+
+            # Cloudflare does not allow accessing sites using their IP,
+            # and returns a 1003 error code when attempting to do so. This
+            # allows us to check for sites using Cloudflare for proxying,
+            # rather than just their nameservers.
+            if [[ "$resp" == *"error code: 1003"* ]]; then
+                cf=1
+                echo " ! Using cloudflare proxy, skipping..."
+                break
+            fi
+        done
+
+        if [ $cf -eq 0 ]; then
             echo "$line" >> out.json
         fi
     else
@@ -21,7 +36,7 @@ while read -r line; do
 done <$file
 
 # Remove any trailing commas from new instance lists
-sed -i -e ':begin' -e '$!N' -e 's/,\n]/\n]/g' -e 'tbegin' -e 'P' -e 'D' out.json
+#sed -i -e ':begin' -e '$!N' -e 's/,\n]/\n]/g' -e 'tbegin' -e 'P' -e 'D' out.json
 
-cat out.json | jq --indent 2 . > services.json
-rm -f out.json
+#cat out.json | jq --indent 2 . > services.json
+#rm -f out.json
-- 
cgit v1.2.3