#!/usr/bin/perl -s # This descends from huepl and TTYtter, and is therefore under the Floodgap # Free Software License. It is not required to build or run Firefox or # TenFourFox. BEGIN { $VERSION = "v0.5"; if ($] >= 5.014000 && $ENV{'PERL_SIGNALS'} ne 'unsafe') { $signals_use_posix = 1; } else { $ENV{'PERL_SIGNALS'} = 'unsafe'; } } $lastexception = 0; @wend = ('-L', '-s', '-m', '20', '-A', '', '-H', 'Expect:'); eval "use POSIX;"; $URL = "https://raw.githubusercontent.com/mozilla-services/shavar-prod-lists/master/disconnect-blacklist.json"; $exit_mode = 0; $stringify_args = sub { my $basecom = shift; my $resource = shift; my $data = shift; my $p; my $l = ''; foreach $p (@_) { if ($p =~ /^-/) { $l .= "\n" if (length($l)); $l .= "$p "; next; } $l .= $p; } $l .= "\n"; # if resource is an arrayref, then it's a GET with URL # and args $resource = join('?', @{ $resource }) if (ref($resource) eq 'ARRAY'); $l .= "url = \"$resource\"\n"; $data =~ s/"/\\"/g; $l .= "data = \"$data\"\n" if length($data); return ("$basecom -K -", $l, undef); }; sub sigify { # this routine abstracts setting signals to a subroutine reference. # check and see if we have to use POSIX.pm (Perl 5.14+) or we can # still use $SIG for proper signalling. We prefer the latter, but # must support the former. my $subref = shift; my $k; if ($signals_use_posix) { my @w; my $sigaction = POSIX::SigAction->new($subref); while ($k = shift) { my $e = &posix_signal_of($k); # some signals may not exist on all systems. next if (!(0+$e)); POSIX::sigaction($e, $sigaction) || die("sigaction failure: $! $@\n"); } } else { while ($k = shift) { $SIG{$k} = $subref; } } } sub posix_signal_of { die("never call posix_signal_of if signals_use_posix is false\n") if (!$signals_use_posix); # this assumes that POSIX::SIG* returns a scalar int value. # not all signals exist on all systems. this ensures zeroes are # returned for locally bogus ones. return 0+(eval("return POSIX::SIG".shift)); } sub parsejson { my $data = shift; my $my_json_ref = undef; # durrr hat go on foot my $i; my $tdata; my $seed; my $bbqqmask; my $ddqqmask; my $ssqqmask; # test for single logicals return { 'ok' => 1, 'result' => (($1 eq 'true') ? 1 : 0), 'literal' => $1, } if ($data =~ /^['"]?(true|false)['"]?$/); # first isolate escaped backslashes with a unique sequence. $bbqqmask = "BBQQ"; $seed = 0; $seed++ while ($data =~ /$bbqqmask$seed/); $bbqqmask .= $seed; $data =~ s/\\\\/$bbqqmask/g; # next isolate escaped quotes with another unique sequence. $ddqqmask = "DDQQ"; $seed = 0; $seed++ while ($data =~ /$ddqqmask$seed/); $ddqqmask .= $seed; $data =~ s/\\\"/$ddqqmask/g; # then turn literal ' into another unique sequence. you'll see # why momentarily. $ssqqmask = "SSQQ"; $seed = 0; $seed++ while ($data =~ /$ssqqmask$seed/); $ssqqmask .= $seed; $data =~ s/\'/$ssqqmask/g; # here's why: we're going to turn doublequoted strings into single # quoted strings to avoid nastiness like variable interpolation. $data =~ s/\"/\'/g; # and then we're going to turn the inline ones all back except # ssqq, which we'll do last so that our syntax checker still works. $data =~ s/$bbqqmask/\\\\/g; $data =~ s/$ddqqmask/"/g; print STDOUT "$data\n" if ($superverbose); # first, generate a syntax tree. $tdata = $data; 1 while $tdata =~ s/'[^']*'//; # empty strings are valid too ... $tdata =~ s/-?[0-9]+\.?[0-9]*([eE][+-][0-9]+)?//g; # have to handle floats *and* their exponents $tdata =~ s/(true|false|null)//g; $tdata =~ s/\s//g; print STDOUT "$tdata\n" if ($superverbose); # now verify the syntax tree. # the remaining stuff should just be enclosed in [ ], and only {}:, # for example, imagine if a bare semicolon were in this ... if ($tdata !~ s/^\[// || $tdata !~ s/\]$// || $tdata =~ /[^{}:,]/) { $tdata =~ s/'[^']*$//; # cut trailing strings if (($tdata =~ /^\[/ && $tdata !~ /\]$/) || ($tdata =~ /^\{/ && $tdata !~ /\}$/)) { # incomplete transmission &exception(10, "*** JSON warning: connection cut\n"); return undef; } if ($tdata =~ /(^|[^:])\[\]($|[^},])/) { # oddity &exception(11, "*** JSON warning: null list\n"); return undef; } # at this point all we should have are structural elements. # if something other than JSON structure is visible, then # the syntax tree is mangled. don't try to run it, it # might be unsafe. if ($tdata =~ /[^\[\]\{\}:,]/) { &exception(99, "*** JSON syntax error\n"); die(<<"EOF"); --- data received --- $data --- syntax tree --- $tdata --- JSON PARSING ABORTED DUE TO SYNTAX TREE FAILURE -- EOF exit; return undef; } } # syntax tree passed, so let's turn it into a Perl reference. # have to turn colons into ,s or Perl will gripe. but INTELLIGENTLY! 1 while ($data =~ s/([^'])'\s*:\s*(true|false|null|\'|\{|\[|-?[0-9])/\1\',\2/); # removing whitespace to improve interpretation speed actually made # it SLOWER. #($data =~ s/'\s*,\s*/',/sg); #($data =~ s/\n\s*//sg); # finally, single quotes, just before interpretation. $data =~ s/$ssqqmask/\\'/g; # now somewhat validated, so safe (?) to eval() into a Perl struct eval "\$my_json_ref = $data;"; print STDOUT "$data => $my_json_ref $@\n" if ($superverbose); # do a sanity check if (!defined($my_json_ref)) { &exception(99, "*** JSON syntax error\n"); print STDOUT <<"EOF" if ($verbose); --- data received --- $data --- syntax tree --- $tdata --- JSON PARSING FAILED -- $@ --- JSON PARSING FAILED -- EOF } return $my_json_ref; } sub backticks { # more efficient/flexible backticks system my $comm = shift; my $rerr = shift; my $rout = shift; my $resource = shift; my $data = shift; my $dont_do_auth = shift; my $buf = ''; my $undersave = $_; my $pid; my $args; ($comm, $args, $data) = &$stringify_args($comm, $resource, $data, $dont_do_auth, @_); print STDOUT "$comm\n$args\n$data\n" if ($superverbose); if(open(BACTIX, '-|')) { while() { $buf .= $_; } close(BACTIX); $_ = $undersave; return $buf; # and $? is still in $? } else { $in_backticks = 1; &sigify(sub { die( "** user agent not honouring timeout (caught by sigalarm)\n"); }, qw(ALRM)); alarm 120; # this should be sufficient if (length($rerr)) { close(STDERR); open(STDERR, ">$rerr"); } if (length($rout)) { close(STDOUT); open(STDOUT, ">$rout"); } if(open(FRONTIX, "|$comm")) { print FRONTIX "$args\n"; print FRONTIX "$data" if (length($data)); close(FRONTIX); } else { die( "backticks() failure for $comm $rerr $rout @_: $!\n"); } $rv = $? >> 8; exit $rv; } } sub wherecheck { my ($prompt, $filename, $fatal) = (@_); my (@paths) = split(/\:/, $ENV{'PATH'}); my $setv = ''; push(@paths, '/usr/bin'); # the usual place @paths = ('') if ($filename =~ m#^/#); # for absolute paths foreach(@paths) { if (-r "$_/$filename") { $setv = "$_/$filename"; 1 while $setv =~ s#//#/#; last; } } if (!length($setv)) { die ($fatal) if ($fatal); exit(1); } return $setv; } sub url_oauth_sub { my $x = shift; $x =~ s/([^-0-9a-zA-Z._~])/"%".uc(unpack("H*",$1))/eg; return $x; } # this is a sucky nonce generator. I was looking for an awesome nonce # generator, and then I realized it would only be used once, so who cares? # *rimshot* sub generate_nonce { unpack("H32", pack("u", rand($$).$$.time())); } sub exception { my ($num, $tex) = (@_); $lastexception = $num; print STDOUT "$tex" if ($verbose); } sub grabjson { my $url = shift; my $no_auth = shift; my $data; chomp($data = &backticks($curl, '/dev/null', undef, $url, undef, $no_auth, @wind)); return &genericnetworkjson($data, $url, $no_auth); } sub postjson { my $url = shift; my $postdata = shift; my $no_auth = shift; my $data; chomp($data = &backticks($curl, '/dev/null', undef, $url, $postdata, $no_auth, @wend)); return &genericnetworkjson($data, $url, $no_auth); } sub putjson { my $url = shift; my $postdata = shift; my $no_auth = shift; my $data; chomp($data = &backticks($curl, '/dev/null', undef, $url, $postdata, $no_auth, @wund)); return &genericnetworkjson($data, $url, $no_auth); } sub genericnetworkjson { my $data = shift; my $url = shift; my $no_auth = shift; my $my_json_ref = undef; # durrr hat go on foot my $i; my $tdata; my $seed; my $k = $? >> 8; $data =~ s/[\r\l\n\s]*$//s; $data =~ s/^[\r\l\n\s]*//s; if (!length($data) || $k == 28 || $k == 7 || $k == 35) { &exception(1, "*** warning: timeout or no data\n"); return undef; } if ($k > 0) { &exception(4, "*** warning: unexpected error code ($k) from user agent\n"); return undef; } # handle things like 304, or other things that look like HTTP # error codes if ($data =~ m#^HTTP/\d\.\d\s+(\d+)\s+#) { $code = 0+$1; print $stdout $data if ($superverbose); # 304 is actually a cop-out code and is not usually # returned, so we should consider it a non-fatal error if ($code == 304 || $code == 200 || $code == 204) { &exception(1, "*** warning: timeout or no data\n"); return undef; } &exception(4, "*** warning: unexpected HTTP return code $code from server\n"); return undef; } $my_json_ref = &parsejson($data); $laststatus = 0; return $my_json_ref; } $curl ||= &wherecheck("checking for cURL", "curl", <<"EOF"); cURL is required. if cURL is not usually in your path, you can hardcode it with -curl=/path/to/curl EOF $vv = `$curl --version`; ($vv =~ /^curl (\d+)\.(\d+)/) && ($major = $1, $minor = $2); die("at least cURL 7.58 required, you have ${major}.${minor}.\n\n$vv\n") if ($major < 7 || ($major == 7 && $minor < 58)); $json_ref = &grabjson("https://raw.githubusercontent.com/mozilla-services/shavar-prod-lists/master/disconnect-blacklist.json"); die("this doesn't look like the right format\n\ncheck URL\n$url\n") if (!defined($json_ref->{'license'}) || !defined($json_ref->{'categories'}) || !defined($json_ref->{'categories'}->{'Cryptomining'})); select(STDOUT); $|++; %dupedupe = (); &emit('Cryptomining'); &emit('FingerprintingInvasive'); # considering #&emit('Analytics'); sub emit { my $cat = shift(@_); foreach $a (@{ $json_ref->{'categories'}->{$cat} }) { foreach $b (keys(%{ $a })) { die("illegal newline: $b\n") if ($b =~ /[\r\n]/s); print "// $b\n"; foreach $c (keys(%{ $a->{$b} })) { next if ($c eq 'performance'); die("illegal newline: $c\n") if ($c =~ /[\r\n]/s); print "// $c\n"; foreach $d (@{ $a->{$b}->{$c} }) { die("illegal quote: $d\n") if ($d =~ /"/); next if ($dupedupe{$d}++); # whitelist (with regrets) next if (0 || $d eq 'ibm.com' || $d eq 'godaddy.com' || 0); print " BLOK(\"$d\") ||\n"; print " BLOKD(\".$d\") ||\n"; } } } } }