Compare commits

...

8 Commits

Author SHA1 Message Date
luRaichu 9425b2fa0a
Merge b937bea370 into b098b3632a 2024-01-06 16:53:03 +01:00
Antoni Sawicki b098b3632a README update 2024-01-06 02:13:22 -08:00
Antoni Sawicki 5ec0266f75 update README 2024-01-06 02:10:01 -08:00
Antoni Sawicki 7d9cec6297 add some anti crawler detection 2024-01-02 21:55:39 -08:00
Antoni Sawicki 43df501d97 add useragent override flag 2024-01-02 21:54:59 -08:00
Antoni Sawicki 85d64a3577 copyright update 2024-01-02 02:09:38 -08:00
Antoni Sawicki 993e5723ed more mod updates 2024-01-02 02:08:14 -08:00
Antoni Sawicki 9056c798d6 go mod update 2024-01-02 02:00:30 -08:00
4 changed files with 41 additions and 25 deletions

View File

@ -94,6 +94,7 @@ Fortunately ACI allows port 80 without encryption.
-d chromedp debug logging (default false)
-n do not free maps and images after use (default false)
-ui html template file (default "wrp.html")
-ua user agent, override the default "headless" agent
-s delay/sleep after page is rendered before screenshot is taken (default 2s)
```
@ -106,12 +107,22 @@ Fortunately ACI allows port 80 without encryption.
### I can't get it to run
This program does not have a GUI and is run from the command line. You may need to enable executable bit on Unix systems, for example:
This program does not have a GUI and is run from the command line. After downloading, you may need to enable executable bit on Unix systems, for example:
```shell
$ cd ~/Downloads
$ chmod +x wrp-amd64-macos
$ ./wrp-amd64-macos -t png
$ ./wrp-amd64-macos
```
### Websites are blocking headless browsers
This is a well known issue. WRP has some provisions to work around it, but it's a cat and mouse game. The first and
foremost recommendation is to change `User Agent`, so that it doesn't say "headless". Add `-ua="my agent"` to override the default one.
Obtain your regular desktop browser user agent and specify it as the flag. For example
```shell
$ wrp -ua="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
```
## History
@ -138,4 +149,4 @@ $ ./wrp-amd64-macos -t png
License: Apache 2.0
Copyright (c) 2013-2018 Antoni Sawicki
Copyright (c) 2019-2022 Google LLC
Copyright (c) 2019-2024 Google LLC

15
go.mod
View File

@ -1,17 +1,20 @@
module github.com/tenox7/wrp
go 1.16
go 1.21.5
require (
github.com/MaxHalford/halfgone v0.0.0-20171017091812-482157b86ccb
github.com/chromedp/cdproto v0.0.0-20221126224343-3a0787b8dd28
github.com/chromedp/chromedp v0.8.6
github.com/chromedp/cdproto v0.0.0-20231205062650-00455a960d61
github.com/chromedp/chromedp v0.9.3
github.com/soniakeys/quant v1.0.0
)
require (
github.com/chromedp/sysutil v1.0.0 // indirect
github.com/gobwas/httphead v0.1.0 // indirect
github.com/gobwas/pool v0.2.1 // indirect
github.com/gobwas/ws v1.1.0 // indirect
github.com/gobwas/ws v1.3.1 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/soniakeys/quant v1.0.0
golang.org/x/sys v0.2.0 // indirect
golang.org/x/sys v0.15.0 // indirect
)

26
go.sum
View File

@ -1,20 +1,19 @@
github.com/MaxHalford/halfgone v0.0.0-20171017091812-482157b86ccb h1:YQ+d0g0P0F/06oDoeEgDHeZCIrnKgLxXcqYOpe8sTuU=
github.com/MaxHalford/halfgone v0.0.0-20171017091812-482157b86ccb/go.mod h1:J86XzS1wgzJPjpQmpriJ+SetP17JSQUd9l+HWQK86jA=
github.com/chromedp/cdproto v0.0.0-20220924210414-0e3390be1777/go.mod h1:5Y4sD/eXpwrChIuxhSr/G20n9CdbCmoerOHnuAf0Zr0=
github.com/chromedp/cdproto v0.0.0-20221029224954-108014bf7279 h1:7+D/pA8BoNzTpcM0Yw8issS95U/ipn0im5vzhfPzDZc=
github.com/chromedp/cdproto v0.0.0-20221029224954-108014bf7279/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs=
github.com/chromedp/cdproto v0.0.0-20221126224343-3a0787b8dd28 h1:i4vpMoaMguVwvDc0qSNbCHCRue6d0kbXjj5bDF4fHBA=
github.com/chromedp/cdproto v0.0.0-20221126224343-3a0787b8dd28/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs=
github.com/chromedp/chromedp v0.8.6 h1:KobeeqR2dpfKSG1prS3Y6+FbffMmGC6xmAobRXA9QEQ=
github.com/chromedp/chromedp v0.8.6/go.mod h1:nBYHoD6YSNzrr82cIeuOzhw1Jo/s2o0QQ+ifTeoCZ+c=
github.com/chromedp/cdproto v0.0.0-20231011050154-1d073bb38998/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs=
github.com/chromedp/cdproto v0.0.0-20231205062650-00455a960d61 h1:XD280QPATe9jaz20dylKe3vBsNcH1w3mkssGY0lidn8=
github.com/chromedp/cdproto v0.0.0-20231205062650-00455a960d61/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs=
github.com/chromedp/chromedp v0.9.3 h1:Wq58e0dZOdHsxaj9Owmfcf+ibtpYN1N0FWVbaxa/esg=
github.com/chromedp/chromedp v0.9.3/go.mod h1:NipeUkUcuzIdFbBP8eNNvl9upcceOfWzoJn6cRe4ksA=
github.com/chromedp/sysutil v1.0.0 h1:+ZxhTpfpZlmchB58ih/LBHX52ky7w2VhQVKQMucy3Ic=
github.com/chromedp/sysutil v1.0.0/go.mod h1:kgWmDdq8fTzXYcKIBqIYvRRTnYb9aNS9moAV0xufSww=
github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU=
github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM=
github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og=
github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
github.com/gobwas/ws v1.1.0 h1:7RFti/xnNkMJnrK7D1yQ/iCIB5OrrY/54/H930kIbHA=
github.com/gobwas/ws v1.1.0/go.mod h1:nzvNcVha5eUziGrbxFCo6qFIojQHjJV5cLYIbezhfL0=
github.com/gobwas/ws v1.3.0/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY=
github.com/gobwas/ws v1.3.1 h1:Qi34dfLMWJbiKaNbDVzM9x27nZBjmkaW6i4+Ku+pGVU=
github.com/gobwas/ws v1.3.1/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo=
@ -25,9 +24,6 @@ github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhA
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0=
github.com/soniakeys/quant v1.0.0 h1:N1um9ktjbkZVcywBVAAYpZYSHxEfJGzshHCxx/DaI0Y=
github.com/soniakeys/quant v1.0.0/go.mod h1:HI1k023QuVbD4H8i9YdfZP2munIHU4QpjsImz6Y6zds=
golang.org/x/sys v0.0.0-20201207223542-d4d67f95c62d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20220928140112-f11e5e49a4ec/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.2.0 h1:ljd4t30dBnAvMZaQCevtY0xLLD0A+bRZXbgLMLU1F/A=
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc=
golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=

8
wrp.go
View File

@ -2,7 +2,7 @@
// WRP - Web Rendering Proxy
//
// Copyright (c) 2013-2018 Antoni Sawicki
// Copyright (c) 2019-2022 Google LLC
// Copyright (c) 2019-2024 Google LLC
//
package main
@ -54,6 +54,7 @@ var (
fgeom = flag.String("g", "1152x600x216", "Geometry: width x height x colors, height can be 0 for unlimited")
htmFnam = flag.String("ui", "wrp.html", "HTML template file for the UI")
delay = flag.Duration("s", 2*time.Second, "Delay/sleep after page is rendered and before screenshot is taken")
userAgent = flag.String("ua", "", "override chrome user agent")
srv http.Server
actx, ctx context.Context
acncl, cncl context.CancelFunc
@ -569,7 +570,12 @@ func main() {
opts := append(chromedp.DefaultExecAllocatorOptions[:],
chromedp.Flag("headless", *headless),
chromedp.Flag("hide-scrollbars", false),
chromedp.Flag("enable-automation", false),
chromedp.Flag("disable-blink-features", "AutomationControlled"),
)
if *userAgent != "" {
opts = append(opts, chromedp.UserAgent(*userAgent))
}
actx, acncl = chromedp.NewExecAllocator(context.Background(), opts...)
defer acncl()
ctx, cncl = chromedp.NewContext(actx)