wrp/wrp.go

219 lines
6.2 KiB
Go
Raw Normal View History

2019-05-30 01:53:05 +00:00
//
// WRP - Web Rendering Proxy
//
// Copyright (c) 2013-2018 Antoni Sawicki
// Copyright (c) 2019 Google LLC
//
2019-05-29 08:29:01 +00:00
package main
import (
	"bytes"
	"context"
	"flag"
	"fmt"
	"html"
	_ "image"
	"image/gif"
	"image/png"
	"log"
	"math/rand"
	"net/http"
	"net/url"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/chromedp/cdproto/cdp"
	"github.com/chromedp/cdproto/dom"
	"github.com/chromedp/cdproto/emulation"
	"github.com/chromedp/cdproto/runtime"
	"github.com/chromedp/chromedp"
)
// Package-level state shared by the HTTP handlers in this file.
var (
	// ctx is the chromedp context that main creates at startup; capture runs
	// every page load through it.
	ctx context.Context
	// cancel releases ctx. It is deferred in main and also invoked by
	// haltServer right before the process exits.
	cancel context.CancelFunc
	// gifmap maps an image URL path ("/img/NNNN.gif") to the encoded GIF
	// screenshot. capture inserts entries; imgServer looks them up and
	// deletes them when serving.
	// NOTE(review): the map is accessed from HTTP handlers without any
	// locking; confirm whether concurrent requests need a mutex here.
	gifmap = make(map[string]bytes.Buffer)
)
2019-05-31 01:08:48 +00:00
func pageServer(out http.ResponseWriter, r *http.Request) {
r.ParseForm()
u := r.FormValue("url")
2019-05-31 08:56:34 +00:00
var istr string
var i bool
if r.FormValue("i") == "on" {
istr = "CHECKED"
i = true
} else {
istr = ""
i = false
}
2019-05-31 23:41:25 +00:00
p, _ := strconv.ParseInt(r.FormValue("p"), 10, 64)
if r.FormValue("pg") == "Next" {
2019-05-31 23:41:25 +00:00
p++
} else if r.FormValue("pg") == "Prev" {
2019-05-31 23:41:25 +00:00
p--
} else {
p = 0
2019-05-31 23:41:25 +00:00
}
2019-05-31 01:08:48 +00:00
w, _ := strconv.ParseInt(r.FormValue("w"), 10, 64)
if w < 10 {
w = 1024
}
h, _ := strconv.ParseInt(r.FormValue("h"), 10, 64)
if h < 10 {
h = 768
2019-05-29 08:52:28 +00:00
}
2019-05-31 01:08:48 +00:00
s, _ := strconv.ParseFloat(r.FormValue("s"), 64)
if s < 0.1 {
s = 1.0
}
log.Printf("%s Page Reqest for url=\"%s\" [%s]\n", r.RemoteAddr, u, r.URL.Path)
2019-05-29 08:52:28 +00:00
out.Header().Set("Content-Type", "text/html")
2019-06-01 01:20:55 +00:00
fmt.Fprintf(out, "<HTML>\n<HEAD><TITLE>WRP %s</TITLE></HEAD>\n<BODY BGCOLOR=\"#F0F0F0\">", u)
fmt.Fprintf(out, "<FORM ACTION=\"/\">URL/Search: <INPUT TYPE=\"TEXT\" NAME=\"url\" VALUE=\"%s\" SIZE=\"40\">", u)
2019-05-31 01:08:48 +00:00
fmt.Fprintf(out, "<INPUT TYPE=\"SUBMIT\" VALUE=\"Go\"><P>\n")
2019-06-01 01:20:55 +00:00
fmt.Fprintf(out, "ISMAP:<INPUT TYPE=\"CHECKBOX\" NAME=\"i\" %s> \n", istr)
2019-05-31 07:19:10 +00:00
fmt.Fprintf(out, "Width:<INPUT TYPE=\"TEXT\" NAME=\"w\" VALUE=\"%d\" SIZE=\"4\"> \n", w)
fmt.Fprintf(out, "Height:<INPUT TYPE=\"TEXT\" NAME=\"h\" VALUE=\"%d\" SIZE=\"4\"> \n", h)
fmt.Fprintf(out, "Scale:<INPUT TYPE=\"TEXT\" NAME=\"s\" VALUE=\"%1.2f\" SIZE=\"3\"> \n", s)
2019-05-31 23:41:25 +00:00
fmt.Fprintf(out, "Page:<INPUT TYPE=\"HIDDEN\" NAME=\"p\" VALUE=\"%d\"> \n", p)
fmt.Fprintf(out, "<INPUT TYPE=\"SUBMIT\" NAME=\"pg\" VALUE=\"Prev\"> %d \n", p)
fmt.Fprintf(out, "<INPUT TYPE=\"SUBMIT\" NAME=\"pg\" VALUE=\"Next\"> \n")
2019-06-01 01:20:55 +00:00
fmt.Fprintf(out, "</FORM><P>\n")
2019-05-31 01:08:48 +00:00
if len(u) > 4 {
if strings.HasPrefix(u, "http") {
2019-06-01 01:20:55 +00:00
capture(u, w, h, s, p, i, out)
} else {
2019-06-01 01:20:55 +00:00
capture(fmt.Sprintf("http://www.google.com/search?q=%s", url.QueryEscape(u)), w, h, s, p, i, out)
}
2019-05-31 01:08:48 +00:00
} else {
fmt.Fprintf(out, "No URL or search query specified")
2019-05-29 08:52:28 +00:00
}
fmt.Fprintf(out, "</BODY>\n</HTML>\n")
2019-05-29 08:29:01 +00:00
}
func imgServer(out http.ResponseWriter, req *http.Request) {
2019-06-01 01:20:55 +00:00
log.Printf("%s IMG Request for %s\n", req.RemoteAddr, req.URL.Path)
2019-05-30 09:03:17 +00:00
gifbuf := gifmap[req.URL.Path]
defer delete(gifmap, req.URL.Path)
2019-05-30 01:02:29 +00:00
out.Header().Set("Content-Type", "image/gif")
out.Header().Set("Content-Length", strconv.Itoa(len(gifbuf.Bytes())))
out.Write(gifbuf.Bytes())
2019-05-30 09:03:17 +00:00
out.(http.Flusher).Flush()
2019-05-29 08:29:01 +00:00
}
2019-06-01 01:20:55 +00:00
// mapServer handles "/map/..." requests. It currently only logs the caller,
// the requested path and the parsed query values; nothing is written to out.
func mapServer(out http.ResponseWriter, req *http.Request) {
	client, path, query := req.RemoteAddr, req.URL.Path, req.URL.Query()
	log.Printf("%s MAP Request for %s [%v]\n", client, path, query)
}
2019-05-30 07:07:08 +00:00
// haltServer handles "/halt": it logs the request, sends a plain-text
// "WRP Shutdown" acknowledgement, flushes it to the client, cancels the
// package-level chromedp context and then terminates the process with
// exit status 0.
func haltServer(out http.ResponseWriter, req *http.Request) {
	log.Printf("%s Shutdown request received [%s]\n", req.RemoteAddr, req.URL.Path)

	hdr := out.Header()
	hdr.Set("Content-Type", "text/plain")
	fmt.Fprint(out, "WRP Shutdown")

	// Push the acknowledgement out before the process goes away.
	flusher := out.(http.Flusher)
	flusher.Flush()

	cancel()
	os.Exit(0)
}
2019-06-01 01:20:55 +00:00
func capture(gourl string, w int64, h int64, s float64, p int64, ismap bool, out http.ResponseWriter) {
2019-05-29 08:52:28 +00:00
var nodes []*cdp.Node
ctxx := chromedp.FromContext(ctx)
2019-05-30 09:03:17 +00:00
var pngbuf []byte
var gifbuf bytes.Buffer
2019-05-30 01:48:07 +00:00
var loc string
2019-05-31 07:19:10 +00:00
var res *runtime.RemoteObject
2019-05-29 08:29:01 +00:00
2019-05-30 09:03:17 +00:00
log.Printf("Processing Caputure Request for %s\n", gourl)
2019-05-29 09:39:06 +00:00
2019-05-30 09:03:17 +00:00
// Run ChromeDP Magic
2019-05-31 07:41:46 +00:00
err := chromedp.Run(ctx,
2019-05-31 01:08:48 +00:00
emulation.SetDeviceMetricsOverride(w, h, s, false),
2019-05-30 01:48:07 +00:00
chromedp.Navigate(gourl),
2019-05-31 23:41:25 +00:00
chromedp.Evaluate(fmt.Sprintf("window.scrollTo(0, %d);", p*int64(float64(h)*float64(0.9))), &res),
2019-05-31 07:19:10 +00:00
chromedp.Sleep(time.Second*1),
2019-05-30 09:03:17 +00:00
chromedp.CaptureScreenshot(&pngbuf),
2019-05-30 01:48:07 +00:00
chromedp.Location(&loc),
2019-05-29 08:52:28 +00:00
chromedp.Nodes("a", &nodes, chromedp.ByQueryAll))
2019-05-29 08:29:01 +00:00
2019-05-31 07:41:46 +00:00
if err != nil {
log.Printf("%s", err)
fmt.Fprintf(out, "<BR>%s<BR>", err)
return
}
2019-05-31 07:19:10 +00:00
log.Printf("Landed on: %s, Nodes: %d\n", loc, len(nodes))
2019-05-30 01:48:07 +00:00
2019-05-30 09:03:17 +00:00
// Process Screenshot Image
2019-05-30 09:15:52 +00:00
bytes.NewReader(pngbuf).Seek(0, 0)
2019-05-30 09:03:17 +00:00
img, err := png.Decode(bytes.NewReader(pngbuf))
2019-05-30 01:48:07 +00:00
if err != nil {
log.Printf("Failed to decode screenshot: %s\n", err)
fmt.Fprintf(out, "<BR>Unable to decode page screenshot:<BR>%s<BR>\n", err)
return
2019-05-30 01:48:07 +00:00
}
gifbuf.Reset()
2019-05-30 07:53:59 +00:00
err = gif.Encode(&gifbuf, img, nil)
if err != nil {
log.Printf("Failed to encode GIF: %s\n", err)
fmt.Fprintf(out, "<BR>Unable to encode GIF:<BR>%s<BR>\n", err)
return
}
2019-05-30 09:03:17 +00:00
imgpath := fmt.Sprintf("/img/%04d.gif", rand.Intn(9999))
2019-06-01 00:12:26 +00:00
log.Printf("Encoded GIF image: %s, Size: %dKB\n", imgpath, len(gifbuf.Bytes())/1024)
2019-05-30 09:03:17 +00:00
gifmap[imgpath] = gifbuf
2019-05-30 01:02:29 +00:00
2019-05-30 09:03:17 +00:00
// Process Nodes
2019-05-30 01:48:07 +00:00
base, _ := url.Parse(loc)
2019-06-01 01:20:55 +00:00
if ismap {
fmt.Fprintf(out, "<A HREF=\"/map/123.map\"><IMG SRC=\"%s\" ALT=\"wrp\" ISMAP></A>", imgpath)
} else {
fmt.Fprintf(out, "<IMG SRC=\"%s\" ALT=\"wrp\" USEMAP=\"#map\">\n<MAP NAME=\"map\">\n", imgpath)
}
2019-05-29 08:29:01 +00:00
2019-05-29 08:52:28 +00:00
for _, n := range nodes {
b, err := dom.GetBoxModel().WithNodeID(n.NodeID).Do(cdp.WithExecutor(ctx, ctxx.Target))
2019-05-30 01:48:07 +00:00
if err != nil {
continue
}
tgt, err := base.Parse(n.AttributeValue("href"))
if err != nil {
continue
2019-05-29 08:52:28 +00:00
}
2019-05-31 23:41:25 +00:00
target := fmt.Sprintf("/?url=%s&w=%d&h=%d&s=%1.2f&", tgt, w, h, s) // no page# here
2019-05-29 08:29:01 +00:00
2019-05-30 01:48:07 +00:00
if len(b.Content) > 6 && len(target) > 7 {
2019-06-01 01:20:55 +00:00
if ismap {
} else {
2019-05-29 08:52:28 +00:00
fmt.Fprintf(out, "<AREA SHAPE=\"RECT\" COORDS=\"%.f,%.f,%.f,%.f\" ALT=\"%s\" TITLE=\"%s\" HREF=\"%s\">\n",
2019-05-31 06:40:43 +00:00
b.Content[0]*s, b.Content[1]*s, b.Content[4]*s, b.Content[5]*s, n.AttributeValue("href"), n.AttributeValue("href"), target)
2019-06-01 01:20:55 +00:00
}
2019-05-29 08:52:28 +00:00
}
}
2019-05-29 08:29:01 +00:00
2019-06-01 01:20:55 +00:00
if !ismap {
fmt.Fprintf(out, "</MAP>\n")
}
2019-05-30 09:03:17 +00:00
out.(http.Flusher).Flush()
2019-05-30 01:48:07 +00:00
log.Printf("Done with caputure for %s\n", gourl)
2019-05-29 08:29:01 +00:00
}
func main() {
2019-05-29 08:52:28 +00:00
ctx, cancel = chromedp.NewContext(context.Background())
defer cancel()
var addr string
flag.StringVar(&addr, "l", ":8080", "Listen address:port, default :8080")
flag.Parse()
2019-05-30 09:03:17 +00:00
rand.Seed(time.Now().UnixNano())
2019-05-29 08:52:28 +00:00
http.HandleFunc("/", pageServer)
2019-05-30 09:03:17 +00:00
http.HandleFunc("/img/", imgServer)
2019-06-01 01:20:55 +00:00
http.HandleFunc("/map/", mapServer)
2019-05-30 01:47:03 +00:00
http.HandleFunc("/favicon.ico", http.NotFound)
2019-05-30 07:07:08 +00:00
http.HandleFunc("/halt", haltServer)
2019-05-31 06:40:43 +00:00
log.Printf("Starting WRP http server on %s\n", addr)
2019-05-29 08:52:28 +00:00
http.ListenAndServe(addr, nil)
2019-05-29 08:29:01 +00:00
}