Resolve relative <a href> URLs

This commit is contained in:
Antoni Sawicki 2019-05-29 18:48:07 -07:00
parent c6186d6fb4
commit d8617af9c2

58
wrp.go
View File

@ -7,12 +7,12 @@ import (
"log"
"net/http"
"strconv"
"strings"
"time"
"bytes"
_ "image"
"image/png"
"image/gif"
"net/url"
"github.com/chromedp/cdproto/emulation"
@ -30,19 +30,19 @@ var (
// pageServer handles a page request. It reads the "url" form value, falls
// back to the Wikipedia URL when that value is missing or not longer than
// 4 bytes, writes an HTML page containing a URL entry form, and then calls
// capture to append the screenshot image map for the chosen URL.
//
// NOTE(review): this text appears to be a diff rendering whose +/- markers
// were lost. Statements that differ only in `url` vs `gourl` look like the
// before/after versions of one line; confirm against the repository before
// treating this span as compilable code.
func pageServer(out http.ResponseWriter, req *http.Request) {
req.ParseForm()
furl := req.Form["url"]
var url string
var gourl string
// Use the submitted value only when one was sent and it is longer than 4 bytes.
if len(furl) >= 1 && len(furl[0]) > 4 {
url = furl[0]
gourl = furl[0]
} else {
// Default start page when no usable URL was submitted.
url = "https://en.wikipedia.org/wiki/"
gourl = "https://en.wikipedia.org/wiki/"
}
log.Printf("%s Page Reqest for %s URL=%s\n", req.RemoteAddr, req.URL.Path, url)
log.Printf("%s Page Reqest for %s URL=%s\n", req.RemoteAddr, req.URL.Path, gourl)
out.Header().Set("Content-Type", "text/html")
// NOTE(review): the URL is written into the HTML title and the form VALUE
// attribute with a plain %s, i.e. without HTML escaping.
fmt.Fprintf(out, "<HTML>\n<HEAD><TITLE>WRP %s</TITLE>\n<BODY BGCOLOR=\"#F0F0F0\">", url)
fmt.Fprintf(out, "<FORM ACTION=\"/\">URL: <INPUT TYPE=\"TEXT\" NAME=\"url\" VALUE=\"%s\">", url)
fmt.Fprintf(out, "<HTML>\n<HEAD><TITLE>WRP %s</TITLE>\n<BODY BGCOLOR=\"#F0F0F0\">", gourl)
fmt.Fprintf(out, "<FORM ACTION=\"/\">URL: <INPUT TYPE=\"TEXT\" NAME=\"url\" VALUE=\"%s\">", gourl)
fmt.Fprintf(out, "<INPUT TYPE=\"SUBMIT\" VALUE=\"Go\"></FORM><P>\n")
// Emit the screenshot and image map between the form and the closing tags.
if len(url) > 4 {
capture(url, out)
if len(gourl) > 4 {
capture(gourl, out)
}
fmt.Fprintf(out, "</BODY>\n</HTML>\n")
}
@ -54,46 +54,54 @@ func imgServer(out http.ResponseWriter, req *http.Request) {
out.Write(gifbuf.Bytes())
}
// capture navigates the browser to the given URL, waits two seconds, takes a
// screenshot, and collects the page's final location and its "a" nodes. The
// PNG screenshot is re-encoded as GIF into gifbuf, and an <IMG>/<MAP> image
// map is written to out with one <AREA> per link, each pointing back at this
// server through a "/?url=..." target.
//
// NOTE(review): this text appears to be a diff rendering whose +/- markers
// were lost. The two func headers, the repeated decode block and the mixed
// loop body below look like before/after versions of the same code; confirm
// against the repository before treating this span as compilable code.
func capture(url string, out http.ResponseWriter) {
func capture(gourl string, out http.ResponseWriter) {
var nodes []*cdp.Node
ctxx := chromedp.FromContext(ctx)
var target string
var scrcap []byte
var loc string
log.Printf("Caputure Request for %s\n", url)
log.Printf("Caputure Request for %s\n", gourl)
// NOTE(review): the value returned by chromedp.Run is not checked here.
chromedp.Run(ctx,
emulation.SetDeviceMetricsOverride(1024, 768, 1.0, false),
chromedp.Navigate(url),
chromedp.Navigate(gourl),
chromedp.Sleep(time.Second*2),
chromedp.CaptureScreenshot(&scrcap),
chromedp.Location(&loc),
chromedp.Nodes("a", &nodes, chromedp.ByQueryAll))
// Decode the PNG screenshot and re-encode it as GIF into gifbuf.
// NOTE(review): log.Fatal exits the whole process on a decode error, and
// the error returned by gif.Encode is ignored.
img, err:= png.Decode(bytes.NewReader(scrcap) )
if err != nil {
log.Fatal(err)
}
gifbuf.Reset()
gif.Encode(&gifbuf, img, nil)
log.Printf("Landed on: %s, Got %d nodes\n", loc, len(nodes))
img, err:= png.Decode(bytes.NewReader(scrcap) )
if err != nil {
log.Fatal(err)
}
gifbuf.Reset()
gif.Encode(&gifbuf, img, nil)
// The landed-on location is the base against which each href is resolved.
// NOTE(review): the parse error is discarded, so base may be nil when loc
// does not parse.
base, _ := url.Parse(loc)
fmt.Fprintf(out, "<!-- Location: %s -->\n", base)
fmt.Fprintf(out, "<IMG SRC=\"/wrp.gif\" ALT=\"wrp\" USEMAP=\"#map\">\n<MAP NAME=\"map\">\n")
for _, n := range nodes {
// Look up the node's box model to get the coordinates for its <AREA>.
b, err := dom.GetBoxModel().WithNodeID(n.NodeID).Do(cdp.WithExecutor(ctx, ctxx.Target))
if strings.HasPrefix(n.AttributeValue("href"), "/") {
target = fmt.Sprintf("/?url=%s%s", url, n.AttributeValue("href"))
} else {
target = fmt.Sprintf("/?url=%s", n.AttributeValue("href"))
if err != nil {
continue
}
// Resolve the href in the context of base so relative links become absolute.
tgt, err := base.Parse(n.AttributeValue("href"))
if err != nil {
continue
}
target := fmt.Sprintf("/?url=%s", tgt)
if err == nil && len(b.Content) > 6 {
if len(b.Content) > 6 && len(target) > 7 {
// Content[0], [1], [4], [5] are used as the rectangle coordinates;
// presumably the top-left and bottom-right corners of the content
// quad. TODO confirm against the DOM.getBoxModel documentation.
fmt.Fprintf(out, "<AREA SHAPE=\"RECT\" COORDS=\"%.f,%.f,%.f,%.f\" ALT=\"%s\" TITLE=\"%s\" HREF=\"%s\">\n",
b.Content[0], b.Content[1], b.Content[4], b.Content[5], n.AttributeValue("href"), n.AttributeValue("href"), target)
}
}
fmt.Fprintf(out, "</MAP>\n")
log.Printf("Done with caputure for %s\n", url)
log.Printf("Done with caputure for %s\n", gourl)
}
func main() {