From 8d165df36df29add18d69e7c49e0b3a60fe43162 Mon Sep 17 00:00:00 2001 From: Antoni Sawicki Date: Wed, 3 Jul 2024 05:24:56 -0700 Subject: [PATCH 01/16] more fixes --- cdp.go | 263 ++++++++++++++++++++++++++++++++++++++++++++ txt.go | 92 ++++++++++------ wrp.go | 325 ++++++++----------------------------------------------- wrp.html | 18 ++- 4 files changed, 379 insertions(+), 319 deletions(-) create mode 100644 cdp.go diff --git a/cdp.go b/cdp.go new file mode 100644 index 0000000..4e04a2f --- /dev/null +++ b/cdp.go @@ -0,0 +1,263 @@ +package main + +import ( + "bytes" + "context" + "fmt" + "image" + "image/gif" + "image/jpeg" + "image/png" + "io" + "log" + "math" + "math/rand" + "net/http" + "time" + + "github.com/chromedp/cdproto/css" + "github.com/chromedp/cdproto/emulation" + "github.com/chromedp/cdproto/input" + "github.com/chromedp/cdproto/page" + "github.com/chromedp/chromedp" +) + +func chromedpStart() (context.CancelFunc, context.CancelFunc) { + opts := append(chromedp.DefaultExecAllocatorOptions[:], + chromedp.Flag("headless", *headless), + chromedp.Flag("hide-scrollbars", false), + chromedp.Flag("enable-automation", false), + chromedp.Flag("disable-blink-features", "AutomationControlled"), + ) + if *userAgent != "" { + opts = append(opts, chromedp.UserAgent(*userAgent)) + } + actx, acncl = chromedp.NewExecAllocator(context.Background(), opts...) + ctx, cncl = chromedp.NewContext(actx) + return cncl, acncl +} + +// Determine what action to take +func (rq *wrpReq) action() chromedp.Action { + // Mouse Click + if rq.mouseX > 0 && rq.mouseY > 0 { + log.Printf("%s Mouse Click %d,%d\n", rq.r.RemoteAddr, rq.mouseX, rq.mouseY) + return chromedp.MouseClickXY(float64(rq.mouseX)/float64(rq.zoom), float64(rq.mouseY)/float64(rq.zoom)) + } + // Buttons + if len(rq.buttons) > 0 { + log.Printf("%s Button %v\n", rq.r.RemoteAddr, rq.buttons) + switch rq.buttons { + case "Bk": + return chromedp.NavigateBack() + case "St": + return chromedp.Stop() + case "Re": + return chromedp.Reload() + case "Bs": + return chromedp.KeyEvent("\b") + case "Rt": + return chromedp.KeyEvent("\r") + case "<": + return chromedp.KeyEvent("\u0302") + case "^": + return chromedp.KeyEvent("\u0304") + case "v": + return chromedp.KeyEvent("\u0301") + case ">": + return chromedp.KeyEvent("\u0303") + case "Up": + return chromedp.KeyEvent("\u0308") + case "Dn": + return chromedp.KeyEvent("\u0307") + case "All": // Select all + return chromedp.KeyEvent("a", chromedp.KeyModifiers(input.ModifierCtrl)) + } + } + // Keys + if len(rq.keys) > 0 { + log.Printf("%s Sending Keys: %#v\n", rq.r.RemoteAddr, rq.keys) + return chromedp.KeyEvent(rq.keys) + } + // Navigate to URL + log.Printf("%s Processing Navigate Request for %s\n", rq.r.RemoteAddr, rq.url) + return chromedp.Navigate(rq.url) +} + +// Navigate to the desired URL. +func (rq *wrpReq) navigate() { + ctxErr(chromedp.Run(ctx, rq.action()), rq.w) +} + +// Handle context errors +func ctxErr(err error, w io.Writer) { + // TODO: callers should have retry logic, perhaps create another function + // that takes ...chromedp.Action and retries with give up + if err == nil { + return + } + log.Printf("Context error: %s", err) + fmt.Fprintf(w, "Context error: %s
\n", err) + if err.Error() != "context canceled" { + return + } + ctx, cncl = chromedp.NewContext(actx) + log.Printf("Created new context, try again") + fmt.Fprintln(w, "Created new context, try again") +} + +// https://github.com/chromedp/chromedp/issues/979 +func chromedpCaptureScreenshot(res *[]byte, h int64) chromedp.Action { + if res == nil { + panic("res cannot be nil") + } + if h == 0 { + return chromedp.CaptureScreenshot(res) + } + + return chromedp.ActionFunc(func(ctx context.Context) error { + var err error + *res, err = page.CaptureScreenshot().Do(ctx) + return err + }) +} + +// Capture Screenshot using CDP +func (rq *wrpReq) captureScreenshot() { + var styles []*css.ComputedStyleProperty + var r, g, b int + var bgColorSet bool + var h int64 + var pngCap []byte + chromedp.Run(ctx, + emulation.SetDeviceMetricsOverride(int64(float64(rq.width)/rq.zoom), 10, rq.zoom, false), + chromedp.Location(&rq.url), + chromedp.ComputedStyle("body", &styles, chromedp.ByQuery), + chromedp.ActionFunc(func(ctx context.Context) error { + _, _, _, _, _, s, err := page.GetLayoutMetrics().Do(ctx) + if err == nil { + h = int64(math.Ceil(s.Height)) + } + return nil + }), + ) + log.Printf("%s Landed on: %s, Height: %v\n", rq.r.RemoteAddr, rq.url, h) + for _, style := range styles { + if style.Name != "background-color" { + continue + } + fmt.Sscanf(style.Value, "rgb(%d,%d,%d)", &r, &g, &b) + bgColorSet = true + break + } + if !bgColorSet { + r = 255 + g = 255 + b = 255 + } + height := int64(float64(rq.height) / rq.zoom) + if rq.height == 0 && h > 0 { + height = h + 30 + } + chromedp.Run( + ctx, emulation.SetDeviceMetricsOverride(int64(float64(rq.width)/rq.zoom), height, rq.zoom, false), + chromedp.Sleep(*delay), // TODO(tenox): find a better way to determine if page is rendered + ) + // Capture screenshot... + ctxErr(chromedp.Run(ctx, chromedpCaptureScreenshot(&pngCap, rq.height)), rq.w) + seq := rand.Intn(9999) + imgPath := fmt.Sprintf("/img/%04d.%s", seq, rq.imgType) + mapPath := fmt.Sprintf("/map/%04d.map", seq) + ismap[mapPath] = *rq + var sSize string + var iW, iH int + switch rq.imgType { + case "png": + pngBuf := bytes.NewBuffer(pngCap) + img[imgPath] = *pngBuf + cfg, _, _ := image.DecodeConfig(pngBuf) + sSize = fmt.Sprintf("%.0f KB", float32(len(pngBuf.Bytes()))/1024.0) + iW = cfg.Width + iH = cfg.Height + log.Printf("%s Got PNG image: %s, Size: %s, Res: %dx%d\n", rq.r.RemoteAddr, imgPath, sSize, iW, iH) + case "gif": + i, err := png.Decode(bytes.NewReader(pngCap)) + if err != nil { + log.Printf("%s Failed to decode PNG screenshot: %s\n", rq.r.RemoteAddr, err) + fmt.Fprintf(rq.w, "
Unable to decode page PNG screenshot:
%s
\n", err) + return + } + st := time.Now() + var gifBuf bytes.Buffer + err = gif.Encode(&gifBuf, gifPalette(i, rq.imgOpt), &gif.Options{}) + if err != nil { + log.Printf("%s Failed to encode GIF: %s\n", rq.r.RemoteAddr, err) + fmt.Fprintf(rq.w, "
Unable to encode GIF:
%s
\n", err) + return + } + img[imgPath] = gifBuf + sSize = fmt.Sprintf("%.0f KB", float32(len(gifBuf.Bytes()))/1024.0) + iW = i.Bounds().Max.X + iH = i.Bounds().Max.Y + log.Printf("%s Encoded GIF image: %s, Size: %s, Colors: %d, Res: %dx%d, Time: %vms\n", rq.r.RemoteAddr, imgPath, sSize, rq.imgOpt, iW, iH, time.Since(st).Milliseconds()) + case "jpg": + i, err := png.Decode(bytes.NewReader(pngCap)) + if err != nil { + log.Printf("%s Failed to decode PNG screenshot: %s\n", rq.r.RemoteAddr, err) + fmt.Fprintf(rq.w, "
Unable to decode page PNG screenshot:
%s
\n", err) + return + } + st := time.Now() + var jpgBuf bytes.Buffer + err = jpeg.Encode(&jpgBuf, i, &jpeg.Options{Quality: *jpgQual}) + if err != nil { + log.Printf("%s Failed to encode JPG: %s\n", rq.r.RemoteAddr, err) + fmt.Fprintf(rq.w, "
Unable to encode JPG:
%s
\n", err) + return + } + img[imgPath] = jpgBuf + sSize = fmt.Sprintf("%.0f KB", float32(len(jpgBuf.Bytes()))/1024.0) + iW = i.Bounds().Max.X + iH = i.Bounds().Max.Y + log.Printf("%s Encoded JPG image: %s, Size: %s, Quality: %d, Res: %dx%d, Time: %vms\n", rq.r.RemoteAddr, imgPath, sSize, *jpgQual, iW, iH, time.Since(st).Milliseconds()) + } + rq.printHTML(printParams{ + bgColor: fmt.Sprintf("#%02X%02X%02X", r, g, b), + pageHeight: fmt.Sprintf("%d PX", h), + imgSize: sSize, + imgURL: imgPath, + mapURL: mapPath, + imgWidth: iW, + imgHeight: iH, + }) + log.Printf("%s Done with capture for %s\n", rq.r.RemoteAddr, rq.url) +} + +// Process HTTP requests to ISMAP '/map/' url +func mapServer(w http.ResponseWriter, r *http.Request) { + log.Printf("%s ISMAP Request for %s [%+v]\n", r.RemoteAddr, r.URL.Path, r.URL.RawQuery) + rq, ok := ismap[r.URL.Path] + rq.r = r + rq.w = w + if !ok { + fmt.Fprintf(w, "Unable to find map %s\n", r.URL.Path) + log.Printf("Unable to find map %s\n", r.URL.Path) + return + } + if !*noDel { + defer delete(ismap, r.URL.Path) + } + n, err := fmt.Sscanf(r.URL.RawQuery, "%d,%d", &rq.mouseX, &rq.mouseY) + if err != nil || n != 2 { + fmt.Fprintf(w, "n=%d, err=%s\n", n, err) + log.Printf("%s ISMAP n=%d, err=%s\n", r.RemoteAddr, n, err) + return + } + log.Printf("%s WrpReq from ISMAP: %+v\n", r.RemoteAddr, rq) + if len(rq.url) < 4 { + rq.printHTML(printParams{bgColor: "#FFFFFF"}) + return + } + rq.navigate() // TODO: if error from navigate do not capture + rq.captureScreenshot() +} diff --git a/txt.go b/txt.go index 9821e69..f1791f8 100644 --- a/txt.go +++ b/txt.go @@ -87,7 +87,7 @@ func (i *imageStore) del(id string) { delete(i.img, id) } -func fetchImage(id, url string) error { +func fetchImage(id, url, imgType string, maxSize, imgOpt int) error { log.Printf("Downloading IMGZ URL=%q for ID=%q", url, id) var img []byte var err error @@ -115,7 +115,7 @@ func fetchImage(id, url string) error { return fmt.Errorf("error decoding image from url embed: %q: %v", url, err) } } - gif, err := smallGif(img) + gif, err := smallImg(img, imgType, maxSize, imgOpt) if err != nil { return fmt.Errorf("Error scaling down image: %v", err) } @@ -123,7 +123,46 @@ func fetchImage(id, url string) error { return nil } -type astTransformer struct{} +func smallImg(src []byte, imgType string, maxSize, imgOpt int) ([]byte, error) { + t := http.DetectContentType(src) + var err error + var img image.Image + switch t { + case "image/png": + img, err = png.Decode(bytes.NewReader(src)) + case "image/gif": + img, err = gif.Decode(bytes.NewReader(src)) + case "image/jpeg": + img, err = jpeg.Decode(bytes.NewReader(src)) + case "image/webp": + img, err = webp.Decode(bytes.NewReader(src)) + default: // TODO: also add svg + err = errors.New("unknown content type: " + t) + } + if err != nil { + return nil, fmt.Errorf("image decode problem: %v", err) + } + img = resize.Thumbnail(uint(*defImgSize), uint(*defImgSize), img, resize.NearestNeighbor) + var outBuf bytes.Buffer + switch imgType { + case "png": + err = png.Encode(&outBuf, img) + case "gif": + err = gif.Encode(&outBuf, gifPalette(img, int64(imgOpt)), &gif.Options{}) + case "jpg": + err = jpeg.Encode(&outBuf, img, &jpeg.Options{Quality: imgOpt}) + } + if err != nil { + return nil, fmt.Errorf("gif encode problem: %v", err) + } + return outBuf.Bytes(), nil +} + +type astTransformer struct { + imgType string + maxSize int + imgOpt int +} func (t *astTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) { @@ -132,8 +171,8 @@ func (t *astTransformer) Transform(node *ast.Document, reader text.Reader, pc pa } if img, ok := n.(*ast.Image); ok && entering { // TODO: dynamic extension based on form value - id := fmt.Sprintf("txt%05d.gif", rand.Intn(99999)) // BUG: atomic.AddInt64 or something that ever increases - time based? - err := fetchImage(id, string(img.Destination)) // TODO: use goroutines with waitgroup + id := fmt.Sprintf("txt%05d.gif", rand.Intn(99999)) // BUG: atomic.AddInt64 or something that ever increases - time based? + err := fetchImage(id, string(img.Destination), t.imgType, t.maxSize, t.imgOpt) // TODO: use goroutines with waitgroup if err != nil { log.Print(err) n.Parent().RemoveChildren(n) @@ -145,6 +184,18 @@ func (t *astTransformer) Transform(node *ast.Document, reader text.Reader, pc pa }) } +func asciify(s []byte) []byte { + a := make([]byte, len(s)) + for i := 0; i < len(s); i++ { + if s[i] > 127 { + a[i] = '.' + continue + } + a[i] = s[i] + } + return a +} + func (rq *wrpReq) captureMarkdown() { log.Printf("Processing Markdown conversion request for %v", rq.url) // TODO: bug - DomainFromURL always prefixes with http:// instead of https @@ -157,7 +208,7 @@ func (rq *wrpReq) captureMarkdown() { return } log.Printf("Got %v bytes md from %v", len(md), rq.url) - t := &astTransformer{} + t := &astTransformer{imgType: rq.imgType, maxSize: int(rq.maxSize), imgOpt: int(rq.imgOpt)} // TODO: maxSize still doesn't work gm := goldmark.New( goldmark.WithExtensions(extension.GFM), goldmark.WithParserOptions(parser.WithASTTransformers(util.Prioritized(t, 100))), @@ -190,32 +241,3 @@ func imgServerZ(w http.ResponseWriter, r *http.Request) { w.Write(img) w.(http.Flusher).Flush() } - -// TODO set JPG/GIF/PNG type based on form... -func smallGif(src []byte) ([]byte, error) { - t := http.DetectContentType(src) - var err error - var img image.Image - switch t { - case "image/png": - img, err = png.Decode(bytes.NewReader(src)) - case "image/gif": - img, err = gif.Decode(bytes.NewReader(src)) - case "image/jpeg": - img, err = jpeg.Decode(bytes.NewReader(src)) - case "image/webp": - img, err = webp.Decode(bytes.NewReader(src)) - default: // TODO: also add svg - err = errors.New("unknown content type: " + t) - } - if err != nil { - return nil, fmt.Errorf("image decode problem: %v", err) - } - img = resize.Thumbnail(uint(*txtImgSize), uint(*txtImgSize), img, resize.NearestNeighbor) - var gifBuf bytes.Buffer - err = gif.Encode(&gifBuf, gifPalette(img, 216), &gif.Options{}) - if err != nil { - return nil, fmt.Errorf("gif encode problem: %v", err) - } - return gifBuf.Bytes(), nil -} diff --git a/wrp.go b/wrp.go index 49850d9..c8fcc23 100644 --- a/wrp.go +++ b/wrp.go @@ -15,14 +15,8 @@ import ( "fmt" "image" "image/color/palette" - "image/gif" - "image/jpeg" - "image/png" "io" - "io/ioutil" "log" - "math" - "math/rand" "net" "net/http" "net/url" @@ -36,27 +30,26 @@ import ( "time" "github.com/MaxHalford/halfgone" - "github.com/chromedp/cdproto/css" - "github.com/chromedp/cdproto/emulation" - "github.com/chromedp/cdproto/input" - "github.com/chromedp/cdproto/page" - "github.com/chromedp/chromedp" "github.com/soniakeys/quant/median" ) -const version = "4.7.0" +const version = "4.8.0" + +var ( + addr = flag.String("l", ":8080", "Listen address:port, default :8080") + headless = flag.Bool("h", true, "Headless mode / hide browser window (default true)") + noDel = flag.Bool("n", false, "Do not free maps and images after use") + defType = flag.String("t", "gif", "Image type: png|gif|jpg") + wrpMode = flag.String("m", "ismap", "WRP Mode: ismap|html") + defImgSize = flag.Int64("is", 200, "html mode default image size") + jpgQual = flag.Int("q", 75, "Jpeg image quality, default 75%") // TODO: this should be form dropdown when jpeg is selected as image type + fgeom = flag.String("g", "1152x600x216", "Geometry: width x height x colors, height can be 0 for unlimited") + htmFnam = flag.String("ui", "wrp.html", "HTML template file for the UI") + delay = flag.Duration("s", 2*time.Second, "Delay/sleep after page is rendered and before screenshot is taken") + userAgent = flag.String("ua", "", "override chrome user agent") +) var ( - addr = flag.String("l", ":8080", "Listen address:port, default :8080") - headless = flag.Bool("h", true, "Headless mode / hide browser window (default true)") - noDel = flag.Bool("n", false, "Do not free maps and images after use") - defType = flag.String("t", "gif", "Image type: png|gif|jpg|txt") - txtImgSize = flag.Int("ts", 200, "txt mode image size") // make it default, this should come from the from - jpgQual = flag.Int("q", 80, "Jpeg image quality, default 80%") - fgeom = flag.String("g", "1152x600x216", "Geometry: width x height x colors, height can be 0 for unlimited") - htmFnam = flag.String("ui", "wrp.html", "HTML template file for the UI") - delay = flag.Duration("s", 2*time.Second, "Delay/sleep after page is rendered and before screenshot is taken") - userAgent = flag.String("ua", "", "override chrome user agent") srv http.Server actx, ctx context.Context acncl, cncl context.CancelFunc @@ -78,6 +71,7 @@ type geom struct { // Data for html template type uiData struct { Version string + WrpMode string URL string BgColor string NColors int64 @@ -89,6 +83,7 @@ type uiData struct { ImgSize string ImgWidth int ImgHeight int + MaxSize int64 MapURL string PageHeight string TeXT string @@ -118,6 +113,9 @@ type wrpReq struct { keys string // keys to send buttons string // Fn buttons imgType string // imgtype + wrpMode string // mode ismap/html + maxSize int64 // image max size for html mode + imgOpt int64 w http.ResponseWriter r *http.Request } @@ -125,10 +123,15 @@ type wrpReq struct { // Parse HTML Form, Process Input Boxes, Etc. func (rq *wrpReq) parseForm() { rq.r.ParseForm() + rq.wrpMode = rq.r.FormValue("m") + if rq.wrpMode == "" { + rq.wrpMode = *wrpMode + } rq.url = rq.r.FormValue("url") if len(rq.url) > 1 && !strings.HasPrefix(rq.url, "http") { rq.url = fmt.Sprintf("http://www.google.com/search?q=%s", url.QueryEscape(rq.url)) } + // TODO: implement atoiOrZero rq.width, _ = strconv.ParseInt(rq.r.FormValue("w"), 10, 64) rq.height, _ = strconv.ParseInt(rq.r.FormValue("h"), 10, 64) if rq.width < 10 && rq.height < 10 { @@ -139,20 +142,26 @@ func (rq *wrpReq) parseForm() { if rq.zoom < 0.1 { rq.zoom = 1.0 } - rq.colors, _ = strconv.ParseInt(rq.r.FormValue("c"), 10, 64) - if rq.colors < 2 || rq.colors > 256 { + rq.colors, _ = strconv.ParseInt(rq.r.FormValue("c"), 10, 64) // TODO: this needs to be jpeg quality as well + if rq.colors < 2 || rq.colors > 256 { // ... but maybe not because of this? rq.colors = defGeom.c } rq.keys = rq.r.FormValue("k") rq.buttons = rq.r.FormValue("Fn") + rq.maxSize, _ = strconv.ParseInt(rq.r.FormValue("s"), 10, 64) + if rq.maxSize == 0 { + rq.maxSize = *defImgSize + } rq.imgType = rq.r.FormValue("t") switch rq.imgType { case "png": case "gif": + rq.imgOpt = defGeom.c case "jpg": - case "txt": + rq.imgOpt = int64(*jpgQual) default: rq.imgType = *defType + rq.imgOpt = 80 // TODO: fixme, this needs to be different based on image type } log.Printf("%s WrpReq from UI Form: %+v\n", rq.r.RemoteAddr, rq) } @@ -168,12 +177,14 @@ func (rq *wrpReq) printHTML(p printParams) { } data := uiData{ Version: version, + WrpMode: rq.wrpMode, URL: rq.url, BgColor: p.bgColor, Width: rq.width, Height: rq.height, - NColors: rq.colors, + NColors: rq.colors, // TODO: this needs to be also jpeg quality Zoom: rq.zoom, + MaxSize: rq.maxSize, ImgType: rq.imgType, ImgSize: p.imgSize, ImgWidth: p.imgWidth, @@ -189,91 +200,6 @@ func (rq *wrpReq) printHTML(p printParams) { } } -// Determine what action to take -func (rq *wrpReq) action() chromedp.Action { - // Mouse Click - if rq.mouseX > 0 && rq.mouseY > 0 { - log.Printf("%s Mouse Click %d,%d\n", rq.r.RemoteAddr, rq.mouseX, rq.mouseY) - return chromedp.MouseClickXY(float64(rq.mouseX)/float64(rq.zoom), float64(rq.mouseY)/float64(rq.zoom)) - } - // Buttons - if len(rq.buttons) > 0 { - log.Printf("%s Button %v\n", rq.r.RemoteAddr, rq.buttons) - switch rq.buttons { - case "Bk": - return chromedp.NavigateBack() - case "St": - return chromedp.Stop() - case "Re": - return chromedp.Reload() - case "Bs": - return chromedp.KeyEvent("\b") - case "Rt": - return chromedp.KeyEvent("\r") - case "<": - return chromedp.KeyEvent("\u0302") - case "^": - return chromedp.KeyEvent("\u0304") - case "v": - return chromedp.KeyEvent("\u0301") - case ">": - return chromedp.KeyEvent("\u0303") - case "Up": - return chromedp.KeyEvent("\u0308") - case "Dn": - return chromedp.KeyEvent("\u0307") - case "All": // Select all - return chromedp.KeyEvent("a", chromedp.KeyModifiers(input.ModifierCtrl)) - } - } - // Keys - if len(rq.keys) > 0 { - log.Printf("%s Sending Keys: %#v\n", rq.r.RemoteAddr, rq.keys) - return chromedp.KeyEvent(rq.keys) - } - // Navigate to URL - log.Printf("%s Processing Navigate Request for %s\n", rq.r.RemoteAddr, rq.url) - return chromedp.Navigate(rq.url) -} - -// Navigate to the desired URL. -func (rq *wrpReq) navigate() { - ctxErr(chromedp.Run(ctx, rq.action()), rq.w) -} - -// Handle context errors -func ctxErr(err error, w io.Writer) { - // TODO: callers should have retry logic, perhaps create another function - // that takes ...chromedp.Action and retries with give up - if err == nil { - return - } - log.Printf("Context error: %s", err) - fmt.Fprintf(w, "Context error: %s
\n", err) - if err.Error() != "context canceled" { - return - } - ctx, cncl = chromedp.NewContext(actx) - log.Printf("Created new context, try again") - fmt.Fprintln(w, "Created new context, try again") -} - -// https://github.com/chromedp/chromedp/issues/979 -func chromedpCaptureScreenshot(res *[]byte, h int64) chromedp.Action { - if res == nil { - panic("res cannot be nil") - } - if h == 0 { - return chromedp.CaptureScreenshot(res) - } - - return chromedp.ActionFunc(func(ctx context.Context) error { - var err error - *res, err = page.CaptureScreenshot().Do(ctx) - return err - }) -} - func gifPalette(i image.Image, n int64) image.Image { switch n { case 2: @@ -313,128 +239,6 @@ func gifPalette(i image.Image, n int64) image.Image { return i } -func (rq *wrpReq) captureImage() { - var styles []*css.ComputedStyleProperty - var r, g, b int - var bgColorSet bool - var h int64 - var pngCap []byte - chromedp.Run(ctx, - emulation.SetDeviceMetricsOverride(int64(float64(rq.width)/rq.zoom), 10, rq.zoom, false), - chromedp.Location(&rq.url), - chromedp.ComputedStyle("body", &styles, chromedp.ByQuery), - chromedp.ActionFunc(func(ctx context.Context) error { - _, _, _, _, _, s, err := page.GetLayoutMetrics().Do(ctx) - if err == nil { - h = int64(math.Ceil(s.Height)) - } - return nil - }), - ) - log.Printf("%s Landed on: %s, Height: %v\n", rq.r.RemoteAddr, rq.url, h) - for _, style := range styles { - if style.Name != "background-color" { - continue - } - fmt.Sscanf(style.Value, "rgb(%d,%d,%d)", &r, &g, &b) - bgColorSet = true - break - } - if !bgColorSet { - r = 255 - g = 255 - b = 255 - } - height := int64(float64(rq.height) / rq.zoom) - if rq.height == 0 && h > 0 { - height = h + 30 - } - chromedp.Run( - ctx, emulation.SetDeviceMetricsOverride(int64(float64(rq.width)/rq.zoom), height, rq.zoom, false), - chromedp.Sleep(*delay), // TODO(tenox): find a better way to determine if page is rendered - ) - // Capture screenshot... - ctxErr(chromedp.Run(ctx, chromedpCaptureScreenshot(&pngCap, rq.height)), rq.w) - seq := rand.Intn(9999) - imgPath := fmt.Sprintf("/img/%04d.%s", seq, rq.imgType) - mapPath := fmt.Sprintf("/map/%04d.map", seq) - ismap[mapPath] = *rq - var sSize string - var iW, iH int - switch rq.imgType { - case "png": - pngBuf := bytes.NewBuffer(pngCap) - img[imgPath] = *pngBuf - cfg, _, _ := image.DecodeConfig(pngBuf) - sSize = fmt.Sprintf("%.0f KB", float32(len(pngBuf.Bytes()))/1024.0) - iW = cfg.Width - iH = cfg.Height - log.Printf("%s Got PNG image: %s, Size: %s, Res: %dx%d\n", rq.r.RemoteAddr, imgPath, sSize, iW, iH) - case "gif": - i, err := png.Decode(bytes.NewReader(pngCap)) - if err != nil { - log.Printf("%s Failed to decode PNG screenshot: %s\n", rq.r.RemoteAddr, err) - fmt.Fprintf(rq.w, "
Unable to decode page PNG screenshot:
%s
\n", err) - return - } - st := time.Now() - var gifBuf bytes.Buffer - err = gif.Encode(&gifBuf, gifPalette(i, rq.colors), &gif.Options{}) - if err != nil { - log.Printf("%s Failed to encode GIF: %s\n", rq.r.RemoteAddr, err) - fmt.Fprintf(rq.w, "
Unable to encode GIF:
%s
\n", err) - return - } - img[imgPath] = gifBuf - sSize = fmt.Sprintf("%.0f KB", float32(len(gifBuf.Bytes()))/1024.0) - iW = i.Bounds().Max.X - iH = i.Bounds().Max.Y - log.Printf("%s Encoded GIF image: %s, Size: %s, Colors: %d, Res: %dx%d, Time: %vms\n", rq.r.RemoteAddr, imgPath, sSize, rq.colors, iW, iH, time.Since(st).Milliseconds()) - case "jpg": - i, err := png.Decode(bytes.NewReader(pngCap)) - if err != nil { - log.Printf("%s Failed to decode PNG screenshot: %s\n", rq.r.RemoteAddr, err) - fmt.Fprintf(rq.w, "
Unable to decode page PNG screenshot:
%s
\n", err) - return - } - st := time.Now() - var jpgBuf bytes.Buffer - err = jpeg.Encode(&jpgBuf, i, &jpeg.Options{Quality: *jpgQual}) - if err != nil { - log.Printf("%s Failed to encode JPG: %s\n", rq.r.RemoteAddr, err) - fmt.Fprintf(rq.w, "
Unable to encode JPG:
%s
\n", err) - return - } - img[imgPath] = jpgBuf - sSize = fmt.Sprintf("%.0f KB", float32(len(jpgBuf.Bytes()))/1024.0) - iW = i.Bounds().Max.X - iH = i.Bounds().Max.Y - log.Printf("%s Encoded JPG image: %s, Size: %s, Quality: %d, Res: %dx%d, Time: %vms\n", rq.r.RemoteAddr, imgPath, sSize, *jpgQual, iW, iH, time.Since(st).Milliseconds()) - } - rq.printHTML(printParams{ - bgColor: fmt.Sprintf("#%02X%02X%02X", r, g, b), - pageHeight: fmt.Sprintf("%d PX", h), - imgSize: sSize, - imgURL: imgPath, - mapURL: mapPath, - imgWidth: iW, - imgHeight: iH, - }) - log.Printf("%s Done with capture for %s\n", rq.r.RemoteAddr, rq.url) -} - -func asciify(s []byte) []byte { - a := make([]byte, len(s)) - for i := 0; i < len(s); i++ { - if s[i] > 127 { - a[i] = '.' - continue - } - a[i] = s[i] - } - return a -} - // Process HTTP requests to WRP '/' url func pageServer(w http.ResponseWriter, r *http.Request) { log.Printf("%s Page Request for %s [%+v]\n", r.RemoteAddr, r.URL.Path, r.URL.RawQuery) @@ -448,43 +252,15 @@ func pageServer(w http.ResponseWriter, r *http.Request) { return } rq.navigate() // TODO: if error from navigate do not capture - if rq.imgType == "txt" { + if rq.wrpMode == "html" { rq.captureMarkdown() return } - rq.captureImage() -} - -// Process HTTP requests to ISMAP '/map/' url -func mapServer(w http.ResponseWriter, r *http.Request) { - log.Printf("%s ISMAP Request for %s [%+v]\n", r.RemoteAddr, r.URL.Path, r.URL.RawQuery) - rq, ok := ismap[r.URL.Path] - rq.r = r - rq.w = w - if !ok { - fmt.Fprintf(w, "Unable to find map %s\n", r.URL.Path) - log.Printf("Unable to find map %s\n", r.URL.Path) - return - } - if !*noDel { - defer delete(ismap, r.URL.Path) - } - n, err := fmt.Sscanf(r.URL.RawQuery, "%d,%d", &rq.mouseX, &rq.mouseY) - if err != nil || n != 2 { - fmt.Fprintf(w, "n=%d, err=%s\n", n, err) - log.Printf("%s ISMAP n=%d, err=%s\n", r.RemoteAddr, n, err) - return - } - log.Printf("%s WrpReq from ISMAP: %+v\n", r.RemoteAddr, rq) - if len(rq.url) < 4 { - rq.printHTML(printParams{bgColor: "#FFFFFF"}) - return - } - rq.navigate() // TODO: if error from navigate do not capture - rq.captureImage() + rq.captureScreenshot() } // Process HTTP requests for images '/img/' url +// TODO: merge this with html mode IMGZ func imgServer(w http.ResponseWriter, r *http.Request) { log.Printf("%s IMG Request for %s\n", r.RemoteAddr, r.URL.Path) imgBuf, ok := img[r.URL.Path] @@ -515,9 +291,6 @@ func imgServer(w http.ResponseWriter, r *http.Request) { // Process HTTP requests for Shutdown via '/shutdown/' url func haltServer(w http.ResponseWriter, r *http.Request) { log.Printf("%s Shutdown Request for %s\n", r.RemoteAddr, r.URL.Path) - w.Header().Set("Cache-Control", "max-age=0") - w.Header().Set("Expires", "-1") - w.Header().Set("Pragma", "no-cache") w.Header().Set("Content-Type", "text/plain") fmt.Fprintf(w, "Shutting down WRP...\n") w.(http.Flusher).Flush() @@ -537,7 +310,7 @@ func tmpl(t string) string { } defer fh.Close() - tmpl, err = ioutil.ReadAll(fh) + tmpl, err = io.ReadAll(fh) if err != nil { goto builtin } @@ -551,7 +324,7 @@ builtin: } defer fhs.Close() - tmpl, err = ioutil.ReadAll(fhs) + tmpl, err = io.ReadAll(fhs) if err != nil { log.Fatal(err) } @@ -601,19 +374,9 @@ func main() { log.Fatalf("Unable to parse -g geometry flag / %s", err) } - opts := append(chromedp.DefaultExecAllocatorOptions[:], - chromedp.Flag("headless", *headless), - chromedp.Flag("hide-scrollbars", false), - chromedp.Flag("enable-automation", false), - chromedp.Flag("disable-blink-features", "AutomationControlled"), - ) - if *userAgent != "" { - opts = append(opts, chromedp.UserAgent(*userAgent)) - } - actx, acncl = chromedp.NewExecAllocator(context.Background(), opts...) - defer acncl() - ctx, cncl = chromedp.NewContext(actx) + cncl, acncl = chromedpStart() defer cncl() + defer acncl() c := make(chan os.Signal) signal.Notify(c, os.Interrupt, syscall.SIGTERM) diff --git a/wrp.html b/wrp.html index c93c6c4..af34cc1 100644 --- a/wrp.html +++ b/wrp.html @@ -6,7 +6,7 @@
- {{ if ne .ImgType "txt" }} + {{ if eq .WrpMode "ismap" }} @@ -14,7 +14,13 @@ W H + {{ end }} + {{ if eq .WrpMode "html" }} + S + {{ end }} + {{ if eq .WrpMode "ismap" }} Z {{ end }} + M T - {{ if ne .ImgType "txt" }} C + {{ if eq .WrpMode "ismap" }} K From 00304b5d05cb50e8c53ce2d35fc34bb23058e8ef Mon Sep 17 00:00:00 2001 From: Antoni Sawicki Date: Sun, 7 Jul 2024 23:40:46 -0700 Subject: [PATCH 02/16] fix makefile --- Makefile | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index ee67f2f..a356a43 100755 --- a/Makefile +++ b/Makefile @@ -1,17 +1,17 @@ all: wrp wrp: wrp.go - go build wrp.go + go build -a cross: - GOOS=linux GOARCH=amd64 go build -a -o wrp-amd64-linux wrp.go - GOOS=freebsd GOARCH=amd64 go build -a -o wrp-amd64-freebsd wrp.go - GOOS=openbsd GOARCH=amd64 go build -a -o wrp-amd64-openbsd wrp.go - GOOS=darwin GOARCH=amd64 go build -a -o wrp-amd64-macos wrp.go - GOOS=darwin GOARCH=arm64 go build -a -o wrp-arm64-macos wrp.go - GOOS=windows GOARCH=amd64 go build -a -o wrp-amd64-windows.exe wrp.go - GOOS=linux GOARCH=arm go build -a -o wrp-arm-linux wrp.go - GOOS=linux GOARCH=arm64 go build -a -o wrp-arm64-linux wrp.go + GOOS=linux GOARCH=amd64 go build -a -o wrp-amd64-linux + GOOS=freebsd GOARCH=amd64 go build -a -o wrp-amd64-freebsd + GOOS=openbsd GOARCH=amd64 go build -a -o wrp-amd64-openbsd + GOOS=darwin GOARCH=amd64 go build -a -o wrp-amd64-macos + GOOS=darwin GOARCH=arm64 go build -a -o wrp-arm64-macos + GOOS=windows GOARCH=amd64 go build -a -o wrp-amd64-windows.exe + GOOS=linux GOARCH=arm go build -a -o wrp-arm-linux + GOOS=linux GOARCH=arm64 go build -a -o wrp-arm64-linux docker-local: docker buildx build --platform linux/amd64,linux/arm64 -t tenox7/wrp:latest --load . From 2f667e447cf14e8d0f8d5e9c7ec4db3b351c8cdc Mon Sep 17 00:00:00 2001 From: Antoni Sawicki Date: Mon, 8 Jul 2024 21:27:51 -0700 Subject: [PATCH 03/16] move some code to util.go --- cdp.go | 31 +++++++++++++++++ txt.go | 14 ++------ util.go | 89 ++++++++++++++++++++++++++++++++++++++++++++++++ wrp.go | 103 ++------------------------------------------------------ 4 files changed, 125 insertions(+), 112 deletions(-) create mode 100644 util.go diff --git a/cdp.go b/cdp.go index 4e04a2f..a698d72 100644 --- a/cdp.go +++ b/cdp.go @@ -13,6 +13,8 @@ import ( "math" "math/rand" "net/http" + "strconv" + "strings" "time" "github.com/chromedp/cdproto/css" @@ -261,3 +263,32 @@ func mapServer(w http.ResponseWriter, r *http.Request) { rq.navigate() // TODO: if error from navigate do not capture rq.captureScreenshot() } + +// Process HTTP requests for images '/img/' url +// TODO: merge this with html mode IMGZ +func imgServer(w http.ResponseWriter, r *http.Request) { + log.Printf("%s IMG Request for %s\n", r.RemoteAddr, r.URL.Path) + imgBuf, ok := img[r.URL.Path] + if !ok || imgBuf.Bytes() == nil { + fmt.Fprintf(w, "Unable to find image %s\n", r.URL.Path) + log.Printf("%s Unable to find image %s\n", r.RemoteAddr, r.URL.Path) + return + } + if !*noDel { + defer delete(img, r.URL.Path) + } + switch { + case strings.HasSuffix(r.URL.Path, ".gif"): + w.Header().Set("Content-Type", "image/gif") + case strings.HasSuffix(r.URL.Path, ".png"): + w.Header().Set("Content-Type", "image/png") + case strings.HasSuffix(r.URL.Path, ".jpg"): + w.Header().Set("Content-Type", "image/jpeg") + } + w.Header().Set("Content-Length", strconv.Itoa(len(imgBuf.Bytes()))) + w.Header().Set("Cache-Control", "max-age=0") + w.Header().Set("Expires", "-1") + w.Header().Set("Pragma", "no-cache") + w.Write(imgBuf.Bytes()) + w.(http.Flusher).Flush() +} diff --git a/txt.go b/txt.go index f1791f8..d1f4df6 100644 --- a/txt.go +++ b/txt.go @@ -7,6 +7,8 @@ package main // - garbage collector / delete old images from map // - add referer header // - svg support +// - incorrect cert support in both markdown and image download +// - unify cdp and txt image handlers // - BOG: DomainFromURL always prefixes with http instead of https // reproduces on vsi vms docs // - BUG: markdown table errors @@ -184,18 +186,6 @@ func (t *astTransformer) Transform(node *ast.Document, reader text.Reader, pc pa }) } -func asciify(s []byte) []byte { - a := make([]byte, len(s)) - for i := 0; i < len(s); i++ { - if s[i] > 127 { - a[i] = '.' - continue - } - a[i] = s[i] - } - return a -} - func (rq *wrpReq) captureMarkdown() { log.Printf("Processing Markdown conversion request for %v", rq.url) // TODO: bug - DomainFromURL always prefixes with http:// instead of https diff --git a/util.go b/util.go new file mode 100644 index 0000000..c746e18 --- /dev/null +++ b/util.go @@ -0,0 +1,89 @@ +// wrp utility functions +package main + +import ( + "image" + "image/color/palette" + "log" + "net" + "strings" + + "github.com/MaxHalford/halfgone" + "github.com/soniakeys/quant/median" +) + +func printIPs(b string) { + ap := strings.Split(b, ":") + if len(ap) < 1 { + log.Fatal("Wrong format of ipaddress:port") + } + log.Printf("Listen address: %v", b) + if ap[0] != "" && ap[0] != "0.0.0.0" { + return + } + a, err := net.InterfaceAddrs() + if err != nil { + log.Print("Unable to get interfaces: ", err) + return + } + var m string + for _, i := range a { + n, ok := i.(*net.IPNet) + if !ok || n.IP.IsLoopback() || strings.Contains(n.IP.String(), ":") { + continue + } + m = m + n.IP.String() + " " + } + log.Print("My IP addresses: ", m) +} + +func gifPalette(i image.Image, n int64) image.Image { + switch n { + case 2: + i = halfgone.FloydSteinbergDitherer{}.Apply(halfgone.ImageToGray(i)) + case 216: + var FastGifLut = [256]int{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5} + r := i.Bounds() + // NOTE: the color index computation below works only for palette.WebSafe! + p := image.NewPaletted(r, palette.WebSafe) + if i64, ok := i.(image.RGBA64Image); ok { + for y := r.Min.Y; y < r.Max.Y; y++ { + for x := r.Min.X; x < r.Max.X; x++ { + c := i64.RGBA64At(x, y) + r6 := FastGifLut[c.R>>8] + g6 := FastGifLut[c.G>>8] + b6 := FastGifLut[c.B>>8] + p.SetColorIndex(x, y, uint8(36*r6+6*g6+b6)) + } + } + } else { + for y := r.Min.Y; y < r.Max.Y; y++ { + for x := r.Min.X; x < r.Max.X; x++ { + c := i.At(x, y) + r, g, b, _ := c.RGBA() + r6 := FastGifLut[r&0xff] + g6 := FastGifLut[g&0xff] + b6 := FastGifLut[b&0xff] + p.SetColorIndex(x, y, uint8(36*r6+6*g6+b6)) + } + } + } + i = p + default: + q := median.Quantizer(n) + i = q.Paletted(i) + } + return i +} + +func asciify(s []byte) []byte { + a := make([]byte, len(s)) + for i := 0; i < len(s); i++ { + if s[i] > 127 { + a[i] = '.' + continue + } + a[i] = s[i] + } + return a +} diff --git a/wrp.go b/wrp.go index c8fcc23..1005476 100644 --- a/wrp.go +++ b/wrp.go @@ -13,11 +13,8 @@ import ( "embed" "flag" "fmt" - "image" - "image/color/palette" "io" "log" - "net" "net/http" "net/url" "os" @@ -28,9 +25,6 @@ import ( "syscall" "text/template" "time" - - "github.com/MaxHalford/halfgone" - "github.com/soniakeys/quant/median" ) const version = "4.8.0" @@ -68,6 +62,9 @@ type geom struct { c int64 } +// TODO: there is a major overlap/duplication/triplication +// between the 3 data structs, perhps we could reduce to just one? + // Data for html template type uiData struct { Version string @@ -200,45 +197,6 @@ func (rq *wrpReq) printHTML(p printParams) { } } -func gifPalette(i image.Image, n int64) image.Image { - switch n { - case 2: - i = halfgone.FloydSteinbergDitherer{}.Apply(halfgone.ImageToGray(i)) - case 216: - var FastGifLut = [256]int{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5} - r := i.Bounds() - // NOTE: the color index computation below works only for palette.WebSafe! - p := image.NewPaletted(r, palette.WebSafe) - if i64, ok := i.(image.RGBA64Image); ok { - for y := r.Min.Y; y < r.Max.Y; y++ { - for x := r.Min.X; x < r.Max.X; x++ { - c := i64.RGBA64At(x, y) - r6 := FastGifLut[c.R>>8] - g6 := FastGifLut[c.G>>8] - b6 := FastGifLut[c.B>>8] - p.SetColorIndex(x, y, uint8(36*r6+6*g6+b6)) - } - } - } else { - for y := r.Min.Y; y < r.Max.Y; y++ { - for x := r.Min.X; x < r.Max.X; x++ { - c := i.At(x, y) - r, g, b, _ := c.RGBA() - r6 := FastGifLut[r&0xff] - g6 := FastGifLut[g&0xff] - b6 := FastGifLut[b&0xff] - p.SetColorIndex(x, y, uint8(36*r6+6*g6+b6)) - } - } - } - i = p - default: - q := median.Quantizer(n) - i = q.Paletted(i) - } - return i -} - // Process HTTP requests to WRP '/' url func pageServer(w http.ResponseWriter, r *http.Request) { log.Printf("%s Page Request for %s [%+v]\n", r.RemoteAddr, r.URL.Path, r.URL.RawQuery) @@ -259,35 +217,6 @@ func pageServer(w http.ResponseWriter, r *http.Request) { rq.captureScreenshot() } -// Process HTTP requests for images '/img/' url -// TODO: merge this with html mode IMGZ -func imgServer(w http.ResponseWriter, r *http.Request) { - log.Printf("%s IMG Request for %s\n", r.RemoteAddr, r.URL.Path) - imgBuf, ok := img[r.URL.Path] - if !ok || imgBuf.Bytes() == nil { - fmt.Fprintf(w, "Unable to find image %s\n", r.URL.Path) - log.Printf("%s Unable to find image %s\n", r.RemoteAddr, r.URL.Path) - return - } - if !*noDel { - defer delete(img, r.URL.Path) - } - switch { - case strings.HasSuffix(r.URL.Path, ".gif"): - w.Header().Set("Content-Type", "image/gif") - case strings.HasSuffix(r.URL.Path, ".png"): - w.Header().Set("Content-Type", "image/png") - case strings.HasSuffix(r.URL.Path, ".jpg"): - w.Header().Set("Content-Type", "image/jpeg") - } - w.Header().Set("Content-Length", strconv.Itoa(len(imgBuf.Bytes()))) - w.Header().Set("Cache-Control", "max-age=0") - w.Header().Set("Expires", "-1") - w.Header().Set("Pragma", "no-cache") - w.Write(imgBuf.Bytes()) - w.(http.Flusher).Flush() -} - // Process HTTP requests for Shutdown via '/shutdown/' url func haltServer(w http.ResponseWriter, r *http.Request) { log.Printf("%s Shutdown Request for %s\n", r.RemoteAddr, r.URL.Path) @@ -332,32 +261,6 @@ builtin: return string(tmpl) } -// Print my own IP addresses -func printIPs(b string) { - ap := strings.Split(b, ":") - if len(ap) < 1 { - log.Fatal("Wrong format of ipaddress:port") - } - log.Printf("Listen address: %v", b) - if ap[0] != "" && ap[0] != "0.0.0.0" { - return - } - a, err := net.InterfaceAddrs() - if err != nil { - log.Print("Unable to get interfaces: ", err) - return - } - var m string - for _, i := range a { - n, ok := i.(*net.IPNet) - if !ok || n.IP.IsLoopback() || strings.Contains(n.IP.String(), ":") { - continue - } - m = m + n.IP.String() + " " - } - log.Print("My IP addresses: ", m) -} - // Main func main() { var err error From db4ed0d8110c25ba02d06a29087d9695522f4fce Mon Sep 17 00:00:00 2001 From: Antoni Sawicki Date: Mon, 8 Jul 2024 21:36:32 -0700 Subject: [PATCH 04/16] code reorg, comments etc --- cdp.go | 11 +++++------ txt.go | 6 ++++-- util.go | 2 +- wrp.go | 22 ++++++++-------------- 4 files changed, 18 insertions(+), 23 deletions(-) diff --git a/cdp.go b/cdp.go index a698d72..69cd053 100644 --- a/cdp.go +++ b/cdp.go @@ -1,3 +1,4 @@ +// WRP ISMAP / ChromeDP routines package main import ( @@ -111,7 +112,7 @@ func ctxErr(err error, w io.Writer) { // https://github.com/chromedp/chromedp/issues/979 func chromedpCaptureScreenshot(res *[]byte, h int64) chromedp.Action { if res == nil { - panic("res cannot be nil") + panic("res cannot be nil") // TODO: do not panic here, return error } if h == 0 { return chromedp.CaptureScreenshot(res) @@ -223,7 +224,7 @@ func (rq *wrpReq) captureScreenshot() { iH = i.Bounds().Max.Y log.Printf("%s Encoded JPG image: %s, Size: %s, Quality: %d, Res: %dx%d, Time: %vms\n", rq.r.RemoteAddr, imgPath, sSize, *jpgQual, iW, iH, time.Since(st).Milliseconds()) } - rq.printHTML(printParams{ + rq.printUI(uiParams{ bgColor: fmt.Sprintf("#%02X%02X%02X", r, g, b), pageHeight: fmt.Sprintf("%d PX", h), imgSize: sSize, @@ -235,7 +236,6 @@ func (rq *wrpReq) captureScreenshot() { log.Printf("%s Done with capture for %s\n", rq.r.RemoteAddr, rq.url) } -// Process HTTP requests to ISMAP '/map/' url func mapServer(w http.ResponseWriter, r *http.Request) { log.Printf("%s ISMAP Request for %s [%+v]\n", r.RemoteAddr, r.URL.Path, r.URL.RawQuery) rq, ok := ismap[r.URL.Path] @@ -257,16 +257,15 @@ func mapServer(w http.ResponseWriter, r *http.Request) { } log.Printf("%s WrpReq from ISMAP: %+v\n", r.RemoteAddr, rq) if len(rq.url) < 4 { - rq.printHTML(printParams{bgColor: "#FFFFFF"}) + rq.printUI(uiParams{bgColor: "#FFFFFF"}) return } rq.navigate() // TODO: if error from navigate do not capture rq.captureScreenshot() } -// Process HTTP requests for images '/img/' url // TODO: merge this with html mode IMGZ -func imgServer(w http.ResponseWriter, r *http.Request) { +func imgServerMap(w http.ResponseWriter, r *http.Request) { log.Printf("%s IMG Request for %s\n", r.RemoteAddr, r.URL.Path) imgBuf, ok := img[r.URL.Path] if !ok || imgBuf.Bytes() == nil { diff --git a/txt.go b/txt.go index d1f4df6..bef7742 100644 --- a/txt.go +++ b/txt.go @@ -1,8 +1,10 @@ +// WRP TXT / Simple HTML Mode Routines package main // TODO: // - image type based on form value // - also size and quality +// - imgOpt image quality // - non overlaping image names atomic.int etc // - garbage collector / delete old images from map // - add referer header @@ -210,13 +212,13 @@ func (rq *wrpReq) captureMarkdown() { return } log.Printf("Rendered %v bytes html for %v", len(ht.String()), rq.url) - rq.printHTML(printParams{ + rq.printUI(uiParams{ text: string(asciify([]byte(ht.String()))), bgColor: "#FFFFFF", }) } -func imgServerZ(w http.ResponseWriter, r *http.Request) { +func imgServerTxt(w http.ResponseWriter, r *http.Request) { log.Printf("%s IMGZ Request for %s", r.RemoteAddr, r.URL.Path) id := strings.Replace(r.URL.Path, imgZpfx, "", 1) img, err := imgStor.get(id) diff --git a/util.go b/util.go index c746e18..83d8b3c 100644 --- a/util.go +++ b/util.go @@ -12,7 +12,7 @@ import ( "github.com/soniakeys/quant/median" ) -func printIPs(b string) { +func printMyIPs(b string) { ap := strings.Split(b, ":") if len(ap) < 1 { log.Fatal("Wrong format of ipaddress:port") diff --git a/wrp.go b/wrp.go index 1005476..9823702 100644 --- a/wrp.go +++ b/wrp.go @@ -87,7 +87,7 @@ type uiData struct { } // Parameters for HTML print function -type printParams struct { +type uiParams struct { bgColor string pageHeight string imgSize string @@ -117,7 +117,6 @@ type wrpReq struct { r *http.Request } -// Parse HTML Form, Process Input Boxes, Etc. func (rq *wrpReq) parseForm() { rq.r.ParseForm() rq.wrpMode = rq.r.FormValue("m") @@ -163,8 +162,7 @@ func (rq *wrpReq) parseForm() { log.Printf("%s WrpReq from UI Form: %+v\n", rq.r.RemoteAddr, rq) } -// Display WP UI -func (rq *wrpReq) printHTML(p printParams) { +func (rq *wrpReq) printUI(p uiParams) { rq.w.Header().Set("Cache-Control", "max-age=0") rq.w.Header().Set("Expires", "-1") rq.w.Header().Set("Pragma", "no-cache") @@ -197,7 +195,6 @@ func (rq *wrpReq) printHTML(p printParams) { } } -// Process HTTP requests to WRP '/' url func pageServer(w http.ResponseWriter, r *http.Request) { log.Printf("%s Page Request for %s [%+v]\n", r.RemoteAddr, r.URL.Path, r.URL.RawQuery) rq := wrpReq{ @@ -206,7 +203,7 @@ func pageServer(w http.ResponseWriter, r *http.Request) { } rq.parseForm() if len(rq.url) < 4 { - rq.printHTML(printParams{bgColor: "#FFFFFF"}) + rq.printUI(uiParams{bgColor: "#FFFFFF"}) return } rq.navigate() // TODO: if error from navigate do not capture @@ -217,7 +214,6 @@ func pageServer(w http.ResponseWriter, r *http.Request) { rq.captureScreenshot() } -// Process HTTP requests for Shutdown via '/shutdown/' url func haltServer(w http.ResponseWriter, r *http.Request) { log.Printf("%s Shutdown Request for %s\n", r.RemoteAddr, r.URL.Path) w.Header().Set("Content-Type", "text/plain") @@ -230,8 +226,7 @@ func haltServer(w http.ResponseWriter, r *http.Request) { os.Exit(1) } -// returns html template, either from html file or built-in -func tmpl(t string) string { +func wrpTemplate(t string) string { var tmpl []byte fh, err := os.Open(t) if err != nil { @@ -261,7 +256,6 @@ builtin: return string(tmpl) } -// Main func main() { var err error log.SetFlags(log.LstdFlags | log.Lshortfile) @@ -271,7 +265,7 @@ func main() { if len(os.Getenv("PORT")) > 0 { *addr = ":" + os.Getenv(("PORT")) } - printIPs(*addr) + printMyIPs(*addr) n, err := fmt.Sscanf(*fgeom, "%dx%dx%d", &defGeom.w, &defGeom.h, &defGeom.c) if err != nil || n != 3 { log.Fatalf("Unable to parse -g geometry flag / %s", err) @@ -294,14 +288,14 @@ func main() { http.HandleFunc("/", pageServer) http.HandleFunc("/map/", mapServer) - http.HandleFunc("/img/", imgServer) - http.HandleFunc(imgZpfx, imgServerZ) + http.HandleFunc("/img/", imgServerMap) + http.HandleFunc(imgZpfx, imgServerTxt) http.HandleFunc("/shutdown/", haltServer) http.HandleFunc("/favicon.ico", http.NotFound) log.Printf("Default Img Type: %v, Geometry: %+v", *defType, defGeom) - htmlTmpl, err = template.New("wrp.html").Parse(tmpl(*htmFnam)) + htmlTmpl, err = template.New("wrp.html").Parse(wrpTemplate(*htmFnam)) if err != nil { log.Fatal(err) } From bb29ce38de439f162d39bacd21a80bb0981145f3 Mon Sep 17 00:00:00 2001 From: Antoni Sawicki Date: Mon, 8 Jul 2024 21:46:47 -0700 Subject: [PATCH 05/16] pass img type and size --- txt.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/txt.go b/txt.go index bef7742..b0177c4 100644 --- a/txt.go +++ b/txt.go @@ -171,7 +171,7 @@ type astTransformer struct { func (t *astTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) { if link, ok := n.(*ast.Link); ok && entering { - link.Destination = append([]byte("/?t=txt&url="), link.Destination...) + link.Destination = append([]byte("/?m=html&t="+t.imgType+"&s="+strconv.Itoa(t.maxSize)+"&url="), link.Destination...) } if img, ok := n.(*ast.Image); ok && entering { // TODO: dynamic extension based on form value From 335a84f52e4bda2c632b7eefa43cad63deaa7854 Mon Sep 17 00:00:00 2001 From: Antoni Sawicki Date: Mon, 8 Jul 2024 21:54:45 -0700 Subject: [PATCH 06/16] use form image size --- txt.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/txt.go b/txt.go index b0177c4..fb00aa7 100644 --- a/txt.go +++ b/txt.go @@ -2,15 +2,14 @@ package main // TODO: -// - image type based on form value -// - also size and quality -// - imgOpt image quality +// - imgOpt image quality for jpeg // - non overlaping image names atomic.int etc // - garbage collector / delete old images from map // - add referer header // - svg support // - incorrect cert support in both markdown and image download // - unify cdp and txt image handlers +// - use goroutiness to process images // - BOG: DomainFromURL always prefixes with http instead of https // reproduces on vsi vms docs // - BUG: markdown table errors @@ -146,7 +145,7 @@ func smallImg(src []byte, imgType string, maxSize, imgOpt int) ([]byte, error) { if err != nil { return nil, fmt.Errorf("image decode problem: %v", err) } - img = resize.Thumbnail(uint(*defImgSize), uint(*defImgSize), img, resize.NearestNeighbor) + img = resize.Thumbnail(uint(maxSize), uint(maxSize), img, resize.NearestNeighbor) var outBuf bytes.Buffer switch imgType { case "png": From 56fa314d61085738ede0cc62c0683edb3ab9c23f Mon Sep 17 00:00:00 2001 From: Antoni Sawicki Date: Mon, 8 Jul 2024 21:55:03 -0700 Subject: [PATCH 07/16] image type based on form value --- txt.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/txt.go b/txt.go index fb00aa7..130a9be 100644 --- a/txt.go +++ b/txt.go @@ -173,8 +173,7 @@ func (t *astTransformer) Transform(node *ast.Document, reader text.Reader, pc pa link.Destination = append([]byte("/?m=html&t="+t.imgType+"&s="+strconv.Itoa(t.maxSize)+"&url="), link.Destination...) } if img, ok := n.(*ast.Image); ok && entering { - // TODO: dynamic extension based on form value - id := fmt.Sprintf("txt%05d.gif", rand.Intn(99999)) // BUG: atomic.AddInt64 or something that ever increases - time based? + id := fmt.Sprintf("txt%05d.%s", rand.Intn(99999), strings.ToLower(t.imgType)) // BUG: atomic.AddInt64 or something that ever increases - time based? err := fetchImage(id, string(img.Destination), t.imgType, t.maxSize, t.imgOpt) // TODO: use goroutines with waitgroup if err != nil { log.Print(err) From eb38499280a53be127f0b01e77c4fca19c20e196 Mon Sep 17 00:00:00 2001 From: Antoni Sawicki Date: Tue, 9 Jul 2024 01:11:20 -0700 Subject: [PATCH 08/16] todo updates --- txt.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/txt.go b/txt.go index 130a9be..408b7d3 100644 --- a/txt.go +++ b/txt.go @@ -14,6 +14,8 @@ package main // reproduces on vsi vms docs // - BUG: markdown table errors // reproduces on hacker news +// - BUG: captcha errors using html to markdown, perhaps use cdp inner html +// reproduces on https://www.cnn.com/cnn-underscored/electronics import ( "bytes" @@ -51,10 +53,6 @@ const imgZpfx = "/imgz/" func init() { imgStor.img = make(map[string]imageContainer) - // TODO: add garbage collector - // think about how to remove old images - // if removed from cache how to download them later if a browser goes back? - // browser should cache on it's own... but it may request it, what then? } type imageContainer struct { From 0d998af68c432b7633d77202bda6f62e953cebd4 Mon Sep 17 00:00:00 2001 From: Antoni Sawicki Date: Tue, 9 Jul 2024 01:45:03 -0700 Subject: [PATCH 09/16] jpeg quality via form value etc --- cdp.go | 8 ++++---- txt.go | 11 +++++++++-- wrp.go | 59 ++++++++++++++++++++++++++++---------------------------- wrp.html | 5 +++++ 4 files changed, 48 insertions(+), 35 deletions(-) diff --git a/cdp.go b/cdp.go index 69cd053..2fe9ee1 100644 --- a/cdp.go +++ b/cdp.go @@ -192,7 +192,7 @@ func (rq *wrpReq) captureScreenshot() { } st := time.Now() var gifBuf bytes.Buffer - err = gif.Encode(&gifBuf, gifPalette(i, rq.imgOpt), &gif.Options{}) + err = gif.Encode(&gifBuf, gifPalette(i, rq.nColors), &gif.Options{}) if err != nil { log.Printf("%s Failed to encode GIF: %s\n", rq.r.RemoteAddr, err) fmt.Fprintf(rq.w, "
Unable to encode GIF:
%s
\n", err) @@ -202,7 +202,7 @@ func (rq *wrpReq) captureScreenshot() { sSize = fmt.Sprintf("%.0f KB", float32(len(gifBuf.Bytes()))/1024.0) iW = i.Bounds().Max.X iH = i.Bounds().Max.Y - log.Printf("%s Encoded GIF image: %s, Size: %s, Colors: %d, Res: %dx%d, Time: %vms\n", rq.r.RemoteAddr, imgPath, sSize, rq.imgOpt, iW, iH, time.Since(st).Milliseconds()) + log.Printf("%s Encoded GIF image: %s, Size: %s, Colors: %d, Res: %dx%d, Time: %vms\n", rq.r.RemoteAddr, imgPath, sSize, rq.nColors, iW, iH, time.Since(st).Milliseconds()) case "jpg": i, err := png.Decode(bytes.NewReader(pngCap)) if err != nil { @@ -212,7 +212,7 @@ func (rq *wrpReq) captureScreenshot() { } st := time.Now() var jpgBuf bytes.Buffer - err = jpeg.Encode(&jpgBuf, i, &jpeg.Options{Quality: *jpgQual}) + err = jpeg.Encode(&jpgBuf, i, &jpeg.Options{Quality: int(rq.jQual)}) if err != nil { log.Printf("%s Failed to encode JPG: %s\n", rq.r.RemoteAddr, err) fmt.Fprintf(rq.w, "
Unable to encode JPG:
%s
\n", err) @@ -222,7 +222,7 @@ func (rq *wrpReq) captureScreenshot() { sSize = fmt.Sprintf("%.0f KB", float32(len(jpgBuf.Bytes()))/1024.0) iW = i.Bounds().Max.X iH = i.Bounds().Max.Y - log.Printf("%s Encoded JPG image: %s, Size: %s, Quality: %d, Res: %dx%d, Time: %vms\n", rq.r.RemoteAddr, imgPath, sSize, *jpgQual, iW, iH, time.Since(st).Milliseconds()) + log.Printf("%s Encoded JPG image: %s, Size: %s, Quality: %d, Res: %dx%d, Time: %vms\n", rq.r.RemoteAddr, imgPath, sSize, *defJpgQual, iW, iH, time.Since(st).Milliseconds()) } rq.printUI(uiParams{ bgColor: fmt.Sprintf("#%02X%02X%02X", r, g, b), diff --git a/txt.go b/txt.go index 408b7d3..123ffed 100644 --- a/txt.go +++ b/txt.go @@ -14,7 +14,7 @@ package main // reproduces on vsi vms docs // - BUG: markdown table errors // reproduces on hacker news -// - BUG: captcha errors using html to markdown, perhaps use cdp inner html +// - BUG: captcha errors using html to markdown, perhaps use cdp inner html + downloaded images // reproduces on https://www.cnn.com/cnn-underscored/electronics import ( @@ -196,7 +196,14 @@ func (rq *wrpReq) captureMarkdown() { return } log.Printf("Got %v bytes md from %v", len(md), rq.url) - t := &astTransformer{imgType: rq.imgType, maxSize: int(rq.maxSize), imgOpt: int(rq.imgOpt)} // TODO: maxSize still doesn't work + var imgOpt int + switch rq.imgType { + case "jpg": + imgOpt = int(rq.jQual) + case "gif": + imgOpt = int(rq.nColors) + } + t := &astTransformer{imgType: rq.imgType, maxSize: int(rq.maxSize), imgOpt: imgOpt} gm := goldmark.New( goldmark.WithExtensions(extension.GFM), goldmark.WithParserOptions(parser.WithASTTransformers(util.Prioritized(t, 100))), diff --git a/wrp.go b/wrp.go index 9823702..757cf38 100644 --- a/wrp.go +++ b/wrp.go @@ -36,7 +36,7 @@ var ( defType = flag.String("t", "gif", "Image type: png|gif|jpg") wrpMode = flag.String("m", "ismap", "WRP Mode: ismap|html") defImgSize = flag.Int64("is", 200, "html mode default image size") - jpgQual = flag.Int("q", 75, "Jpeg image quality, default 75%") // TODO: this should be form dropdown when jpeg is selected as image type + defJpgQual = flag.Int64("q", 75, "Jpeg image quality, default 75%") // TODO: this should be form dropdown when jpeg is selected as image type fgeom = flag.String("g", "1152x600x216", "Geometry: width x height x colors, height can be 0 for unlimited") htmFnam = flag.String("ui", "wrp.html", "HTML template file for the UI") delay = flag.Duration("s", 2*time.Second, "Delay/sleep after page is rendered and before screenshot is taken") @@ -72,6 +72,7 @@ type uiData struct { URL string BgColor string NColors int64 + JQual int64 Width int64 Height int64 Zoom float64 @@ -100,19 +101,19 @@ type uiParams struct { // WRP Request type wrpReq struct { - url string // url - width int64 // width - height int64 // height - zoom float64 // zoom/scale - colors int64 // #colors - mouseX int64 // mouseX - mouseY int64 // mouseY - keys string // keys to send - buttons string // Fn buttons - imgType string // imgtype - wrpMode string // mode ismap/html - maxSize int64 // image max size for html mode - imgOpt int64 + url string + width int64 + height int64 + zoom float64 + nColors int64 + jQual int64 + mouseX int64 + mouseY int64 + keys string + buttons string + imgType string + wrpMode string + maxSize int64 w http.ResponseWriter r *http.Request } @@ -138,9 +139,19 @@ func (rq *wrpReq) parseForm() { if rq.zoom < 0.1 { rq.zoom = 1.0 } - rq.colors, _ = strconv.ParseInt(rq.r.FormValue("c"), 10, 64) // TODO: this needs to be jpeg quality as well - if rq.colors < 2 || rq.colors > 256 { // ... but maybe not because of this? - rq.colors = defGeom.c + rq.imgType = rq.r.FormValue("t") + switch rq.imgType { + case "png", "gif", "jpg": + default: + rq.imgType = *defType + } + rq.nColors, _ = strconv.ParseInt(rq.r.FormValue("c"), 10, 64) + if rq.nColors < 2 || rq.nColors > 256 { + rq.nColors = defGeom.c + } + rq.jQual, _ = strconv.ParseInt(rq.r.FormValue("q"), 10, 64) + if rq.jQual < 1 || rq.jQual > 100 { + rq.jQual = *defJpgQual } rq.keys = rq.r.FormValue("k") rq.buttons = rq.r.FormValue("Fn") @@ -148,17 +159,6 @@ func (rq *wrpReq) parseForm() { if rq.maxSize == 0 { rq.maxSize = *defImgSize } - rq.imgType = rq.r.FormValue("t") - switch rq.imgType { - case "png": - case "gif": - rq.imgOpt = defGeom.c - case "jpg": - rq.imgOpt = int64(*jpgQual) - default: - rq.imgType = *defType - rq.imgOpt = 80 // TODO: fixme, this needs to be different based on image type - } log.Printf("%s WrpReq from UI Form: %+v\n", rq.r.RemoteAddr, rq) } @@ -177,7 +177,8 @@ func (rq *wrpReq) printUI(p uiParams) { BgColor: p.bgColor, Width: rq.width, Height: rq.height, - NColors: rq.colors, // TODO: this needs to be also jpeg quality + NColors: rq.nColors, + JQual: rq.jQual, Zoom: rq.zoom, MaxSize: rq.maxSize, ImgType: rq.imgType, diff --git a/wrp.html b/wrp.html index af34cc1..2514af0 100644 --- a/wrp.html +++ b/wrp.html @@ -41,6 +41,7 @@ + {{ if eq .ImgType "gif" }} C + {{ end }} + {{ if eq .ImgType "jpg" }} + Q % + {{ end }} {{ if eq .WrpMode "ismap" }} K From 51c4c356514e35bc29d668d792aa2d9b1e34be8d Mon Sep 17 00:00:00 2001 From: Antoni Sawicki Date: Tue, 9 Jul 2024 02:07:50 -0700 Subject: [PATCH 10/16] use local binaries for local docker --- Dockerfile.local | 6 ++++++ Makefile | 7 ++++++- 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 Dockerfile.local diff --git a/Dockerfile.local b/Dockerfile.local new file mode 100644 index 0000000..f392ece --- /dev/null +++ b/Dockerfile.local @@ -0,0 +1,6 @@ +FROM chromedp/headless-shell +ARG TARGETARCH +ADD wrp-${TARGETARCH}-linux /wrp +ENTRYPOINT ["/wrp"] +ENV PATH="/headless-shell:${PATH}" +LABEL maintainer="as@tenoware.com" diff --git a/Makefile b/Makefile index a356a43..e1ef8d7 100755 --- a/Makefile +++ b/Makefile @@ -14,10 +14,15 @@ cross: GOOS=linux GOARCH=arm64 go build -a -o wrp-arm64-linux docker-local: - docker buildx build --platform linux/amd64,linux/arm64 -t tenox7/wrp:latest --load . + GOOS=linux GOARCH=amd64 go build -a -o wrp-amd64-linux + GOOS=linux GOARCH=arm64 go build -a -o wrp-arm64-linux + docker buildx build --platform linux/amd64,linux/arm64 -t tenox7/wrp:latest -f Dockerfile.local --load . docker-push: docker buildx build --platform linux/amd64,linux/arm64 -t tenox7/wrp:latest --push . +docker-clean: + docker buildx prune -a -f + clean: rm -rf wrp-* wrp From 9110ad0853af8b00f341738b6ec12bd023040d52 Mon Sep 17 00:00:00 2001 From: Antoni Sawicki Date: Tue, 9 Jul 2024 02:09:18 -0700 Subject: [PATCH 11/16] embed certs for text mode --- go.mod | 1 + go.sum | 2 ++ wrp.go | 2 ++ 3 files changed, 5 insertions(+) diff --git a/go.mod b/go.mod index 253dfa6..238f62b 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.21.5 require ( github.com/JohannesKaufmann/html-to-markdown v1.6.0 github.com/MaxHalford/halfgone v0.0.0-20171017091812-482157b86ccb + github.com/breml/rootcerts v0.2.17 github.com/chromedp/cdproto v0.0.0-20240519224452-66462be74baa github.com/chromedp/chromedp v0.9.5 github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 diff --git a/go.sum b/go.sum index 53b4a99..78c61d5 100644 --- a/go.sum +++ b/go.sum @@ -6,6 +6,8 @@ github.com/PuerkitoBio/goquery v1.9.2 h1:4/wZksC3KgkQw7SQgkKotmKljk0M6V8TUvA8Wb4 github.com/PuerkitoBio/goquery v1.9.2/go.mod h1:GHPCaP0ODyyxqcNoFGYlAprUFH81NuRPd0GX3Zu2Mvk= github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss= github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU= +github.com/breml/rootcerts v0.2.17 h1:0/M2BE2Apw0qEJCXDOkaiu7d5Sx5ObNfe1BkImJ4u1I= +github.com/breml/rootcerts v0.2.17/go.mod h1:S/PKh+4d1HUn4HQovEB8hPJZO6pUZYrIhmXBhsegfXw= github.com/chromedp/cdproto v0.0.0-20240202021202-6d0b6a386732/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs= github.com/chromedp/cdproto v0.0.0-20240519224452-66462be74baa h1:T3Ho4BWIkoEoMPCj90W2HIPF/k56qk4JWzTs6JUBxVw= github.com/chromedp/cdproto v0.0.0-20240519224452-66462be74baa/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs= diff --git a/wrp.go b/wrp.go index 757cf38..751a703 100644 --- a/wrp.go +++ b/wrp.go @@ -25,6 +25,8 @@ import ( "syscall" "text/template" "time" + + _ "github.com/breml/rootcerts" ) const version = "4.8.0" From 94fb4f437bc2c68f1bb113b4da3b14f9129cabea Mon Sep 17 00:00:00 2001 From: Antoni Sawicki Date: Tue, 9 Jul 2024 02:11:41 -0700 Subject: [PATCH 12/16] todo updates --- txt.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/txt.go b/txt.go index 123ffed..6e99546 100644 --- a/txt.go +++ b/txt.go @@ -2,9 +2,9 @@ package main // TODO: -// - imgOpt image quality for jpeg +// - imgOpt image quality for jpeg <<== TEST // - non overlaping image names atomic.int etc -// - garbage collector / delete old images from map +// - cache + garbage collector / delete old images from map -- test back/button behavior in old browsers // - add referer header // - svg support // - incorrect cert support in both markdown and image download From 3231a0a61c85a5b2f3035dcb8c47bf3ad260982f Mon Sep 17 00:00:00 2001 From: Antoni Sawicki Date: Tue, 9 Jul 2024 22:03:22 -0700 Subject: [PATCH 13/16] count image size for simple html mode --- txt.go | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/txt.go b/txt.go index 6e99546..61179ba 100644 --- a/txt.go +++ b/txt.go @@ -2,8 +2,8 @@ package main // TODO: -// - imgOpt image quality for jpeg <<== TEST // - non overlaping image names atomic.int etc +// - add image processing times counter to the footer // - cache + garbage collector / delete old images from map -- test back/button behavior in old browsers // - add referer header // - svg support @@ -88,40 +88,40 @@ func (i *imageStore) del(id string) { delete(i.img, id) } -func fetchImage(id, url, imgType string, maxSize, imgOpt int) error { +func fetchImage(id, url, imgType string, maxSize, imgOpt int) (int, error) { log.Printf("Downloading IMGZ URL=%q for ID=%q", url, id) - var img []byte + var in []byte var err error switch url[:4] { case "http": r, err := http.Get(url) // TODO: possibly set a header "referer" here if err != nil { - return fmt.Errorf("Error downloading %q: %v", url, err) + return 0, fmt.Errorf("Error downloading %q: %v", url, err) } if r.StatusCode != http.StatusOK { - return fmt.Errorf("Error %q HTTP Status Code: %v", url, r.StatusCode) + return 0, fmt.Errorf("Error %q HTTP Status Code: %v", url, r.StatusCode) } defer r.Body.Close() - img, err = io.ReadAll(r.Body) + in, err = io.ReadAll(r.Body) if err != nil { - return fmt.Errorf("Error reading %q: %v", url, err) + return 0, fmt.Errorf("Error reading %q: %v", url, err) } case "data": idx := strings.Index(url, ",") if idx < 1 { - return fmt.Errorf("image is embeded but unable to find coma: %q", url) + return 0, fmt.Errorf("image is embeded but unable to find coma: %q", url) } - img, err = base64.StdEncoding.DecodeString(url[idx+1:]) + in, err = base64.StdEncoding.DecodeString(url[idx+1:]) if err != nil { - return fmt.Errorf("error decoding image from url embed: %q: %v", url, err) + return 0, fmt.Errorf("error decoding image from url embed: %q: %v", url, err) } } - gif, err := smallImg(img, imgType, maxSize, imgOpt) + out, err := smallImg(in, imgType, maxSize, imgOpt) if err != nil { - return fmt.Errorf("Error scaling down image: %v", err) + return 0, fmt.Errorf("Error scaling down image: %v", err) } - imgStor.add(id, url, gif) - return nil + imgStor.add(id, url, out) + return len(out), nil } func smallImg(src []byte, imgType string, maxSize, imgOpt int) ([]byte, error) { @@ -163,6 +163,7 @@ type astTransformer struct { imgType string maxSize int imgOpt int + totSize int } func (t *astTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { @@ -171,14 +172,15 @@ func (t *astTransformer) Transform(node *ast.Document, reader text.Reader, pc pa link.Destination = append([]byte("/?m=html&t="+t.imgType+"&s="+strconv.Itoa(t.maxSize)+"&url="), link.Destination...) } if img, ok := n.(*ast.Image); ok && entering { - id := fmt.Sprintf("txt%05d.%s", rand.Intn(99999), strings.ToLower(t.imgType)) // BUG: atomic.AddInt64 or something that ever increases - time based? - err := fetchImage(id, string(img.Destination), t.imgType, t.maxSize, t.imgOpt) // TODO: use goroutines with waitgroup + id := fmt.Sprintf("txt%05d.%s", rand.Intn(99999), strings.ToLower(t.imgType)) // BUG: atomic.AddInt64 or something that ever increases - time based? + size, err := fetchImage(id, string(img.Destination), t.imgType, t.maxSize, t.imgOpt) // TODO: use goroutines with waitgroup if err != nil { log.Print(err) n.Parent().RemoveChildren(n) return ast.WalkContinue, nil } img.Destination = []byte(imgZpfx + id) + t.totSize += size } return ast.WalkContinue, nil }) @@ -218,6 +220,7 @@ func (rq *wrpReq) captureMarkdown() { rq.printUI(uiParams{ text: string(asciify([]byte(ht.String()))), bgColor: "#FFFFFF", + imgSize: fmt.Sprintf("%.0f KB", float32(t.totSize)/1024.0), }) } From 9f9014dc153bc03a66d0dfa08e0b9e7280df99bf Mon Sep 17 00:00:00 2001 From: Antoni Sawicki Date: Tue, 9 Jul 2024 22:50:25 -0700 Subject: [PATCH 14/16] use short uuid generator instead of rand --- cdp.go | 8 ++++---- go.mod | 2 ++ go.sum | 4 ++++ txt.go | 11 +++++------ 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/cdp.go b/cdp.go index 2fe9ee1..175fa32 100644 --- a/cdp.go +++ b/cdp.go @@ -12,7 +12,6 @@ import ( "io" "log" "math" - "math/rand" "net/http" "strconv" "strings" @@ -23,6 +22,7 @@ import ( "github.com/chromedp/cdproto/input" "github.com/chromedp/cdproto/page" "github.com/chromedp/chromedp" + "github.com/lithammer/shortuuid/v4" ) func chromedpStart() (context.CancelFunc, context.CancelFunc) { @@ -168,9 +168,9 @@ func (rq *wrpReq) captureScreenshot() { ) // Capture screenshot... ctxErr(chromedp.Run(ctx, chromedpCaptureScreenshot(&pngCap, rq.height)), rq.w) - seq := rand.Intn(9999) - imgPath := fmt.Sprintf("/img/%04d.%s", seq, rq.imgType) - mapPath := fmt.Sprintf("/map/%04d.map", seq) + seq := shortuuid.New() + imgPath := fmt.Sprintf("/img/%s.%s", seq, rq.imgType) + mapPath := fmt.Sprintf("/map/%s.map", seq) ismap[mapPath] = *rq var sSize string var iW, iH int diff --git a/go.mod b/go.mod index 238f62b..f73ea30 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/breml/rootcerts v0.2.17 github.com/chromedp/cdproto v0.0.0-20240519224452-66462be74baa github.com/chromedp/chromedp v0.9.5 + github.com/lithammer/shortuuid/v4 v4.0.0 github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 github.com/soniakeys/quant v1.0.0 github.com/yuin/goldmark v1.7.2 @@ -21,6 +22,7 @@ require ( github.com/gobwas/httphead v0.1.0 // indirect github.com/gobwas/pool v0.2.1 // indirect github.com/gobwas/ws v1.4.0 // indirect + github.com/google/uuid v1.3.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/mailru/easyjson v0.7.7 // indirect golang.org/x/net v0.25.0 // indirect diff --git a/go.sum b/go.sum index 78c61d5..44cbb75 100644 --- a/go.sum +++ b/go.sum @@ -24,6 +24,8 @@ github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6Wezm github.com/gobwas/ws v1.3.2/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY= github.com/gobwas/ws v1.4.0 h1:CTaoG1tojrh4ucGPcoJFiAQUAsEWekEWvLy7GsVNqGs= github.com/gobwas/ws v1.4.0/go.mod h1:G3gNqMNtPppf5XUz7O4shetPpcZ1VJ7zt18dlUeakrc= +github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= @@ -33,6 +35,8 @@ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo= github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs= +github.com/lithammer/shortuuid/v4 v4.0.0 h1:QRbbVkfgNippHOS8PXDkti4NaWeyYfcBTHtw7k08o4c= +github.com/lithammer/shortuuid/v4 v4.0.0/go.mod h1:Zs8puNcrvf2rV9rTH51ZLLcj7ZXqQI3lv67aw4KiB1Y= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6Oo2LfFZAehjjQMERAvZLEDnQ= diff --git a/txt.go b/txt.go index 61179ba..e1975dc 100644 --- a/txt.go +++ b/txt.go @@ -2,9 +2,8 @@ package main // TODO: -// - non overlaping image names atomic.int etc // - add image processing times counter to the footer -// - cache + garbage collector / delete old images from map -- test back/button behavior in old browsers +// - img cache w/garbage collector / test back/button behavior in old browsers // - add referer header // - svg support // - incorrect cert support in both markdown and image download @@ -28,7 +27,6 @@ import ( "image/png" "io" "log" - "math/rand" "net/http" "strconv" "strings" @@ -37,6 +35,7 @@ import ( h2m "github.com/JohannesKaufmann/html-to-markdown" "github.com/JohannesKaufmann/html-to-markdown/plugin" + "github.com/lithammer/shortuuid/v4" "github.com/nfnt/resize" "github.com/yuin/goldmark" "github.com/yuin/goldmark/ast" @@ -172,14 +171,14 @@ func (t *astTransformer) Transform(node *ast.Document, reader text.Reader, pc pa link.Destination = append([]byte("/?m=html&t="+t.imgType+"&s="+strconv.Itoa(t.maxSize)+"&url="), link.Destination...) } if img, ok := n.(*ast.Image); ok && entering { - id := fmt.Sprintf("txt%05d.%s", rand.Intn(99999), strings.ToLower(t.imgType)) // BUG: atomic.AddInt64 or something that ever increases - time based? - size, err := fetchImage(id, string(img.Destination), t.imgType, t.maxSize, t.imgOpt) // TODO: use goroutines with waitgroup + seq := shortuuid.New() + "." + t.imgType + size, err := fetchImage(seq, string(img.Destination), t.imgType, t.maxSize, t.imgOpt) // TODO: use goroutines with waitgroup if err != nil { log.Print(err) n.Parent().RemoveChildren(n) return ast.WalkContinue, nil } - img.Destination = []byte(imgZpfx + id) + img.Destination = []byte(imgZpfx + seq) t.totSize += size } return ast.WalkContinue, nil From 7916fa12603b3686c536f7d6414a54a6eaf64865 Mon Sep 17 00:00:00 2001 From: Antoni Sawicki Date: Tue, 9 Jul 2024 22:53:55 -0700 Subject: [PATCH 15/16] rename files --- cdp.go => ismap.go | 0 txt.go => shtml.go | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename cdp.go => ismap.go (100%) rename txt.go => shtml.go (100%) diff --git a/cdp.go b/ismap.go similarity index 100% rename from cdp.go rename to ismap.go diff --git a/txt.go b/shtml.go similarity index 100% rename from txt.go rename to shtml.go From 4d9319eef2679fc2fb3c9a694cab03726b12cb6f Mon Sep 17 00:00:00 2001 From: Antoni Sawicki Date: Thu, 11 Jul 2024 21:22:13 -0700 Subject: [PATCH 16/16] todo update --- shtml.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/shtml.go b/shtml.go index e1975dc..2955061 100644 --- a/shtml.go +++ b/shtml.go @@ -9,7 +9,9 @@ package main // - incorrect cert support in both markdown and image download // - unify cdp and txt image handlers // - use goroutiness to process images -// - BOG: DomainFromURL always prefixes with http instead of https +// - get inner html from chromedp instead of html2markdown +// +// - BUG: DomainFromURL always prefixes with http instead of https // reproduces on vsi vms docs // - BUG: markdown table errors // reproduces on hacker news