From 79c86a70569dbfdbcd069d884166cd4efd95661a Mon Sep 17 00:00:00 2001 From: Antoni Sawicki Date: Wed, 19 Jun 2024 23:37:44 -0700 Subject: [PATCH] initial markdown support --- go.mod | 1 + go.sum | 2 ++ wrp.go | 46 ++++++++++++++++++++++++++++++++++++++++++++-- wrp.html | 2 ++ 4 files changed, 49 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 32517ad..ef9af8c 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/MaxHalford/halfgone v0.0.0-20171017091812-482157b86ccb github.com/chromedp/cdproto v0.0.0-20240519224452-66462be74baa github.com/chromedp/chromedp v0.9.5 + github.com/gomarkdown/markdown v0.0.0-20240419095408-642f0ee99ae2 github.com/soniakeys/quant v1.0.0 ) diff --git a/go.sum b/go.sum index f21c5e7..99e73fe 100644 --- a/go.sum +++ b/go.sum @@ -22,6 +22,8 @@ github.com/gobwas/ws v1.3.1/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/K github.com/gobwas/ws v1.3.2/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY= github.com/gobwas/ws v1.4.0 h1:CTaoG1tojrh4ucGPcoJFiAQUAsEWekEWvLy7GsVNqGs= github.com/gobwas/ws v1.4.0/go.mod h1:G3gNqMNtPppf5XUz7O4shetPpcZ1VJ7zt18dlUeakrc= +github.com/gomarkdown/markdown v0.0.0-20240419095408-642f0ee99ae2 h1:yEt5djSYb4iNtmV9iJGVday+i4e9u6Mrn5iP64HH5QM= +github.com/gomarkdown/markdown v0.0.0-20240419095408-642f0ee99ae2/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo= diff --git a/wrp.go b/wrp.go index 6ba0f3f..33b661d 100644 --- a/wrp.go +++ b/wrp.go @@ -13,7 +13,6 @@ import ( "embed" "flag" "fmt" - "html/template" "image" "image/color/palette" "image/gif" @@ -33,6 +32,7 @@ import ( "strconv" "strings" "syscall" + "text/template" "time" "github.com/MaxHalford/halfgone" @@ -41,6 +41,9 @@ import ( "github.com/chromedp/cdproto/input" "github.com/chromedp/cdproto/page" "github.com/chromedp/chromedp" + "github.com/gomarkdown/markdown" + "github.com/gomarkdown/markdown/html" + "github.com/gomarkdown/markdown/parser" "github.com/soniakeys/quant/median" ) @@ -90,6 +93,7 @@ type uiData struct { ImgHeight int MapURL string PageHeight string + TeXT string } // Parameters for HTML print function @@ -101,6 +105,7 @@ type printParams struct { mapURL string imgWidth int imgHeight int + text string } // WRP Request @@ -147,6 +152,7 @@ func (rq *wrpReq) parseForm() { case "png": case "gif": case "jpg": + case "txt": default: rq.imgType = *defType } @@ -174,6 +180,7 @@ func (rq *wrpReq) printHTML(p printParams) { ImgURL: p.imgURL, MapURL: p.mapURL, PageHeight: p.pageHeight, + TeXT: p.text, } err := htmlTmpl.Execute(rq.w, data) if err != nil { @@ -224,7 +231,7 @@ func (rq *wrpReq) action() chromedp.Action { return chromedp.KeyEvent(rq.keys) } // Navigate to URL - log.Printf("%s Processing Capture Request for %s\n", rq.r.RemoteAddr, rq.url) + log.Printf("%s Processing Navigate Request for %s\n", rq.r.RemoteAddr, rq.url) return chromedp.Navigate(rq.url) } @@ -407,6 +414,36 @@ func (rq *wrpReq) capture() { log.Printf("%s Done with capture for %s\n", rq.r.RemoteAddr, rq.url) } +func (rq *wrpReq) markdown() { + log.Printf("Processing Markdown conversion for %v", rq.url) + req, err := http.NewRequest("GET", "https://r.jina.ai/"+rq.url, nil) + if err != nil { + http.Error(rq.w, err.Error(), http.StatusInternalServerError) + return + } + req.Header.Set("x-respond-with", "markdown") + cli := &http.Client{} + resp, err := cli.Do(req) + if err != nil { + http.Error(rq.w, err.Error(), http.StatusInternalServerError) + return + } + defer resp.Body.Close() + p := parser.New() + md, err := io.ReadAll(resp.Body) + if err != nil { + http.Error(rq.w, err.Error(), http.StatusInternalServerError) + return + } + log.Printf("got %v bytes from jina.ai", len(md)) + d := p.Parse(md) + r := html.NewRenderer(html.RendererOptions{}) + ht := markdown.Render(d, r) + rq.printHTML(printParams{ + text: string(ht), + }) +} + // Process HTTP requests to WRP '/' url func pageServer(w http.ResponseWriter, r *http.Request) { log.Printf("%s Page Request for %s [%+v]\n", r.RemoteAddr, r.URL.Path, r.URL.RawQuery) @@ -415,11 +452,16 @@ func pageServer(w http.ResponseWriter, r *http.Request) { w: w, } rq.parseForm() + log.Printf("%v", rq.imgType) if len(rq.url) < 4 { rq.printHTML(printParams{bgColor: "#FFFFFF"}) return } rq.navigate() // TODO: if error from navigate do not capture + if rq.imgType == "txt" { + rq.markdown() + return + } rq.capture() } diff --git a/wrp.html b/wrp.html index b0980ea..a504b4b 100644 --- a/wrp.html +++ b/wrp.html @@ -26,6 +26,7 @@ + C