initial markdown support

This commit is contained in:
Antoni Sawicki 2024-06-19 23:37:44 -07:00
parent 19f4be3ac1
commit 79c86a7056
4 changed files with 49 additions and 2 deletions

1
go.mod
View File

@ -6,6 +6,7 @@ require (
github.com/MaxHalford/halfgone v0.0.0-20171017091812-482157b86ccb
github.com/chromedp/cdproto v0.0.0-20240519224452-66462be74baa
github.com/chromedp/chromedp v0.9.5
github.com/gomarkdown/markdown v0.0.0-20240419095408-642f0ee99ae2
github.com/soniakeys/quant v1.0.0
)

2
go.sum
View File

@ -22,6 +22,8 @@ github.com/gobwas/ws v1.3.1/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/K
github.com/gobwas/ws v1.3.2/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY=
github.com/gobwas/ws v1.4.0 h1:CTaoG1tojrh4ucGPcoJFiAQUAsEWekEWvLy7GsVNqGs=
github.com/gobwas/ws v1.4.0/go.mod h1:G3gNqMNtPppf5XUz7O4shetPpcZ1VJ7zt18dlUeakrc=
github.com/gomarkdown/markdown v0.0.0-20240419095408-642f0ee99ae2 h1:yEt5djSYb4iNtmV9iJGVday+i4e9u6Mrn5iP64HH5QM=
github.com/gomarkdown/markdown v0.0.0-20240419095408-642f0ee99ae2/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo=

46
wrp.go
View File

@ -13,7 +13,6 @@ import (
"embed"
"flag"
"fmt"
"html/template"
"image"
"image/color/palette"
"image/gif"
@ -33,6 +32,7 @@ import (
"strconv"
"strings"
"syscall"
"text/template"
"time"
"github.com/MaxHalford/halfgone"
@ -41,6 +41,9 @@ import (
"github.com/chromedp/cdproto/input"
"github.com/chromedp/cdproto/page"
"github.com/chromedp/chromedp"
"github.com/gomarkdown/markdown"
"github.com/gomarkdown/markdown/html"
"github.com/gomarkdown/markdown/parser"
"github.com/soniakeys/quant/median"
)
@ -90,6 +93,7 @@ type uiData struct {
ImgHeight int
MapURL string
PageHeight string
TeXT string
}
// Parameters for HTML print function
@ -101,6 +105,7 @@ type printParams struct {
mapURL string
imgWidth int
imgHeight int
text string
}
// WRP Request
@ -147,6 +152,7 @@ func (rq *wrpReq) parseForm() {
case "png":
case "gif":
case "jpg":
case "txt":
default:
rq.imgType = *defType
}
@ -174,6 +180,7 @@ func (rq *wrpReq) printHTML(p printParams) {
ImgURL: p.imgURL,
MapURL: p.mapURL,
PageHeight: p.pageHeight,
TeXT: p.text,
}
err := htmlTmpl.Execute(rq.w, data)
if err != nil {
@ -224,7 +231,7 @@ func (rq *wrpReq) action() chromedp.Action {
return chromedp.KeyEvent(rq.keys)
}
// Navigate to URL
log.Printf("%s Processing Capture Request for %s\n", rq.r.RemoteAddr, rq.url)
log.Printf("%s Processing Navigate Request for %s\n", rq.r.RemoteAddr, rq.url)
return chromedp.Navigate(rq.url)
}
@ -407,6 +414,36 @@ func (rq *wrpReq) capture() {
log.Printf("%s Done with capture for %s\n", rq.r.RemoteAddr, rq.url)
}
// markdown fetches a Markdown rendition of rq.url from the r.jina.ai reader
// service, converts it to HTML with gomarkdown and emits it through
// rq.printHTML in the text field of printParams.
//
// Any failure (building the request, talking to the upstream service, a
// non-200 upstream status, or reading the body) is reported to the client
// with http.Error and nothing is rendered.
func (rq *wrpReq) markdown() {
	// Upper bound for the whole upstream exchange; a zero-value http.Client
	// has no timeout and would block this handler forever on a stalled peer.
	const fetchTimeout = 30 * time.Second

	log.Printf("Processing Markdown conversion for %v", rq.url)
	req, err := http.NewRequest("GET", "https://r.jina.ai/"+rq.url, nil)
	if err != nil {
		http.Error(rq.w, err.Error(), http.StatusInternalServerError)
		return
	}
	// Ask the reader service to answer with Markdown.
	req.Header.Set("x-respond-with", "markdown")

	cli := &http.Client{Timeout: fetchTimeout}
	resp, err := cli.Do(req)
	if err != nil {
		http.Error(rq.w, err.Error(), http.StatusInternalServerError)
		return
	}
	defer resp.Body.Close()

	// Do not render an upstream error page as if it were the document.
	if resp.StatusCode != http.StatusOK {
		http.Error(rq.w, fmt.Sprintf("jina.ai returned %s", resp.Status), http.StatusBadGateway)
		return
	}

	md, err := io.ReadAll(resp.Body)
	if err != nil {
		http.Error(rq.w, err.Error(), http.StatusInternalServerError)
		return
	}
	log.Printf("got %v bytes from jina.ai", len(md))

	// Parse the Markdown into an AST and render it to HTML using the
	// library's default parser and renderer options.
	doc := parser.New().Parse(md)
	renderer := html.NewRenderer(html.RendererOptions{})
	out := markdown.Render(doc, renderer)

	rq.printHTML(printParams{
		text: string(out),
	})
}
// Process HTTP requests to WRP '/' url
func pageServer(w http.ResponseWriter, r *http.Request) {
log.Printf("%s Page Request for %s [%+v]\n", r.RemoteAddr, r.URL.Path, r.URL.RawQuery)
@ -415,11 +452,16 @@ func pageServer(w http.ResponseWriter, r *http.Request) {
w: w,
}
rq.parseForm()
log.Printf("%v", rq.imgType)
if len(rq.url) < 4 {
rq.printHTML(printParams{bgColor: "#FFFFFF"})
return
}
rq.navigate() // TODO: if error from navigate do not capture
if rq.imgType == "txt" {
rq.markdown()
return
}
rq.capture()
}

View File

@ -26,6 +26,7 @@
<OPTION VALUE="png" {{ if eq .ImgType "png"}}SELECTED{{end}}>PNG</OPTION>
<OPTION VALUE="gif" {{ if eq .ImgType "gif"}}SELECTED{{end}}>GIF</OPTION>
<OPTION VALUE="jpg" {{ if eq .ImgType "jpg"}}SELECTED{{end}}>JPG</OPTION>
<OPTION VALUE="txt" {{ if eq .ImgType "txt"}}SELECTED{{end}}>TXT</OPTION>
</SELECT>
C <SELECT NAME="c">
<OPTION VALUE="256" {{ if eq .NColors 256}}SELECTED{{end}}>256</OPTION>
@ -51,6 +52,7 @@
</A>
<P>
{{end}}
{{.TeXT}}
<FONT SIZE="-2">
<A HREF="/?url=https://github.com/tenox7/wrp/&w={{.Width}}&h={{.Height}}&s={{printf "%.1f" .Zoom}}&c={{.NColors}}&t={{.ImgType}}">Web Rendering Proxy {{.Version}}</A> |
<A HREF="/shutdown/">Shutdown WRP</A> |