prefix links and remove images

This commit is contained in:
Antoni Sawicki 2024-06-21 01:04:30 -07:00
parent 79c86a7056
commit 55f3c852f9
2 changed files with 18 additions and 5 deletions

19
wrp.go
View File

@@ -42,6 +42,7 @@ import (
"github.com/chromedp/cdproto/page" "github.com/chromedp/cdproto/page"
"github.com/chromedp/chromedp" "github.com/chromedp/chromedp"
"github.com/gomarkdown/markdown" "github.com/gomarkdown/markdown"
"github.com/gomarkdown/markdown/ast"
"github.com/gomarkdown/markdown/html" "github.com/gomarkdown/markdown/html"
"github.com/gomarkdown/markdown/parser" "github.com/gomarkdown/markdown/parser"
"github.com/soniakeys/quant/median" "github.com/soniakeys/quant/median"
@@ -414,7 +415,7 @@ func (rq *wrpReq) capture() {
log.Printf("%s Done with capture for %s\n", rq.r.RemoteAddr, rq.url) log.Printf("%s Done with capture for %s\n", rq.r.RemoteAddr, rq.url)
} }
func (rq *wrpReq) markdown() { func (rq *wrpReq) toMarkdown() {
log.Printf("Processing Markdown conversion for %v", rq.url) log.Printf("Processing Markdown conversion for %v", rq.url)
req, err := http.NewRequest("GET", "https://r.jina.ai/"+rq.url, nil) req, err := http.NewRequest("GET", "https://r.jina.ai/"+rq.url, nil)
if err != nil { if err != nil {
@@ -429,14 +430,23 @@ func (rq *wrpReq) markdown() {
return return
} }
defer resp.Body.Close() defer resp.Body.Close()
p := parser.New()
md, err := io.ReadAll(resp.Body) md, err := io.ReadAll(resp.Body)
if err != nil { if err != nil {
http.Error(rq.w, err.Error(), http.StatusInternalServerError) http.Error(rq.w, err.Error(), http.StatusInternalServerError)
return return
} }
log.Printf("got %v bytes from jina.ai", len(md)) log.Printf("Got %v bytes from jina.ai", len(md))
p := parser.NewWithExtensions(parser.CommonExtensions | parser.Autolink)
d := p.Parse(md) d := p.Parse(md)
ast.WalkFunc(d, func(node ast.Node, entering bool) ast.WalkStatus {
if link, ok := node.(*ast.Link); ok && entering {
link.Destination = append([]byte("?t=txt&url="), link.Destination...)
}
if image, ok := node.(*ast.Image); ok && entering {
image.Destination = nil
}
return ast.GoToNext
})
r := html.NewRenderer(html.RendererOptions{}) r := html.NewRenderer(html.RendererOptions{})
ht := markdown.Render(d, r) ht := markdown.Render(d, r)
rq.printHTML(printParams{ rq.printHTML(printParams{
@@ -452,14 +462,13 @@ func pageServer(w http.ResponseWriter, r *http.Request) {
w: w, w: w,
} }
rq.parseForm() rq.parseForm()
log.Printf("%v", rq.imgType)
if len(rq.url) < 4 { if len(rq.url) < 4 {
rq.printHTML(printParams{bgColor: "#FFFFFF"}) rq.printHTML(printParams{bgColor: "#FFFFFF"})
return return
} }
rq.navigate() // TODO: if error from navigate do not capture rq.navigate() // TODO: if error from navigate do not capture
if rq.imgType == "txt" { if rq.imgType == "txt" {
rq.markdown() rq.toMarkdown()
return return
} }
rq.capture() rq.capture()

View File

@@ -6,6 +6,7 @@
<FORM ACTION="/" METHOD="POST"> <FORM ACTION="/" METHOD="POST">
<INPUT TYPE="TEXT" NAME="url" VALUE="{{.URL}}" SIZE="20"> <INPUT TYPE="TEXT" NAME="url" VALUE="{{.URL}}" SIZE="20">
<INPUT TYPE="SUBMIT" VALUE="Go"> <INPUT TYPE="SUBMIT" VALUE="Go">
{{ if ne .ImgType "txt" }}
<INPUT TYPE="SUBMIT" NAME="Fn" VALUE="Bk"> <INPUT TYPE="SUBMIT" NAME="Fn" VALUE="Bk">
<INPUT TYPE="SUBMIT" NAME="Fn" VALUE="St"> <INPUT TYPE="SUBMIT" NAME="Fn" VALUE="St">
<INPUT TYPE="SUBMIT" NAME="Fn" VALUE="Re"> <INPUT TYPE="SUBMIT" NAME="Fn" VALUE="Re">
@@ -22,12 +23,14 @@
<OPTION VALUE="1.2" {{ if eq .Zoom 1.2}}SELECTED{{end}}>1.2 x</OPTION> <OPTION VALUE="1.2" {{ if eq .Zoom 1.2}}SELECTED{{end}}>1.2 x</OPTION>
<OPTION VALUE="1.3" {{ if eq .Zoom 1.3}}SELECTED{{end}}>1.3 x</OPTION> <OPTION VALUE="1.3" {{ if eq .Zoom 1.3}}SELECTED{{end}}>1.3 x</OPTION>
</SELECT> </SELECT>
{{ end }}
T <SELECT NAME="t"> T <SELECT NAME="t">
<OPTION VALUE="png" {{ if eq .ImgType "png"}}SELECTED{{end}}>PNG</OPTION> <OPTION VALUE="png" {{ if eq .ImgType "png"}}SELECTED{{end}}>PNG</OPTION>
<OPTION VALUE="gif" {{ if eq .ImgType "gif"}}SELECTED{{end}}>GIF</OPTION> <OPTION VALUE="gif" {{ if eq .ImgType "gif"}}SELECTED{{end}}>GIF</OPTION>
<OPTION VALUE="jpg" {{ if eq .ImgType "jpg"}}SELECTED{{end}}>JPG</OPTION> <OPTION VALUE="jpg" {{ if eq .ImgType "jpg"}}SELECTED{{end}}>JPG</OPTION>
<OPTION VALUE="txt" {{ if eq .ImgType "txt"}}SELECTED{{end}}>TXT</OPTION> <OPTION VALUE="txt" {{ if eq .ImgType "txt"}}SELECTED{{end}}>TXT</OPTION>
</SELECT> </SELECT>
{{ if ne .ImgType "txt" }}
C <SELECT NAME="c"> C <SELECT NAME="c">
<OPTION VALUE="256" {{ if eq .NColors 256}}SELECTED{{end}}>256</OPTION> <OPTION VALUE="256" {{ if eq .NColors 256}}SELECTED{{end}}>256</OPTION>
<OPTION VALUE="216" {{ if eq .NColors 216}}SELECTED{{end}}>216</OPTION> <OPTION VALUE="216" {{ if eq .NColors 216}}SELECTED{{end}}>216</OPTION>
@@ -44,6 +47,7 @@
<INPUT TYPE="SUBMIT" NAME="Fn" VALUE="^"> <INPUT TYPE="SUBMIT" NAME="Fn" VALUE="^">
<INPUT TYPE="SUBMIT" NAME="Fn" VALUE="v"> <INPUT TYPE="SUBMIT" NAME="Fn" VALUE="v">
<INPUT TYPE="SUBMIT" NAME="Fn" VALUE="&gt;" SIZE="1">--> <INPUT TYPE="SUBMIT" NAME="Fn" VALUE="&gt;" SIZE="1">-->
{{ end }}
</FORM> </FORM>
<BR> <BR>
{{if .ImgURL}} {{if .ImgURL}}