use plugins for markdown

This commit is contained in:
Antoni Sawicki 2024-06-22 11:53:20 -07:00
parent a3c06d346c
commit ee0e72f246
3 changed files with 16 additions and 4 deletions

1
go.mod
View File

@ -22,4 +22,5 @@ require (
github.com/mailru/easyjson v0.7.7 // indirect github.com/mailru/easyjson v0.7.7 // indirect
golang.org/x/net v0.25.0 // indirect golang.org/x/net v0.25.0 // indirect
golang.org/x/sys v0.20.0 // indirect golang.org/x/sys v0.20.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
) )

3
go.sum
View File

@ -26,8 +26,10 @@ github.com/gomarkdown/markdown v0.0.0-20240419095408-642f0ee99ae2 h1:yEt5djSYb4i
github.com/gomarkdown/markdown v0.0.0-20240419095408-642f0ee99ae2/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA= github.com/gomarkdown/markdown v0.0.0-20240419095408-642f0ee99ae2/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo= github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo=
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs= github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs=
@ -106,6 +108,7 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=

16
wrp.go
View File

@ -35,6 +35,8 @@ import (
"text/template" "text/template"
"time" "time"
"github.com/JohannesKaufmann/html-to-markdown/plugin"
h2m "github.com/JohannesKaufmann/html-to-markdown" h2m "github.com/JohannesKaufmann/html-to-markdown"
"github.com/MaxHalford/halfgone" "github.com/MaxHalford/halfgone"
"github.com/chromedp/cdproto/css" "github.com/chromedp/cdproto/css"
@ -429,14 +431,19 @@ func asciify(s []byte) []byte {
} }
func (rq *wrpReq) toMarkdown() { func (rq *wrpReq) toMarkdown() {
log.Printf("Processing Markdown conversion for %v", rq.url) log.Printf("Processing Markdown conversion request for %v", rq.url)
h := h2m.NewConverter(h2m.DomainFromURL(rq.url), true, nil) // TODO: bug - DomainFromURL always prefixes with http:// instead of https
md, err := h.ConvertURL(rq.url) // this causes issues on some websites, write a smarter DomainFromURL
c := h2m.NewConverter(h2m.DomainFromURL(rq.url), true, nil)
c.Use(plugin.GitHubFlavored())
// We could alternatively get inner html from chromedp
md, err := c.ConvertURL(rq.url)
if err != nil { if err != nil {
http.Error(rq.w, err.Error(), http.StatusInternalServerError) http.Error(rq.w, err.Error(), http.StatusInternalServerError)
return return
} }
p := parser.New() log.Printf("Got %v bytes md from %v", len(md), rq.url)
p := parser.NewWithExtensions(parser.CommonExtensions)
d := p.Parse([]byte(md)) d := p.Parse([]byte(md))
ast.WalkFunc(d, func(node ast.Node, entering bool) ast.WalkStatus { ast.WalkFunc(d, func(node ast.Node, entering bool) ast.WalkStatus {
if link, ok := node.(*ast.Link); ok && entering { if link, ok := node.(*ast.Link); ok && entering {
@ -449,6 +456,7 @@ func (rq *wrpReq) toMarkdown() {
}) })
r := html.NewRenderer(html.RendererOptions{}) r := html.NewRenderer(html.RendererOptions{})
ht := markdown.Render(d, r) ht := markdown.Render(d, r)
log.Printf("Rendered %v bytes of html for %v", len(ht), rq.url)
// TODO: add https://github.com/microcosm-cc/bluemonday // TODO: add https://github.com/microcosm-cc/bluemonday
rq.printHTML(printParams{ rq.printHTML(printParams{
text: string(asciify(ht)), text: string(asciify(ht)),