use goldmark instead of gomarkdown

This commit is contained in:
Antoni Sawicki 2024-06-22 15:17:26 -07:00
parent b92478fd6a
commit da524ff275
3 changed files with 39 additions and 25 deletions

1
go.mod
View File

@ -20,6 +20,7 @@ require (
github.com/gobwas/ws v1.4.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/yuin/goldmark v1.7.2 // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/sys v0.20.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect

2
go.sum
View File

@ -53,6 +53,8 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yuin/goldmark v1.7.1 h1:3bajkSilaCbjdKVsKdZjZCLBNPL9pYzrCakKaf4U49U=
github.com/yuin/goldmark v1.7.1/go.mod h1:uzxRWxtg69N339t3louHJ7+O03ezfj6PlliRlaOzY1E=
github.com/yuin/goldmark v1.7.2 h1:NjGd7lO7zrUn/A7eKwn5PEOt4ONYGqpxSEeZuduvgxc=
github.com/yuin/goldmark v1.7.2/go.mod h1:uzxRWxtg69N339t3louHJ7+O03ezfj6PlliRlaOzY1E=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=

59
wrp.go
View File

@ -43,11 +43,13 @@ import (
"github.com/chromedp/cdproto/input"
"github.com/chromedp/cdproto/page"
"github.com/chromedp/chromedp"
"github.com/gomarkdown/markdown"
"github.com/gomarkdown/markdown/ast"
"github.com/gomarkdown/markdown/html"
"github.com/gomarkdown/markdown/parser"
"github.com/soniakeys/quant/median"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
)
const version = "4.6.3"
@ -429,38 +431,47 @@ func asciify(s []byte) []byte {
return a
}
// astTransformer is registered with the goldmark parser as an AST transformer.
// It rewrites link destinations and strips images from the parsed document.
type astTransformer struct{}

// Transform walks the parsed document, prefixes every link destination with
// "?t=txt&url=" and removes every image node from the tree.
//
// Images are only collected during the walk and detached afterwards:
// removing a node while ast.Walk is iterating its siblings clears the node's
// next-sibling pointer and would cut the traversal of that parent short.
// Each image is detached individually with RemoveChild, because
// RemoveChildren drops every child of the parent, including the text and
// links surrounding the image.
//
// reader and pc are unused.
func (t *astTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
	var images []ast.Node

	// The walker below never returns an error, so the result of ast.Walk
	// carries no information and is deliberately discarded.
	_ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
		if !entering {
			return ast.WalkContinue, nil
		}
		switch v := n.(type) {
		case *ast.Link:
			// NOTE(review): the destination is appended verbatim; a target
			// containing '&' or '#' is presumably split by the query parser
			// on the receiving side - confirm and escape if needed.
			v.Destination = append([]byte("?t=txt&url="), v.Destination...)
		case *ast.Image:
			// TODO: perhaps instead of deleting images convert them to links
			// smaller images or ascii? https://github.com/TheZoraiz/ascii-image-converter
			images = append(images, v)
		}
		return ast.WalkContinue, nil
	})

	for _, img := range images {
		// The parent can already be nil if an enclosing image was detached
		// first; in that case the node is no longer reachable from the
		// document anyway.
		if parent := img.Parent(); parent != nil {
			parent.RemoveChild(parent, img)
		}
	}
}
// toMarkdown serves rq.url as simplified HTML: the page is converted to
// Markdown with the h2m converter (GitHub-flavored plugin enabled), the
// Markdown is rendered back to HTML, and the asciified result is handed to
// rq.printHTML with a white background. A conversion or rendering error is
// reported to the client as HTTP 500 and ends the request.
//
// NOTE(review): this listing interleaves the removed gomarkdown statements
// with the added goldmark statements; confirm which lines are live before
// editing the body.
func (rq *wrpReq) toMarkdown() {
log.Printf("Processing Markdown conversion request for %v", rq.url)
// TODO: bug - DomainFromURL always prefixes with http:// instead of https
// this causes issues on some websites, write a smarter DomainFromURL
// this causes issues on some websites, fix or write a smarter DomainFromURL
c := h2m.NewConverter(h2m.DomainFromURL(rq.url), true, nil)
c.Use(plugin.GitHubFlavored())
// We could alternatively get inner html from chromedp
md, err := c.ConvertURL(rq.url)
md, err := c.ConvertURL(rq.url) // We could also get inner html from chromedp
if err != nil {
http.Error(rq.w, err.Error(), http.StatusInternalServerError)
return
}
log.Printf("Got %v bytes md from %v", len(md), rq.url)
// TODO: Use GoldMark instead
// https://github.com/yuin/goldmark
p := parser.NewWithExtensions(parser.CommonExtensions)
d := p.Parse([]byte(md))
ast.WalkFunc(d, func(node ast.Node, entering bool) ast.WalkStatus {
if link, ok := node.(*ast.Link); ok && entering {
link.Destination = append([]byte("?t=txt&url="), link.Destination...)
// goldmark with the GFM extension; astTransformer is registered as an AST
// transformer with priority 100.
gm := goldmark.New(
goldmark.WithExtensions(extension.GFM),
goldmark.WithParserOptions(parser.WithASTTransformers(util.Prioritized(&astTransformer{}, 100))),
)
var ht bytes.Buffer
err = gm.Convert([]byte(md), &ht)
if err != nil {
http.Error(rq.w, err.Error(), http.StatusInternalServerError)
return
}
if _, ok := node.(*ast.Image); ok && entering {
ast.RemoveFromTree(node)
}
return ast.GoToNext
})
r := html.NewRenderer(html.RendererOptions{})
ht := markdown.Render(d, r)
log.Printf("Rendered %v bytes of html for %v", len(ht), rq.url)
// TODO: add https://github.com/microcosm-cc/bluemonday
log.Printf("Rendered %v bytes html for %v", len(ht.String()), rq.url)
// The rendered HTML is passed through asciify before being printed.
rq.printHTML(printParams{
text: string(asciify(ht)),
text: string(asciify([]byte(ht.String()))),
bgColor: "#FFFFFF",
})
}