From da524ff2752ae34e18ae4ae35083f35fd8689d31 Mon Sep 17 00:00:00 2001
From: Antoni Sawicki
Date: Sat, 22 Jun 2024 15:17:26 -0700
Subject: [PATCH] use goldmark instead of gomarkdown

---
 go.mod |  1 +
 go.sum |  2 ++
 wrp.go | 84 +++++++++++++++++++++++++++++++++++++++++-----------------
 3 files changed, 62 insertions(+), 25 deletions(-)

diff --git a/go.mod b/go.mod
index bab96da..eb42549 100644
--- a/go.mod
+++ b/go.mod
@@ -20,6 +20,7 @@ require (
 	github.com/gobwas/ws v1.4.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
 	github.com/mailru/easyjson v0.7.7 // indirect
+	github.com/yuin/goldmark v1.7.2 // indirect
 	golang.org/x/net v0.25.0 // indirect
 	golang.org/x/sys v0.20.0 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect
diff --git a/go.sum b/go.sum
index 22adfd0..901e901 100644
--- a/go.sum
+++ b/go.sum
@@ -53,6 +53,8 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P
 github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
 github.com/yuin/goldmark v1.7.1 h1:3bajkSilaCbjdKVsKdZjZCLBNPL9pYzrCakKaf4U49U=
 github.com/yuin/goldmark v1.7.1/go.mod h1:uzxRWxtg69N339t3louHJ7+O03ezfj6PlliRlaOzY1E=
+github.com/yuin/goldmark v1.7.2 h1:NjGd7lO7zrUn/A7eKwn5PEOt4ONYGqpxSEeZuduvgxc=
+github.com/yuin/goldmark v1.7.2/go.mod h1:uzxRWxtg69N339t3louHJ7+O03ezfj6PlliRlaOzY1E=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
 golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
diff --git a/wrp.go b/wrp.go
index 5590e39..d90990e 100644
--- a/wrp.go
+++ b/wrp.go
@@ -43,11 +43,13 @@ import (
 	"github.com/chromedp/cdproto/input"
 	"github.com/chromedp/cdproto/page"
 	"github.com/chromedp/chromedp"
-	"github.com/gomarkdown/markdown"
-	"github.com/gomarkdown/markdown/ast"
-	"github.com/gomarkdown/markdown/html"
-	"github.com/gomarkdown/markdown/parser"
 	"github.com/soniakeys/quant/median"
+	"github.com/yuin/goldmark"
+	"github.com/yuin/goldmark/ast"
+	"github.com/yuin/goldmark/extension"
+	"github.com/yuin/goldmark/parser"
+	"github.com/yuin/goldmark/text"
+	"github.com/yuin/goldmark/util"
 )
 
 const version = "4.6.3"
@@ -429,38 +431,70 @@ func asciify(s []byte) []byte {
 	return a
 }
 
+// astTransformer is a goldmark AST transformer used by toMarkdown: links are
+// rewritten to relative "?t=txt&url=" requests and images are dropped.
+type astTransformer struct{}
+
+// Transform prefixes every link destination with "?t=txt&url=" and removes
+// every image node from the document.
+//
+// Images are collected during the walk and detached afterwards, because
+// unlinking a node while ast.Walk is iterating over its siblings would end the
+// iteration early. Each image is detached on its own with RemoveChild;
+// RemoveChildren would empty the whole parent, taking the surrounding text
+// with it.
+func (t *astTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
+	var images []ast.Node
+	// The walker below never returns an error, so neither does Walk.
+	_ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
+		if !entering {
+			return ast.WalkContinue, nil
+		}
+		switch v := n.(type) {
+		case *ast.Link:
+			v.Destination = append([]byte("?t=txt&url="), v.Destination...)
+		case *ast.Image:
+			// TODO: perhaps instead of deleting images convert them to links
+			// smaller images or ascii? https://github.com/TheZoraiz/ascii-image-converter
+			images = append(images, n)
+		}
+		return ast.WalkContinue, nil
+	})
+	for _, img := range images {
+		if parent := img.Parent(); parent != nil {
+			parent.RemoveChild(parent, img)
+		}
+	}
+}
+
+// toMarkdown converts the page at rq.url to Markdown, renders that Markdown to
+// HTML with goldmark (links rewritten, images removed) and passes the result
+// to rq.printHTML. Conversion and rendering errors are reported as HTTP 500.
 func (rq *wrpReq) toMarkdown() {
 	log.Printf("Processing Markdown conversion request for %v", rq.url)
 	// TODO: bug - DomainFromURL always prefixes with http:// instead of https
-	// this causes issues on some websites, write a smarter DomainFromURL
+	// this causes issues on some websites, fix or write a smarter DomainFromURL
 	c := h2m.NewConverter(h2m.DomainFromURL(rq.url), true, nil)
 	c.Use(plugin.GitHubFlavored())
-	// We could alternatively get inner html from chromedp
-	md, err := c.ConvertURL(rq.url)
+	md, err := c.ConvertURL(rq.url) // We could also get inner html from chromedp
 	if err != nil {
 		http.Error(rq.w, err.Error(), http.StatusInternalServerError)
 		return
 	}
 	log.Printf("Got %v bytes md from %v", len(md), rq.url)
-	// TODO: Use GoldMark instead
-	// https://github.com/yuin/goldmark
-	p := parser.NewWithExtensions(parser.CommonExtensions)
-	d := p.Parse([]byte(md))
-	ast.WalkFunc(d, func(node ast.Node, entering bool) ast.WalkStatus {
-		if link, ok := node.(*ast.Link); ok && entering {
-			link.Destination = append([]byte("?t=txt&url="), link.Destination...)
-		}
-		if _, ok := node.(*ast.Image); ok && entering {
-			ast.RemoveFromTree(node)
-		}
-		return ast.GoToNext
-	})
-	r := html.NewRenderer(html.RendererOptions{})
-	ht := markdown.Render(d, r)
-	log.Printf("Rendered %v bytes of html for %v", len(ht), rq.url)
-	// TODO: add https://github.com/microcosm-cc/bluemonday
+	gm := goldmark.New(
+		goldmark.WithExtensions(extension.GFM),
+		goldmark.WithParserOptions(parser.WithASTTransformers(util.Prioritized(&astTransformer{}, 100))),
+	)
+	var ht bytes.Buffer
+	err = gm.Convert([]byte(md), &ht)
+	if err != nil {
+		http.Error(rq.w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	log.Printf("Rendered %v bytes html for %v", ht.Len(), rq.url)
 	rq.printHTML(printParams{
-		text:    string(asciify(ht)),
+		text:    string(asciify(ht.Bytes())),
 		bgColor: "#FFFFFF",
 	})
 }