mirror of
https://github.com/tenox7/wrp.git
synced 2024-11-28 03:52:12 +00:00
use plugins for markdown
This commit is contained in:
parent
a3c06d346c
commit
ee0e72f246
1
go.mod
1
go.mod
@ -22,4 +22,5 @@ require (
|
||||
github.com/mailru/easyjson v0.7.7 // indirect
|
||||
golang.org/x/net v0.25.0 // indirect
|
||||
golang.org/x/sys v0.20.0 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
)
|
||||
|
3
go.sum
3
go.sum
@ -26,8 +26,10 @@ github.com/gomarkdown/markdown v0.0.0-20240419095408-642f0ee99ae2 h1:yEt5djSYb4i
|
||||
github.com/gomarkdown/markdown v0.0.0-20240419095408-642f0ee99ae2/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA=
|
||||
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
|
||||
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
|
||||
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|
||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo=
|
||||
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs=
|
||||
@ -106,6 +108,7 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc
|
||||
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
|
||||
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||
|
16
wrp.go
16
wrp.go
@ -35,6 +35,8 @@ import (
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/JohannesKaufmann/html-to-markdown/plugin"
|
||||
|
||||
h2m "github.com/JohannesKaufmann/html-to-markdown"
|
||||
"github.com/MaxHalford/halfgone"
|
||||
"github.com/chromedp/cdproto/css"
|
||||
@ -429,14 +431,19 @@ func asciify(s []byte) []byte {
|
||||
}
|
||||
|
||||
func (rq *wrpReq) toMarkdown() {
|
||||
log.Printf("Processing Markdown conversion for %v", rq.url)
|
||||
h := h2m.NewConverter(h2m.DomainFromURL(rq.url), true, nil)
|
||||
md, err := h.ConvertURL(rq.url)
|
||||
log.Printf("Processing Markdown conversion request for %v", rq.url)
|
||||
// TODO: bug - DomainFromURL always prefixes with http:// instead of https://
|
||||
// this causes issues on some websites; write a smarter DomainFromURL
|
||||
c := h2m.NewConverter(h2m.DomainFromURL(rq.url), true, nil)
|
||||
c.Use(plugin.GitHubFlavored())
|
||||
// We could alternatively get inner html from chromedp
|
||||
md, err := c.ConvertURL(rq.url)
|
||||
if err != nil {
|
||||
http.Error(rq.w, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
p := parser.New()
|
||||
log.Printf("Got %v bytes md from %v", len(md), rq.url)
|
||||
p := parser.NewWithExtensions(parser.CommonExtensions)
|
||||
d := p.Parse([]byte(md))
|
||||
ast.WalkFunc(d, func(node ast.Node, entering bool) ast.WalkStatus {
|
||||
if link, ok := node.(*ast.Link); ok && entering {
|
||||
@ -449,6 +456,7 @@ func (rq *wrpReq) toMarkdown() {
|
||||
})
|
||||
r := html.NewRenderer(html.RendererOptions{})
|
||||
ht := markdown.Render(d, r)
|
||||
log.Printf("Rendered %v bytes of html for %v", len(ht), rq.url)
|
||||
// TODO: sanitize the rendered HTML with https://github.com/microcosm-cc/bluemonday
|
||||
rq.printHTML(printParams{
|
||||
text: string(asciify(ht)),
|
||||
|
Loading…
Reference in New Issue
Block a user