mirror of
https://github.com/tenox7/wrp.git
synced 2024-11-28 03:52:12 +00:00
use plugins for markdown
This commit is contained in:
parent
a3c06d346c
commit
ee0e72f246
1
go.mod
1
go.mod
@ -22,4 +22,5 @@ require (
|
|||||||
github.com/mailru/easyjson v0.7.7 // indirect
|
github.com/mailru/easyjson v0.7.7 // indirect
|
||||||
golang.org/x/net v0.25.0 // indirect
|
golang.org/x/net v0.25.0 // indirect
|
||||||
golang.org/x/sys v0.20.0 // indirect
|
golang.org/x/sys v0.20.0 // indirect
|
||||||
|
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||||
)
|
)
|
||||||
|
3
go.sum
3
go.sum
@ -26,8 +26,10 @@ github.com/gomarkdown/markdown v0.0.0-20240419095408-642f0ee99ae2 h1:yEt5djSYb4i
|
|||||||
github.com/gomarkdown/markdown v0.0.0-20240419095408-642f0ee99ae2/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA=
|
github.com/gomarkdown/markdown v0.0.0-20240419095408-642f0ee99ae2/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA=
|
||||||
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
|
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
|
||||||
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
|
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
|
||||||
|
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
|
||||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||||
|
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
|
||||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||||
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo=
|
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kULo2bwGEkFvCePZ3qHDDTC3/J9Swo=
|
||||||
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs=
|
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs=
|
||||||
@ -106,6 +108,7 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc
|
|||||||
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
|
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
|
||||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
|
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
|
||||||
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||||
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
|
||||||
|
16
wrp.go
16
wrp.go
@ -35,6 +35,8 @@ import (
|
|||||||
"text/template"
|
"text/template"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/JohannesKaufmann/html-to-markdown/plugin"
|
||||||
|
|
||||||
h2m "github.com/JohannesKaufmann/html-to-markdown"
|
h2m "github.com/JohannesKaufmann/html-to-markdown"
|
||||||
"github.com/MaxHalford/halfgone"
|
"github.com/MaxHalford/halfgone"
|
||||||
"github.com/chromedp/cdproto/css"
|
"github.com/chromedp/cdproto/css"
|
||||||
@ -429,14 +431,19 @@ func asciify(s []byte) []byte {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (rq *wrpReq) toMarkdown() {
|
func (rq *wrpReq) toMarkdown() {
|
||||||
log.Printf("Processing Markdown conversion for %v", rq.url)
|
log.Printf("Processing Markdown conversion request for %v", rq.url)
|
||||||
h := h2m.NewConverter(h2m.DomainFromURL(rq.url), true, nil)
|
// TODO: bug - DomainFromURL always prefixes with http:// instead of https
|
||||||
md, err := h.ConvertURL(rq.url)
|
// this causes issues on some websites, write a smarter DomainFromURL
|
||||||
|
c := h2m.NewConverter(h2m.DomainFromURL(rq.url), true, nil)
|
||||||
|
c.Use(plugin.GitHubFlavored())
|
||||||
|
// We could alternatively get inner html from chromedp
|
||||||
|
md, err := c.ConvertURL(rq.url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(rq.w, err.Error(), http.StatusInternalServerError)
|
http.Error(rq.w, err.Error(), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
p := parser.New()
|
log.Printf("Got %v bytes md from %v", len(md), rq.url)
|
||||||
|
p := parser.NewWithExtensions(parser.CommonExtensions)
|
||||||
d := p.Parse([]byte(md))
|
d := p.Parse([]byte(md))
|
||||||
ast.WalkFunc(d, func(node ast.Node, entering bool) ast.WalkStatus {
|
ast.WalkFunc(d, func(node ast.Node, entering bool) ast.WalkStatus {
|
||||||
if link, ok := node.(*ast.Link); ok && entering {
|
if link, ok := node.(*ast.Link); ok && entering {
|
||||||
@ -449,6 +456,7 @@ func (rq *wrpReq) toMarkdown() {
|
|||||||
})
|
})
|
||||||
r := html.NewRenderer(html.RendererOptions{})
|
r := html.NewRenderer(html.RendererOptions{})
|
||||||
ht := markdown.Render(d, r)
|
ht := markdown.Render(d, r)
|
||||||
|
log.Printf("Rendered %v bytes of html for %v", len(ht), rq.url)
|
||||||
// TODO: add https://github.com/microcosm-cc/bluemonday
|
// TODO: add https://github.com/microcosm-cc/bluemonday
|
||||||
rq.printHTML(printParams{
|
rq.printHTML(printParams{
|
||||||
text: string(asciify(ht)),
|
text: string(asciify(ht)),
|
||||||
|
Loading…
Reference in New Issue
Block a user