diff --git a/go.sum b/go.sum index 901e901..afab967 100644 --- a/go.sum +++ b/go.sum @@ -35,6 +35,8 @@ github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80 h1:6Yzfa6GP0rIo/kUL github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6Oo2LfFZAehjjQMERAvZLEDnQ= +github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8= github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhAVbbWWBzr41ElhJx5tXPWkIHA2HWPRuw= github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -60,6 +62,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= +golang.org/x/image v0.18.0 h1:jGzIakQa/ZXI1I0Fxvaa9W7yP25TqT6cHIHn+6CqvSQ= +golang.org/x/image v0.18.0/go.mod h1:4yyo5vMFQjVjUcVk4jEQcU9MGy/rulF5WvUILseCM2E= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= diff --git a/txt.go b/txt.go new file mode 100644 index 0000000..99526a2 --- /dev/null +++ b/txt.go @@ -0,0 +1,187 @@ +package main + +import ( + "bytes" + "errors" + "fmt" + "image" + "image/gif" + "image/jpeg" + "image/png" + "io" + "log" + "math/rand" + "net/http" + "strconv" + "strings" + "sync" + "time" + + h2m "github.com/JohannesKaufmann/html-to-markdown" + "github.com/JohannesKaufmann/html-to-markdown/plugin" + "github.com/nfnt/resize" + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/extension" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" + "golang.org/x/image/webp" +) + +var imgStor imageStore + +func init() { + imgStor.img = make(map[string]imageContainer) + // TODO: add garbage collector + // think about how to remove old images + // if removed from cache how to download them later if a browser goes back? + // browser should cache on it's own... but it may request it, what then? +} + +type imageContainer struct { + data []byte + url string + added time.Time +} + +type imageStore struct { + img map[string]imageContainer + sync.Mutex +} + +func (i *imageStore) add(id, url string, img []byte) { + i.Lock() + defer i.Unlock() + i.img[id] = imageContainer{data: img, url: url, added: time.Now()} +} + +func (i *imageStore) get(id string) ([]byte, error) { + i.Lock() + defer i.Unlock() + img, ok := i.img[id] + if !ok { + return nil, errors.New("not found") + } + return img.data, nil +} + +func (i *imageStore) del(id string) { + i.Lock() + defer i.Unlock() + delete(i.img, id) +} + +func grabImage(id, url string) { + log.Printf(">>> Downloading ID=%q URL=%q", id, url) + // TODO: possibly set a header "referer" here + r, err := http.Get(url) + if err != nil { + log.Printf("Error downloading %q: %v", url, err) + return + } + defer r.Body.Close() + img, err := io.ReadAll(r.Body) + if err != nil { + log.Printf("Error reading %q: %v", url, err) + return + } + gif, err := smallGif(img) + if err != nil { + log.Printf("Error scaling down image: %v", err) + return + } + imgStor.add(id, url, gif) +} + +type astTransformer struct{} + +func (t *astTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { + ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) { + if link, ok := n.(*ast.Link); ok && entering { + link.Destination = append([]byte("?t=txt&url="), link.Destination...) + } + if img, ok := n.(*ast.Image); ok && entering { + id := fmt.Sprintf("txt%04d.gif", rand.Intn(9999)) // atomic.AddInt64 could be better here + grabImage(id, string(img.Destination)) // TODO: goroutines with waitgroup + img.Destination = []byte("/imgz/" + id) + } + return ast.WalkContinue, nil + }) +} + +func (rq *wrpReq) captureMarkdown() { + log.Printf("Processing Markdown conversion request for %v", rq.url) + // TODO: bug - DomainFromURL always prefixes with http:// instead of https + // this causes issues on some websites, fix or write a smarter DomainFromURL + c := h2m.NewConverter(h2m.DomainFromURL(rq.url), true, nil) + c.Use(plugin.GitHubFlavored()) + md, err := c.ConvertURL(rq.url) // We could also get inner html from chromedp + if err != nil { + http.Error(rq.w, err.Error(), http.StatusInternalServerError) + return + } + log.Printf("Got %v bytes md from %v", len(md), rq.url) + t := &astTransformer{} + gm := goldmark.New( + goldmark.WithExtensions(extension.GFM), + goldmark.WithParserOptions(parser.WithASTTransformers(util.Prioritized(t, 100))), + ) + var ht bytes.Buffer + err = gm.Convert([]byte(md), &ht) + if err != nil { + http.Error(rq.w, err.Error(), http.StatusInternalServerError) + return + } + log.Printf("Rendered %v bytes html for %v", len(ht.String()), rq.url) + rq.printHTML(printParams{ + text: string(asciify([]byte(ht.String()))), + }) +} + +func imgServerZ(w http.ResponseWriter, r *http.Request) { + log.Printf("%s IMGZ Request for %s\n", r.RemoteAddr, r.URL.Path) + id := strings.Replace(r.URL.Path, "/imgz/", "", 1) + img, err := imgStor.get(id) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + imgStor.del(id) + w.Header().Set("Content-Type", "image/gif") + w.Header().Set("Content-Length", strconv.Itoa(len(img))) + // TODO: we may want to let the client browser cache images + w.Header().Set("Cache-Control", "max-age=0") + w.Header().Set("Expires", "-1") + w.Header().Set("Pragma", "no-cache") + w.Write(img) + w.(http.Flusher).Flush() +} + +func smallGif(src []byte) ([]byte, error) { + t := http.DetectContentType(src) + var err error + var img image.Image + switch t { + case "image/png": + img, err = png.Decode(bytes.NewReader(src)) + case "image/gif": + img, err = gif.Decode(bytes.NewReader(src)) + case "image/jpeg": + img, err = jpeg.Decode(bytes.NewReader(src)) + case "image/webp": + img, err = webp.Decode(bytes.NewReader(src)) + default: + err = errors.New("unknown content type: " + t) + } + if err != nil { + return nil, fmt.Errorf("image decode problem: %v", err) + } + sm := resize.Resize(200, 0, img, resize.NearestNeighbor) + var gifBuf bytes.Buffer + err = gif.Encode(&gifBuf, gifPalette(sm, 216), &gif.Options{}) + if err != nil { + return nil, fmt.Errorf("gif encode problem: %v", err) + } + return gifBuf.Bytes(), nil +} diff --git a/wrp.go b/wrp.go index 506efc3..2851895 100644 --- a/wrp.go +++ b/wrp.go @@ -669,6 +669,7 @@ func main() { http.HandleFunc("/", pageServer) http.HandleFunc("/map/", mapServer) http.HandleFunc("/img/", imgServer) + http.HandleFunc("/imgz/", imgServerZ) http.HandleFunc("/shutdown/", haltServer) http.HandleFunc("/favicon.ico", http.NotFound)