Enormous vore update - redesign homepage with BOLD on active pages - add saves page - fix up supporting docs - import archiveis dep - AND MORE!
@@ -0,0 +1,206 @@
+package archiveis
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"net/url"
+	"regexp"
+	"strings"
+	"time"
+
+	"github.com/PuerkitoBio/goquery"
+)
+
+var (
+	BaseURL               = "https://archive.is" // Overrideable default package value.
+	HTTPHost              = "archive.is"         // Overrideable default package value.
+	UserAgent             = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36" // Overrideable default package value.
+	DefaultRequestTimeout = 10 * time.Second // Overrideable default package value.
+	DefaultPollInterval   = 5 * time.Second  // Overrideable default package value.
+
+	jsLocationExpr = regexp.MustCompile(`document\.location\.replace\(["']([^"']+)`)
+)
+
+// Config settings for page capture client behavior.
+type Config struct {
+	Anyway         bool          // Force archival even if there is already a recent snapshot of the page.
+	Wait           bool          // Wait until the crawl has been completed.
+	WaitTimeout    time.Duration // Max time to wait for crawl completion. Default is unlimited.
+	PollInterval   time.Duration // Interval between crawl completion checks. Defaults to 5s.
+	RequestTimeout time.Duration // Overrides default request timeout.
+	SubmitID       string        // Accepts a user-provided submitid.
+}
+
+// Capture archives the provided URL using the archive.is service.
+func Capture(u string, cfg ...Config) (string, error) {
+	timeout := DefaultRequestTimeout
+	if len(cfg) > 0 && cfg[0].RequestTimeout > time.Duration(0) {
+		timeout = cfg[0].RequestTimeout
+	}
+
+	var (
+		submitID string
+		anyway   string
+		body     []byte
+		resp     *http.Response
+		final    string
+		err      error
+	)
+
+	if len(cfg) > 0 && len(cfg[0].SubmitID) > 0 {
+		submitID = cfg[0].SubmitID
+		log.Printf("Will use caller-provided submitid=%v", submitID)
+	} else if submitID, err = newSubmitID(timeout); err != nil {
+		return "", err
+	}
+
+	if len(cfg) > 0 && cfg[0].Anyway {
+		anyway = "&anyway=1"
+	}
+
+	content := fmt.Sprintf("submitid=%v&url=%v%v", url.QueryEscape(submitID), url.QueryEscape(u), anyway)
+
+	resp, body, err = doRequest("POST", BaseURL+"/submit/", io.NopCloser(bytes.NewBufferString(content)), timeout)
+	if err != nil {
+		return "", err
+	}
+
+	if resp.StatusCode/100 == 3 {
+		// Page has already been archived.
+		log.Print("Detected redirect to archived page")
+
+		if loc := resp.Header.Get("Location"); len(loc) == 0 {
+			return "", fmt.Errorf("received a redirect status-code %v with an empty Location header", resp.StatusCode)
+		} else {
+			final = loc
+		}
+	} else {
+		// log.Printf("body: %+v\n", string(body))
+		// log.Printf("headers: %+v\n", resp.Header)
+		// log.Printf("trailers: %+v\n", resp.Trailer)
+
+		doc, err := goquery.NewDocumentFromReader(bytes.NewBuffer(body))
+		if err != nil {
+			return "", fmt.Errorf("constructing goquery doc from submission response: %s", err)
+		}
+
+		// The response may embed the archive URL in a JS redirect.
+		if script := doc.Find("script").First(); script.Length() > 0 {
+			js := strings.Trim(script.Text(), "\r\n\t ")
+			if match := jsLocationExpr.FindStringSubmatch(js); len(match) > 1 {
+				final = match[1]
+			}
+		}
+
+		if len(final) == 0 {
+			input := doc.Find("input[name=id]").First()
+			if input.Length() == 0 {
+				return "", errors.New("page archive ID not found in submission response content")
+			}
+			id, exists := input.Attr("value")
+			if !exists {
+				log.Printf("No page archive ID value detected, here was the page content: %v", string(body))
+				return "", errors.New("no page archive ID value available")
+			}
+
+			final = fmt.Sprintf("%v/%v", BaseURL, id)
+		}
+	}
+
+	log.Printf("Capture for url=%v -> %v", u, final)
+
+	if len(cfg) > 0 && cfg[0].Wait {
+		var (
+			waitTimeout  = cfg[0].WaitTimeout
+			pollInterval = DefaultPollInterval
+		)
+
+		if cfg[0].PollInterval > time.Duration(0) {
+			pollInterval = cfg[0].PollInterval
+		}
+
+		if err := waitForCrawlToFinish(final, body, timeout, waitTimeout, pollInterval); err != nil {
+			return final, err
+		}
+	}
+
+	return final, nil
+}
+
+// newSubmitID gets the index page and extracts the form submission identifier.
+func newSubmitID(timeout time.Duration) (string, error) {
+	_, body, err := doRequest("", BaseURL, nil, timeout)
+	if err != nil {
+		return "", err
+	}
+
+	doc, err := goquery.NewDocumentFromReader(bytes.NewBuffer(body))
+	if err != nil {
+		return "", fmt.Errorf("constructing goquery doc from index: %s", err)
+	}
+
+	input := doc.Find("input[name=submitid]").First()
+	if input.Length() == 0 {
+		return "", errors.New("no submitid element found")
+	}
+	id, exists := input.Attr("value")
+	if !exists {
+		return "", errors.New("no submitid value available")
+	}
+	return id, nil
+}
+
+// waitForCrawlToFinish polls the archive URL until its placeholder
+// <html><body> page is replaced by real content, or waitTimeout elapses
+// (waitTimeout == 0 means wait forever).
+func waitForCrawlToFinish(url string, body []byte, requestTimeout time.Duration, waitTimeout time.Duration, pollInterval time.Duration) error {
+	var (
+		expr    = regexp.MustCompile(`<html><body>`)
+		started = time.Now()
+		until   = started.Add(waitTimeout)
+		err     error
+	)
+
+	// If the submission response already contains real content, the
+	// crawl has finished and only needs to be checked for errors.
+	if body != nil && !expr.Match(body) {
+		return checkCrawlResult(body)
+	}
+
+	for {
+		if waitTimeout != time.Duration(0) && time.Now().After(until) {
+			return fmt.Errorf("timed out after %s waiting for crawl to complete", waitTimeout)
+		}
+
+		_, body, err = doRequest("", url, nil, requestTimeout)
+		if err != nil {
+			log.Printf("Non-fatal error while polling for crawl completion: %s (continuing on, %s elapsed so far)", err, time.Since(started))
+		} else if !expr.Match(body) {
+			log.Printf("Detected crawl completion after %s", time.Since(started))
+			break
+		}
+
+		time.Sleep(pollInterval)
+	}
+	return nil
+}
+
+// checkCrawlResult searches for known archive.is errors in HTML content.
+func checkCrawlResult(body []byte) error {
+	doc, err := goquery.NewDocumentFromReader(bytes.NewBuffer(body))
+	if err != nil {
+		return fmt.Errorf("crawl result check gq new doc: %s", err)
+	}
+	if block := doc.Find("html > body > div").First(); block.Length() > 0 {
+		if text := strings.Trim(block.Text(), "\r\n\t "); text == "Error: Network error." {
+			return fmt.Errorf("archive.is crawl result: Network Error")
+		}
+	}
+	return nil
+}
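For reference, a minimal sketch of how a caller might drive the package above, using only its exported API (Capture plus the optional Config); the target URL here is arbitrary:

package main

import (
	"fmt"
	"log"
	"time"

	"git.j3s.sh/vore/archiveis"
)

func main() {
	// Ask archive.is for a snapshot and block until the crawl
	// finishes, giving up after two minutes.
	archiveURL, err := archiveis.Capture("https://j3s.sh", archiveis.Config{
		Wait:        true,
		WaitTimeout: 2 * time.Minute,
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("snapshot:", archiveURL)
}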
@@ -0,0 +1,74 @@
+package archiveis
+
+import (
+	"fmt"
+	"io"
+	"net"
+	"net/http"
+	"strings"
+	"time"
+)
+
+func doRequest(method string, url string, body io.ReadCloser, timeout time.Duration) (*http.Response, []byte, error) {
+	req, err := newRequest(method, url, body)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	if method != "" && method != "get" {
+		req.Header.Set("content-type", "application/x-www-form-urlencoded")
+	}
+
+	client := newClient(timeout)
+	resp, err := client.Do(req)
+	if err != nil {
+		return resp, nil, fmt.Errorf("executing request: %s", err)
+	}
+	if resp.StatusCode/100 != 2 && resp.StatusCode/100 != 3 {
+		return resp, nil, fmt.Errorf("%v request to %v received unhappy response status-code=%v", method, url, resp.StatusCode)
+	}
+	respBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return resp, nil, fmt.Errorf("reading response body: %s", err)
+	}
+	if err := resp.Body.Close(); err != nil {
+		return resp, respBody, fmt.Errorf("closing response body: %s", err)
+	}
+	return resp, respBody, nil
+}
+
+func newRequest(method string, url string, body io.ReadCloser) (*http.Request, error) {
+	req, err := http.NewRequest(method, url, body)
+	if err != nil {
+		return nil, fmt.Errorf("creating %v request to %v: %s", method, url, err)
+	}
+
+	req.Host = HTTPHost
+
+	hostname := strings.Split(BaseURL, "://")[1]
+	req.Header.Set("Host", hostname)
+	req.Header.Set("Origin", hostname)
+	req.Header.Set("Authority", hostname)
+	req.Header.Set("User-Agent", UserAgent)
+	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8")
+	req.Header.Set("Referer", BaseURL+"/")
+
+	return req, nil
+}
+
+func newClient(timeout time.Duration) *http.Client {
+	c := &http.Client{
+		Timeout: timeout,
+		Transport: &http.Transport{
+			Proxy: http.ProxyFromEnvironment,
+			Dial: (&net.Dialer{
+				Timeout:   timeout,
+				KeepAlive: timeout,
+			}).Dial,
+			TLSHandshakeTimeout:   timeout,
+			ResponseHeaderTimeout: timeout,
+			ExpectContinueTimeout: 1 * time.Second,
+		},
+	}
+	return c
+}
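Because BaseURL, HTTPHost, UserAgent, and the timeouts are plain package-level vars, a caller can repoint the client before capturing. A hypothetical sketch (archive.ph as an assumed alternate archive.today domain; nothing in this commit configures it):

package main

import (
	"log"
	"time"

	"git.j3s.sh/vore/archiveis"
)

func main() {
	// Assumed mirror override; the Host header must match the new base URL.
	archiveis.BaseURL = "https://archive.ph"
	archiveis.HTTPHost = "archive.ph"
	archiveis.DefaultRequestTimeout = 30 * time.Second

	if _, err := archiveis.Capture("https://example.com"); err != nil {
		log.Fatal(err)
	}
}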
@@ -19,6 +19,7 @@ }
 </script>
 <script data-goatcounter="https://vore.goatcounter.com/count" async src="//gc.zgo.at/count.js"></script>
+<script src="https://unpkg.com/htmx.org@1.9.12"></script>
 <title>{{ .Title }}</title>
 </head>
@@ -2,42 +2,60 @@ {{ define "index" }}
 {{ template "head" . }}
 {{ template "nav" . }}
 <p>
-2024-01-21
+vore.website is a minimal web-based rss/atom feed reader.
 
-hey ya'll, sorry for that outage!
-i was on vacation & when i got back,
-work was on fire. this is the first
-actually free moment i've had for awhile.
+- subscribe to feeds
+- view posts chronologically
+- save stuff that interests you
 
-i'm so happy that so many of you use
-this website, i feel very grateful for
-your presence.
+no unread indicators, no choresome tagging system - no bullshit.
 
-- j3s
+vore.website is free for anyone to use,
+and will never be turned into a vehicle for profit.
 
-vore.website is a minimal, no-bullshit web-based rss/atom feed reader.
+<3 <a href="https://j3s.sh">j3s</a>
 
-vore.website is free for anyone to use, and will never be turned into a vehicle for profit.
-vore.website is a love letter to the rss world - long live syndication!!!
+===CHANGELOG===
+
+2024-04-29
 
-history:
+i have save support & reworked the UI a little
 
- 2020: <a href="https://git.j3s.sh/jrss">jrss</a> is born
-jrss had one concept: define list of feeds, and jrss will keep that list up to date and display it to you via the web.
+vore "saves" work like so:
+ - you click save
+ - the post title + link are saved to db
+ - the post is archived via archive.is
+ - the archive link is saved to the db
 
- 2021: <a href="https://git.j3s.sh/j3s.sh">j3s.sh</a> consumes jrss
-my main website eventually consumed jrss because it was easier to maintain.
+this way, anything that you save _should_ be
+available basically indefinitely.
+
+i wanted a way to save nice posts & prevent link
+rot - the vore save feature is a convenient way
+to do that.
+
+
+2024-04-20
+
+forest prompted me to dig into why vore's performance
+was so shitty - i dug in & uncovered an infinite loop
+bug in the rss library i was using! the bug has been
+reported & squashed
+
+everything should be fast again!
 
- 2023: <a href="https://git.j3s.sh/vore">vore</a>
-vore takes the simple foundation from j3s.sh and exposes it as a hosted service for anyone to use!
-rss lovers rejoice.
+2024-01-21
 
-love,
+hey ya'll, sorry for that outage!
+i was on vacation & when i got back,
+work was on fire. this is the first
+actually free moment i've had for awhile.
 
-<a href="https://j3s.sh">jes</a>
+i'm so happy that so many of you use
+this website, i feel very grateful for
+your presence.
 </p>
 {{ template "tail" . }}
 {{ end }}
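The save flow described in the changelog above maps onto the new code later in this commit; a condensed sketch (the helper method here is hypothetical, and the real saveHandler's auth check and error strings are elided):

// distilled from saveHandler in site.go below
func (s *Site) save(username, postURL string) error {
	// 1. look up the post title + link in the in-memory feed cache
	item, err := s.reaper.GetItem(postURL)
	if err != nil {
		return err
	}
	// 2. snapshot the page via archive.is (synchronous for now)
	archiveURL, err := archiveis.Capture(postURL)
	if err != nil {
		return err
	}
	// 3. persist title, link, and archive link to sqlite
	return s.db.WriteSavedItem(username, sqlite.SavedItem{
		ItemTitle:  item.Title,
		ItemURL:    item.Link,
		ArchiveURL: archiveURL,
	})
}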
@@ -0,0 +1,42 @@
+{{ define "saves" }}
+{{ template "head" . }}
+{{ template "nav" . }}
+
+{{ $length := len .Data }} {{ if eq $length 0 }}
+{{ if .LoggedIn }}
+<p>
+you haven't saved anything yet. :(
+
+use the "save" button to save posts that you like!
+
+vore's save system is unique:
+ when you click the "save" button, vore will:
+ - store the article title & domain
+ - submit an https://archive.is request on your behalf
+ - store the archive.is link
+
+this ensures that all saved articles will remain
+accessible indefinitely.
+
+it also means that you may save the same article
+more than once, if you'd like!
+
+currently, saves cannot be deleted.
+</p>
+{{ end }}
+{{ end }}
+<ul>
+{{ range .Data }}
+  <li>
+    <a href="{{ .ItemURL }}">{{ .ItemTitle }}</a>
+    <span class=puny>
+      (<a href="{{ .ArchiveURL }}">archived</a>)
+    </span>
+    <br>
+    <span class=puny>saved {{ .CreatedAt }} via <a href="//{{ .ItemURL | printDomain }}">{{ .ItemURL | printDomain }}</a></span>
+  </li>
+{{ end }}
+</ul>
+
+{{ template "tail" . }}
+{{ end }}
@@ -2,7 +2,7 @@ {{ define "settings" }}
 {{ template "head" . }}
 {{ template "nav" . }}
 <h3>Settings</h3>
-<p>your public url: <a href="/{{ .Username }}">vore.website/{{ .Username }}</a>
+<p>your public homepage: <a href="/{{ .Username }}">vore.website/{{ .Username }}</a>
 {{ len .Data }} subscriptions:
 </p>
@@ -21,9 +21,10 @@ <p>
 ‼️ tutorial ‼️
 
 once you have subscribed to some feeds,
-their posts will appear on your timeline.
+their posts will appear chronologically
+on your homepage.
 
-note that all timelines are public ‼️
+note that vore homepages are public ‼️
 
 here are some feed urls to play with
 copy them into the text box above
@@ -32,10 +33,8 @@
 https://100r.co/links/rss.xml
 https://begriffs.com/atom.xml
 https://blog.passtheballsocrates.com/feed/
-https://cyberia.club/blog/blog.xml
 https://facklambda.dev/atom.xml
 https://herman.bearblog.dev/feed/
-https://icyphox.sh/blog/feed.xml
 https://j3s.sh/feed.atom
 https://katherine.cox-buday.com/rss.xml
 https://sequentialread.com/rss
@@ -1,6 +1,6 @@
 html {
   font-family: monospace;
-  max-width: 600px;
+  max-width: 500px;
   margin: 20px auto;
   /* make sure the scrollbar is always shown so the width doesnt change between pages */
@@ -40,11 +40,19 @@ font-size: 1.1rem;
 }
 
 nav a {
-  border-bottom: 1px dotted #000;
   color: #000;
 }
 
+nav .left {
+  float: left;
+}
+
+nav .right {
+  float: right;
+}
+
 .puny {
+  padding-top: 0px;
   color: grey;
   font-size: 0.75rem;
 }
@@ -7,7 +7,7 @@ {{ if .LoggedIn }}
 <p>
 you don't seem to have any feeds yet.
-<a href="/settings">add your first feed here!</a>
+go to <a href="/settings">/settings</a> to add your first feed!
 </p>
 {{ end }}
 {{ end }}
@@ -18,7 +18,12 @@ <a href="{{ .Link }}">
   {{ .Title }}
   </a>
   <br>
-  <span class=puny title="{{ .Date }}">published {{ .Date | timeSince }} via <a href="//{{ .Link | printDomain }}">{{ .Link | printDomain }}</a></span>
+  <span class=puny title="{{ .Date }}">
+    published {{ .Date | timeSince }} via
+    <a href="//{{ .Link | printDomain }}">
+    {{ .Link | printDomain }}</a>
+    | <a href="/save/{{ .Link | escapeURL }}">save</a>
+  </span>
   </li>
 {{ end }}
 </ul>
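Of the template helpers used here, printDomain is defined in site.go below, while timeSince and escapeURL don't appear in this diff; escapeURL presumably wraps url.QueryEscape, since saveHandler undoes it with url.QueryUnescape. A sketch of how such a FuncMap might be wired up, with the assumptions marked:

package main

import (
	"html/template"
	"net/url"
	"os"
	"time"
)

// Assumed wiring: vore's real FuncMap registration is not part of this
// diff, and timeSince's actual signature is a guess.
var funcs = template.FuncMap{
	"escapeURL": url.QueryEscape, // mirrors url.QueryUnescape in saveHandler
	"timeSince": func(t time.Time) string {
		return time.Since(t).Round(time.Minute).String()
	},
	// "printDomain" would be bound to (*Site).printDomain from site.go.
}

var userTmpl = template.Must(template.New("user").Funcs(funcs).Parse(
	`<a href="/save/{{ .Link | escapeURL }}">save</a>`))

func main() {
	_ = userTmpl.Execute(os.Stdout, struct{ Link string }{"https://j3s.sh/post"})
}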
@@ -3,17 +3,20 @@
 go 1.22
 
 require (
+	github.com/PuerkitoBio/goquery v1.9.1
 	github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394
 	github.com/glebarez/go-sqlite v1.21.2
-	golang.org/x/crypto v0.11.0
+	golang.org/x/crypto v0.19.0
 )
 
 require (
+	github.com/andybalholm/cascadia v1.3.2 // indirect
 	github.com/dustin/go-humanize v1.0.1 // indirect
 	github.com/google/uuid v1.3.0 // indirect
 	github.com/mattn/go-isatty v0.0.19 // indirect
 	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
-	golang.org/x/sys v0.10.0 // indirect
+	golang.org/x/net v0.21.0 // indirect
+	golang.org/x/sys v0.17.0 // indirect
 	modernc.org/libc v1.24.1 // indirect
 	modernc.org/mathutil v1.6.0 // indirect
 	modernc.org/memory v1.6.0 // indirect
@@ -1,3 +1,7 @@
+github.com/PuerkitoBio/goquery v1.9.1 h1:mTL6XjbJTZdpfL+Gwl5U2h1l9yEkJjhmlTeV9VPW7UI=
+github.com/PuerkitoBio/goquery v1.9.1/go.mod h1:cW1n6TmIMDoORQU5IU/P1T3tGFunOeXEpGP2WHRwkbY=
+github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
+github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
 github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394 h1:OYA+5W64v3OgClL+IrOD63t4i/RW7RqrAVl9LTZ9UqQ=
 github.com/axgle/mahonia v0.0.0-20180208002826-3358181d7394/go.mod h1:Q8n74mJTIgjX4RBBcHnJ05h//6/k6foqmgE45jTQtxg=
 github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
@@ -12,11 +16,47 @@ github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
 github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
-golang.org/x/crypto v0.11.0 h1:6Ewdq3tDic1mg5xRO4milcWCfMVQhI4NkqWWvqejpuA=
-golang.org/x/crypto v0.11.0/go.mod h1:xgJhtzW8F9jGdVFWZESrid1U1bjeNy4zgy5cRr/CIio=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/crypto v0.19.0 h1:ENy+Az/9Y1vSrlrvBSyna3PITt4tiZLf7sgCjZBX7Wo=
+golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
+golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4=
+golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA=
-golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
+golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 modernc.org/libc v1.24.1 h1:uvJSeCKL/AgzBo2yYIPPTy82v21KgGnizcGYfBHaNuM=
 modernc.org/libc v1.24.1/go.mod h1:FmfO1RLrU3MHJfyi9eYYmZBfi/R+tqZ6+hQ3yQQUkak=
 modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4=
@@ -12,6 +12,7 @@ s := New()
http.HandleFunc("GET /{$}", s.indexHandler) http.HandleFunc("GET /{username}", s.userHandler) + http.HandleFunc("GET /saves", s.userSavesHandler) http.HandleFunc("GET /static/{file}", s.staticHandler) http.HandleFunc("GET /discover", s.discoverHandler) http.HandleFunc("GET /settings", s.settingsHandler)@@ -21,6 +22,7 @@ http.HandleFunc("POST /login", s.loginHandler)
http.HandleFunc("GET /logout", s.logoutHandler) http.HandleFunc("POST /logout", s.logoutHandler) http.HandleFunc("POST /register", s.registerHandler) + http.HandleFunc("GET /save/{url}", s.saveHandler) http.HandleFunc("GET /feeds/{url}", s.feedDetailsHandler) // left in-place for backwards compat
@@ -16,3 +16,35 @@ - no options
 - no javascript
 - no unread indicators or push notifs
 - no comments, upvotes, or ranks
+
+dev notes
+- vore should always trust websites as the source of authority
+  this is why posts aren't saved to disk - there's no good way to
+  uniquely identify them over time easily. a new website might
+  show up & reuse the post urls, and i want to avoid all of the
+  complexities that introduces by just fetching feeds at runtime
+  & loading them live - that way we're SURE they're fresh and
+  accurate.
+
+- do not natively display posts
+  posts always look like shit away from their home websites. instead
+  of doing any of that nonsense, vore just takes website snapshots
+  via archive.is and presents them to the user.
+
+- saved entries will NEVER change/expire
+  if a user uses the "save" feature, the data they were looking at
+  must never be lost.
+  therefore, we just copy whatever the active post state was from
+  memory & also snapshot the website via archive.is & link to the
+  snapshot. this way, there's always a cached version available
+  to use.
+
+  websites may be saved multiple times, i don't care.
+
+  TODO "this has been saved already" indicator
+
+- vore prefers raw URLs, we don't care about traditional RSS
+  formats like OPML
+
+soon(tm):
+- non-active feeds will be retried at a much slower cadence
+  (& remembered across restarts)
@@ -1,6 +1,7 @@
 package reaper
 
 import (
+	"errors"
 	"log"
 	"sort"
 	"sync"
@@ -45,9 +46,9 @@ r.feeds[url] = feed
 	}
 
 	for {
-		log.Println("reaper start: refresh all feeds")
+		log.Println("reaper: refreshing all feeds")
 		r.refreshAllFeeds()
-		log.Println("reaper end: completed all feeds, sleeping")
+		log.Println("reaper: refreshed all feeds, sleeping 😴")
 		time.Sleep(15 * time.Minute)
 	}
 }
@@ -62,6 +63,7 @@ // asynchronously, then prints the duration of the sync
 func (r *Reaper) refreshAllFeeds() {
 	ch := make(chan *rss.Feed)
 	var wg sync.WaitGroup
+	// i chose 20 workers somewhat arbitrarily
 	for i := 20; i > 0; i-- {
 		wg.Add(1)
@@ -70,9 +72,8 @@ defer wg.Done()
 			for f := range ch {
 				start := time.Now()
-				log.Printf("refreshing %s\n", f.UpdateURL)
 				r.refreshFeed(f)
-				log.Printf("%s refreshed in %s\n", f.UpdateURL, time.Since(start))
+				log.Printf("reaper: %s refreshed in %s\n", f.UpdateURL, time.Since(start))
 			}
 		}()
 	}
@@ -97,10 +98,10 @@ }
 }
 
 func (r *Reaper) handleFeedFetchFailure(url string, err error) {
-	log.Printf("[err] reaper: fetch failure '%s': %s\n", url, err)
+	log.Printf("reaper: failed to fetch %s: %s\n", url, err)
 	err = r.db.SetFeedFetchError(url, err.Error())
 	if err != nil {
-		log.Printf("[err] reaper: could not set feed fetch error '%s'\n", err)
+		log.Printf("reaper: could not set feed fetch error '%s'\n", err)
 	}
 }
@@ -117,6 +118,19 @@ func (r *Reaper) GetFeed(url string) *rss.Feed {
 	return r.feeds[url]
 }
 
+// GetItem iterates over all rss feeds, returning the first
+// item whose link matches the provided url
+func (r *Reaper) GetItem(url string) (*rss.Item, error) {
+	for _, f := range r.feeds {
+		for _, i := range f.Items {
+			if i.Link == url {
+				return i, nil
+			}
+		}
+	}
+	return &rss.Item{}, errors.New("item not found")
+}
+
 // GetUserFeeds returns a list of feeds
 func (r *Reaper) GetUserFeeds(username string) []*rss.Feed {
 	urls := r.db.GetUserFeedURLs(username)
@@ -130,6 +144,7 @@ r.SortFeeds(result)
 	return result
 }
 
+// SortFeeds sorts reaper feeds alphabetically by update url
 func (r *Reaper) SortFeeds(f []*rss.Feed) {
 	sort.Slice(f, func(i, j int) bool {
 		return f[i].UpdateURL < f[j].UpdateURL
@@ -5,8 +5,6 @@ "bytes"
"errors" "fmt" "io" - "io/ioutil" - "log" "net" "net/http" "strings"@@ -65,12 +63,10 @@ return nil, err
} defer resp.Body.Close() - body, err := ioutil.ReadAll(resp.Body) + body, err := io.ReadAll(resp.Body) if err != nil { return nil, err } - - log.Println("rss: ok i got the bytes for this url: " + url) out, err := Parse(body) if err != nil {
@@ -13,6 +13,7 @@ "path/filepath"
"strings" "time" + "git.j3s.sh/vore/archiveis" "git.j3s.sh/vore/lib" "git.j3s.sh/vore/reaper" "git.j3s.sh/vore/rss"@@ -29,6 +30,10 @@ reaper *reaper.Reaper
// site database handle db *sqlite.DB +} + +type Save struct { + // inferred: user_id } // New returns a fully populated & ready for action Site@@ -53,7 +58,8 @@ }
func (s *Site) indexHandler(w http.ResponseWriter, r *http.Request) { if s.loggedIn(r) { - http.Redirect(w, r, "/"+s.username(r), http.StatusSeeOther) + username := s.username(r) + http.Redirect(w, r, "/"+username, http.StatusSeeOther) return } s.renderPage(w, r, "index", nil)@@ -66,7 +72,8 @@
func (s *Site) loginHandler(w http.ResponseWriter, r *http.Request) { if r.Method == "GET" { if s.loggedIn(r) { - http.Redirect(w, r, "/", http.StatusSeeOther) + username := s.username(r) + http.Redirect(w, r, "/"+username, http.StatusSeeOther) } else { s.renderPage(w, r, "login", nil) }@@ -80,7 +87,7 @@ if err != nil {
s.renderErr(w, err.Error(), http.StatusUnauthorized) return } - http.Redirect(w, r, "/", http.StatusSeeOther) + http.Redirect(w, r, "/"+username, http.StatusSeeOther) } }@@ -109,6 +116,48 @@ }
http.Redirect(w, r, "/", http.StatusSeeOther) } +// saveHandler is an HTMX endpoint that returns the text "saved!" when +// a post has been saved to a user's account +func (s *Site) saveHandler(w http.ResponseWriter, r *http.Request) { + if !s.loggedIn(r) { + s.renderErr(w, "", http.StatusUnauthorized) + return + } + + username := s.username(r) + encodedURL := r.PathValue("url") + decodedURL, err := url.QueryUnescape(encodedURL) + if err != nil { + e := fmt.Sprintf("failed to decode URL '%s' %s", encodedURL, err) + s.renderErr(w, e, http.StatusBadRequest) + return + } + + item, err := s.reaper.GetItem(decodedURL) + if err != nil { + fmt.Fprintf(w, "error!") + return + } + archiveURL, err := archiveis.Capture(decodedURL) + if err != nil { + log.Println(err) + fmt.Fprintf(w, "error capturing archive!!") + return + } + + err = s.db.WriteSavedItem(username, sqlite.SavedItem{ + ArchiveURL: archiveURL, + ItemTitle: item.Title, + ItemURL: item.Link, + }) + if err != nil { + log.Println(err) + fmt.Fprintf(w, "error!!!") + return + } + fmt.Fprintf(w, "saved! you can go back now. this will eventually be async. lol.") +} + func (s *Site) userHandler(w http.ResponseWriter, r *http.Request) { username := r.PathValue("username")@@ -129,6 +178,17 @@
s.renderPage(w, r, "user", data) } +func (s *Site) userSavesHandler(w http.ResponseWriter, r *http.Request) { + if !s.loggedIn(r) { + s.renderErr(w, "", http.StatusUnauthorized) + return + } + + username := s.username(r) + saves := s.db.GetUserSavedItems(username) + s.renderPage(w, r, "saves", saves) +} + func (s *Site) settingsHandler(w http.ResponseWriter, r *http.Request) { if !s.loggedIn(r) { s.renderErr(w, "", http.StatusUnauthorized)@@ -140,11 +200,9 @@ feeds = s.reaper.GetUserFeeds(s.username(r))
s.renderPage(w, r, "settings", feeds) } -// TODO: -// -// show diff before submission (like tf plan) -// check if feed exists in db already? -// validate that title exists +// TODO: show diff before submission (like tf plan) +// check if feed exists in db already? +// validate that title exists func (s *Site) settingsSubmitHandler(w http.ResponseWriter, r *http.Request) { if !s.loggedIn(r) { s.renderErr(w, "", http.StatusUnauthorized)@@ -187,7 +245,10 @@ }
s.db.WriteFeed(u) } - // subscribe to all listed feeds exclusively + // TODO: this is insane, make it a transaction + // so people don't lose feed subscriptions + // if vore restarts in the middle of this + // process. s.db.UnsubscribeAll(s.username(r)) for _, url := range validatedURLs { s.db.Subscribe(s.username(r), url)@@ -319,7 +380,7 @@ LoggedIn bool
CutePhrase string Data any }{ - Title: page + " | " + s.title, + Title: page, Username: s.username(r), LoggedIn: s.loggedIn(r), CutePhrase: s.randomCutePhrase(),@@ -333,9 +394,8 @@ return
} } -// printDomain does a best-effort uri parse and -// prints the base domain, otherwise returning the -// unmodified string +// printDomain does a best-effort uri parse, returning a string +// that may still contain special characters func (s *Site) printDomain(rawURL string) string { parsedURL, err := url.Parse(rawURL) if err == nil {
@@ -0,0 +1,8 @@
+CREATE TABLE IF NOT EXISTS saved_item (
+	id INTEGER PRIMARY KEY AUTOINCREMENT,
+	user_id INTEGER NOT NULL,
+	item_url TEXT NOT NULL,
+	item_title TEXT NOT NULL,
+	archive_url TEXT NOT NULL,
+	created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
@@ -7,6 +7,7 @@ "fmt"
"io/fs" "log" "strings" + "time" _ "github.com/glebarez/go-sqlite" )@@ -16,6 +17,13 @@ var migrationFiles embed.FS
type DB struct { sql *sql.DB +} + +type SavedItem struct { + ArchiveURL string + CreatedAt time.Time + ItemTitle string + ItemURL string } // New opens a sqlite database, populates it with tables, and@@ -203,6 +211,32 @@ }
return urls } +func (db *DB) GetUserSavedItems(username string) []SavedItem { + uid := db.GetUserID(username) + + rows, err := db.sql.Query(`SELECT item_url, item_title, archive_url, created_at + FROM saved_item WHERE user_id = ? + ORDER BY created_at DESC`, uid) + if err == sql.ErrNoRows { + return []SavedItem{} + } + if err != nil { + log.Fatal(err) + } + defer rows.Close() + + var savedItems []SavedItem + for rows.Next() { + var si SavedItem + err = rows.Scan(&si.ItemURL, &si.ItemTitle, &si.ArchiveURL, &si.CreatedAt) + if err != nil { + log.Fatal(err) + } + savedItems = append(savedItems, si) + } + return savedItems +} + func (db *DB) GetUserID(username string) int { var uid int err := db.sql.QueryRow("SELECT id FROM user WHERE username=?", username).Scan(&uid)@@ -229,6 +263,16 @@ ON CONFLICT(url) DO NOTHING`, url)
if err != nil { log.Fatal(err) } +} + +func (db *DB) WriteSavedItem(username string, item SavedItem) error { + uid := db.GetUserID(username) + + _, err := db.sql.Exec(` + INSERT INTO saved_item(user_id, item_url, item_title, archive_url) + VALUES(?, ?, ?, ?)`, uid, item.ItemURL, item.ItemTitle, item.ArchiveURL) + + return err } // WriteFeed writes an rss feed to the database for permanent storage