pico / chore(feeds): better error handling for invalid feed files #41

open · opened on 2025-01-10T15:10:28Z by erock
Help
# add changes to patch request
git format-patch main --stdout | ssh pr.pico.sh pr add 41
# add review to patch request
git format-patch main --stdout | ssh pr.pico.sh pr add --review 41
# remove patchset
ssh pr.pico.sh ps rm ps-x
# checkout all patches
ssh pr.pico.sh pr print 41 | git am -3
# print a diff between the last two patches in a patch request
ssh pr.pico.sh pr diff 41
# accept PR
ssh pr.pico.sh pr accept 41
# close PR
ssh pr.pico.sh pr close 41

Logs

erock created pr with ps-87 on 2025-01-10T15:10:28Z
erock added ps-88 on 2025-01-10T15:14:05Z

Patchsets

ps-87 by erock on 2025-01-10T15:10:28Z
Range Diff rd-88
1: 5d5b1f5 ! 1: a6f10fc chore(feeds): better error handling for invalid feed files
ps-88 by erock on 2025-01-10T15:14:05Z

Patchset ps-88


chore(feeds): better error handling for invalid feed files

When a user uploads an invalid feed file, we have no mechanism to
notify that user. Further, we will continue to attempt to run the file
through our rss-to-email cron.

This commit validates feed files as they are uploaded and refuses to
save files that we know will eventually fail.

Further, we don't want to continuously retry files that we know will
not succeed, so we push those known failures into the retry-then-delete
mechanism inside our cron.
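
To illustrate the cron-side mechanism, here is a minimal, self-contained sketch of the retry-then-delete idea. The five-attempt limit mirrors the cron.go changes below, while the Post type and runOnce function are simplified stand-ins for the real db.Post and fetch logic.

package main

import (
	"errors"
	"fmt"
)

// maxAttempts mirrors the new five-attempt limit in the cron changes
// below; Post is a simplified stand-in for the real db.Post type.
const maxAttempts = 5

type Post struct {
	ID       string
	Attempts int
}

// runOnce simulates fetching a post's feeds; here it always fails so
// the retry counter advances toward deletion.
func runOnce(p *Post) error {
	return errors.New("could not fetch urls")
}

// retryThenDelete increments the attempt counter on failure and removes
// the post once the limit is reached, instead of retrying a known-bad
// file forever.
func retryThenDelete(p *Post, remove func(id string)) {
	if err := runOnce(p); err != nil {
		p.Attempts++
		fmt.Printf("fetch failed (attempt %d/%d): %v\n", p.Attempts, maxAttempts, err)
		if p.Attempts >= maxAttempts {
			remove(p.ID)
		}
	}
}

func main() {
	post := &Post{ID: "post-1"}
	for i := 0; i < maxAttempts; i++ {
		retryThenDelete(post, func(id string) {
			fmt.Printf("removing %s after %d attempts\n", id, post.Attempts)
		})
	}
}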
feeds/cron.go
+20 -9
diff --git a/feeds/cron.go b/feeds/cron.go
index 612ab29..5863c37 100644
--- a/feeds/cron.go
+++ b/feeds/cron.go
@@ -9,6 +9,7 @@ import (
 	"log/slog"
 	"math"
 	"net/http"
+	"net/url"
 	"strings"
 	"text/template"
 	"time"
@@ -161,23 +162,29 @@ func (f *Fetcher) RunPost(logger *slog.Logger, user *db.User, post *db.Post) err
 
 	urls := []string{}
 	for _, item := range parsed.Items {
-		url := ""
-		if item.IsText {
-			url = item.Value
+		u := ""
+		if item.IsText {
+			u = item.Value
 		} else if item.IsURL {
-			url = string(item.URL)
+			u = string(item.URL)
 		}
 
-		if url == "" {
+		if u == "" {
 			continue
 		}
 
-		urls = append(urls, url)
+		_, err := url.Parse(u)
+		if err != nil {
+			logger.Info("invalid url", "url", u)
+			continue
+		}
+
+		urls = append(urls, u)
 	}
 
 	now := time.Now().UTC()
 	if post.ExpiresAt == nil {
-		expiresAt := time.Now().AddDate(0, 6, 0)
+		expiresAt := time.Now().AddDate(0, 12, 0)
 		post.ExpiresAt = &expiresAt
 	}
 	_, err = f.db.UpdatePost(post)
@@ -199,7 +206,7 @@ func (f *Fetcher) RunPost(logger *slog.Logger, user *db.User, post *db.Post) err
 		post.Data.Attempts += 1
 		logger.Error("could not fetch urls", "err", err, "attempts", post.Data.Attempts)
 
-		errBody := fmt.Sprintf(`There was an error attempting to fetch your feeds (%d) times.  After (3) attempts we remove the file from our system.  Please check all the URLs and re-upload.
+		errBody := fmt.Sprintf(`There was an error attempting to fetch your feeds (%d) times.  After (5) attempts we remove the file from our system.  Please check all the URLs and re-upload.
 Also, we have centralized logs in our pico.sh TUI that will display realtime feed errors so you can debug.
 
 
@@ -217,7 +224,7 @@ Also, we have centralized logs in our pico.sh TUI that will display realtime fee
 			return err
 		}
 
-		if post.Data.Attempts >= 3 {
+		if post.Data.Attempts >= 5 {
 			err = f.db.RemovePosts([]string{post.ID})
 			if err != nil {
 				return err
@@ -410,6 +417,10 @@ func (f *Fetcher) FetchAll(logger *slog.Logger, urls []string, inlineContent boo
 		return nil, err
 	}
 
+	if len(urls) == 0 {
+		return nil, fmt.Errorf("feed file does not contain any urls")
+	}
+
 	var allErrors error
 	for _, url := range urls {
 		feedTmpl, err := f.Fetch(logger, fp, url, username, feedItems)
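
One caveat with validating via url.Parse: Go's url.Parse is very lenient and rejects little beyond control characters, so a line like "not a url" still parses cleanly. Below is a minimal sketch of a stricter check, assuming http(s) feeds are the only valid input; validFeedURL is a hypothetical helper, not part of this patch.

package main

import (
	"fmt"
	"net/url"
)

// validFeedURL is a hypothetical, stricter alternative to a bare
// url.Parse: it requires an absolute http(s) URL with a host.
func validFeedURL(raw string) bool {
	u, err := url.ParseRequestURI(raw)
	if err != nil {
		return false
	}
	return (u.Scheme == "http" || u.Scheme == "https") && u.Host != ""
}

func main() {
	for _, raw := range []string{
		"https://pico.sh/rss.xml", // passes both checks
		"not a url",               // url.Parse accepts this; validFeedURL does not
	} {
		_, err := url.Parse(raw)
		fmt.Printf("%-26q url.Parse err=%v strict=%v\n", raw, err, validFeedURL(raw))
	}
}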
feeds/scp_hooks.go
+33 -14
diff --git a/feeds/scp_hooks.go b/feeds/scp_hooks.go
index 785bfd5..fb03a8d 100644
--- a/feeds/scp_hooks.go
+++ b/feeds/scp_hooks.go
@@ -1,12 +1,13 @@
 package feeds
 
 import (
+	"errors"
 	"fmt"
+	"net/url"
+
 	"strings"
 	"time"
 
-	"slices"
-
 	"github.com/charmbracelet/ssh"
 	"github.com/picosh/pico/db"
 	"github.com/picosh/pico/filehandlers"
@@ -38,23 +39,41 @@ func (p *FeedHooks) FileValidate(s ssh.Session, data *filehandlers.PostMetaData)
 		return false, err
 	}
 
-	return true, nil
-}
-
-func (p *FeedHooks) FileMeta(s ssh.Session, data *filehandlers.PostMetaData) error {
-	parsedText := shared.ListParseText(string(data.Text))
+	// Because we need to support sshfs, sftp runs our Write handler twice
+	// and on the first pass we do not have access to the file data.
+	// In that case we should skip the parsing validation.
+	if data.Text == "" {
+		return true, nil
+	}
 
-	if parsedText.Title == "" {
-		data.Title = utils.ToUpper(data.Slug)
-	} else {
-		data.Title = parsedText.Title
+	parsed := shared.ListParseText(string(data.Text))
+	if parsed.Email == "" {
+		return false, fmt.Errorf("ERROR: no email variable detected for %s, check the format of your file, skipping", data.Filename)
 	}
 
-	data.Description = parsedText.Description
-	data.Tags = parsedText.Tags
+	var allErr error
+	for _, txt := range parsed.Items {
+		u := ""
+		if txt.IsText {
+			u = txt.Value
+		} else if txt.IsURL {
+			u = string(txt.URL)
+		}
 
-	data.Hidden = slices.Contains(p.Cfg.HiddenPosts, data.Filename)
+		_, err := url.Parse(u)
+		if err != nil {
+			allErr = errors.Join(allErr, fmt.Errorf("%s: %w", u, err))
+			continue
+		}
+	}
+	if allErr != nil {
+		return false, fmt.Errorf("ERROR: some urls provided were invalid, check the format of your file, skipping: %w", allErr)
+	}
+
+	return true, nil
+}
 
+func (p *FeedHooks) FileMeta(s ssh.Session, data *filehandlers.PostMetaData) error {
 	if data.Data.LastDigest == nil {
 		now := time.Now()
 		data.Data.LastDigest = &now
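
As a closing note, the errors.Join pattern in FileValidate above aggregates every bad URL before rejecting the upload instead of failing on the first one, so the user sees all problems in a single pass. Here is a minimal standalone sketch of that aggregation, using hypothetical inputs; a control character is one of the few things url.Parse actually rejects.

package main

import (
	"errors"
	"fmt"
	"net/url"
)

func main() {
	// Hypothetical feed entries; in the real hook they come from
	// shared.ListParseText(data.Text).
	items := []string{
		"https://pico.sh/rss.xml",      // fine
		"http://example.com/feed\nbad", // control character: url.Parse rejects this
	}

	// Collect one error per invalid entry rather than returning on
	// the first failure, mirroring the loop in FileValidate.
	var allErr error
	for _, raw := range items {
		if _, err := url.Parse(raw); err != nil {
			allErr = errors.Join(allErr, fmt.Errorf("%s: %w", raw, err))
		}
	}
	if allErr != nil {
		fmt.Printf("ERROR: some urls provided were invalid:\n%v\n", allErr)
	}
}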