dashboard / erock/pico / chore(feeds): better error handling for invalid feed files #41 rss

accepted · opened on 2025-01-10T15:10:28Z by erock
Help
checkout latest patchset:
ssh pr.pico.sh print pr-41 | git am -3
checkout any patchset in a patch request:
ssh pr.pico.sh print ps-X | git am -3
add changes to patch request:
git format-patch main --stdout | ssh pr.pico.sh pr add 41
add review to patch request:
git format-patch main --stdout | ssh pr.pico.sh pr add --review 41
accept PR:
ssh pr.pico.sh pr accept 41
close PR:
ssh pr.pico.sh pr close 41

Logs

erock created pr with ps-87 on 2025-01-10T15:10:28Z
erock added ps-88 on 2025-01-10T15:14:05Z
erock changed status on 2025-01-18T20:21:19Z {"status":"accepted"}

Patchsets

ps-87 by erock on 2025-01-10T15:10:28Z
Range Diff ↕ rd-88
1: 5d5b1f5 ! 1: a6f10fc chore(feeds): better error handling for invalid feed files
ps-88 by erock on 2025-01-10T15:14:05Z

Patchset ps-88

Back to top

chore(feeds): better error handling for invalid feed files

When a user uploads an invalid feed file we do not have any mechanism to
notify that user.  Further, we will continue to attempt to run the file
through our rss-to-email cron.

This commit makes it so we perform validation on the feed files as they
are uploaded and refuse to save files that we know will eventually
fail.

Further, we don't want to continuously try files that we know will not
succeed, so we are pushing those known issues into our retry-then-delete
mechanism inside our cron.
feeds/cron.go link
+20 -9
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
diff --git a/feeds/cron.go b/feeds/cron.go
index 612ab29..5863c37 100644
--- a/feeds/cron.go
+++ b/feeds/cron.go
@@ -9,6 +9,7 @@ import (
 	"log/slog"
 	"math"
 	"net/http"
+	"net/url"
 	"strings"
 	"text/template"
 	"time"
@@ -161,23 +162,29 @@ func (f *Fetcher) RunPost(logger *slog.Logger, user *db.User, post *db.Post) err
 
 	urls := []string{}
 	for _, item := range parsed.Items {
-		url := ""
-		if item.IsText {
-			url = item.Value
+		u := ""
+		if item.IsText || item.IsURL {
+			u = item.Value
 		} else if item.IsURL {
-			url = string(item.URL)
+			u = string(item.Value)
 		}
 
-		if url == "" {
+		if u == "" {
 			continue
 		}
 
-		urls = append(urls, url)
+		_, err := url.Parse(string(item.URL))
+		if err != nil {
+			logger.Info("invalid url", "url", string(item.URL))
+			continue
+		}
+
+		urls = append(urls, u)
 	}
 
 	now := time.Now().UTC()
 	if post.ExpiresAt == nil {
-		expiresAt := time.Now().AddDate(0, 6, 0)
+		expiresAt := time.Now().AddDate(0, 12, 0)
 		post.ExpiresAt = &expiresAt
 	}
 	_, err = f.db.UpdatePost(post)
@@ -199,7 +206,7 @@ func (f *Fetcher) RunPost(logger *slog.Logger, user *db.User, post *db.Post) err
 		post.Data.Attempts += 1
 		logger.Error("could not fetch urls", "err", err, "attempts", post.Data.Attempts)
 
-		errBody := fmt.Sprintf(`There was an error attempting to fetch your feeds (%d) times.  After (3) attempts we remove the file from our system.  Please check all the URLs and re-upload.
+		errBody := fmt.Sprintf(`There was an error attempting to fetch your feeds (%d) times.  After (5) attempts we remove the file from our system.  Please check all the URLs and re-upload.
 Also, we have centralized logs in our pico.sh TUI that will display realtime feed errors so you can debug.
 
 
@@ -217,7 +224,7 @@ Also, we have centralized logs in our pico.sh TUI that will display realtime fee
 			return err
 		}
 
-		if post.Data.Attempts >= 3 {
+		if post.Data.Attempts >= 5 {
 			err = f.db.RemovePosts([]string{post.ID})
 			if err != nil {
 				return err
@@ -410,6 +417,10 @@ func (f *Fetcher) FetchAll(logger *slog.Logger, urls []string, inlineContent boo
 		return nil, err
 	}
 
+	if len(urls) == 0 {
+		return nil, fmt.Errorf("feed file does not contain any urls")
+	}
+
 	var allErrors error
 	for _, url := range urls {
 		feedTmpl, err := f.Fetch(logger, fp, url, username, feedItems)
feeds/scp_hooks.go link
+33 -14
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
diff --git a/feeds/scp_hooks.go b/feeds/scp_hooks.go
index 785bfd5..fb03a8d 100644
--- a/feeds/scp_hooks.go
+++ b/feeds/scp_hooks.go
@@ -1,12 +1,13 @@
 package feeds
 
 import (
+	"errors"
 	"fmt"
+	"net/url"
+
 	"strings"
 	"time"
 
-	"slices"
-
 	"github.com/charmbracelet/ssh"
 	"github.com/picosh/pico/db"
 	"github.com/picosh/pico/filehandlers"
@@ -38,23 +39,41 @@ func (p *FeedHooks) FileValidate(s ssh.Session, data *filehandlers.PostMetaData)
 		return false, err
 	}
 
-	return true, nil
-}
-
-func (p *FeedHooks) FileMeta(s ssh.Session, data *filehandlers.PostMetaData) error {
-	parsedText := shared.ListParseText(string(data.Text))
+	// Because we need to support sshfs, sftp runs our Write handler twice
+	// and on the first pass we do not have access to the file data.
+	// In that case we should skip the parsing validation
+	if data.Text == "" {
+		return true, nil
+	}
 
-	if parsedText.Title == "" {
-		data.Title = utils.ToUpper(data.Slug)
-	} else {
-		data.Title = parsedText.Title
+	parsed := shared.ListParseText(string(data.Text))
+	if parsed.Email == "" {
+		return false, fmt.Errorf("ERROR: no email variable detected for %s, check the format of your file, skipping", data.Filename)
 	}
 
-	data.Description = parsedText.Description
-	data.Tags = parsedText.Tags
+	var allErr error
+	for _, txt := range parsed.Items {
+		u := ""
+		if txt.IsText {
+			u = txt.Value
+		} else if txt.IsURL {
+			u = string(txt.URL)
+		}
 
-	data.Hidden = slices.Contains(p.Cfg.HiddenPosts, data.Filename)
+		_, err := url.Parse(u)
+		if err != nil {
+			allErr = errors.Join(allErr, fmt.Errorf("%s: %w", u, err))
+			continue
+		}
+	}
+	if allErr != nil {
+		return false, fmt.Errorf("ERROR: some urls provided were invalid check the format of your file, skipping: %w", allErr)
+	}
+
+	return true, nil
+}
 
+func (p *FeedHooks) FileMeta(s ssh.Session, data *filehandlers.PostMetaData) error {
 	if data.Data.LastDigest == nil {
 		now := time.Now()
 		data.Data.LastDigest = &now