dashboard / pico / chore(feeds): better error handling for invalid feed files #41 rss

open · opened on 2025-01-10T15:10:28Z by erock
Help
# add changes to patch request
git format-patch main --stdout | ssh pr.pico.sh pr add 41
# add review to patch request
git format-patch main --stdout | ssh pr.pico.sh pr add --review 41
# remove patchset
ssh pr.pico.sh ps rm ps-x
# apply all patches from the patch request to the current branch
ssh pr.pico.sh pr print 41 | git am -3
# print a diff between the last two patches in a patch request
ssh pr.pico.sh pr diff 41
# accept PR
ssh pr.pico.sh pr accept 41
# close PR
ssh pr.pico.sh pr close 41

Logs

erock created pr with ps-87 on 2025-01-10T15:10:28Z
erock added ps-88 on 2025-01-10T15:14:05Z

Patchsets

ps-87 by erock on 2025-01-10T15:10:28Z
Range Diff ↕ rd-88
1: 5d5b1f5 ! 1: a6f10fc chore(feeds): better error handling for invalid feed files
ps-88 by erock on 2025-01-10T15:14:05Z

Patchset ps-87

Back to top

chore(feeds): better error handling for invalid feed files

feeds/cron.go link
+20 -9
 1diff --git a/feeds/cron.go b/feeds/cron.go
 2index 612ab29..5863c37 100644
 3--- a/feeds/cron.go
 4+++ b/feeds/cron.go
 5@@ -9,6 +9,7 @@ import (
 6 	"log/slog"
 7 	"math"
 8 	"net/http"
 9+	"net/url"
10 	"strings"
11 	"text/template"
12 	"time"
13@@ -161,23 +162,29 @@ func (f *Fetcher) RunPost(logger *slog.Logger, user *db.User, post *db.Post) err
14 
15 	urls := []string{}
16 	for _, item := range parsed.Items {
17-		url := ""
18-		if item.IsText {
19-			url = item.Value
20+		u := ""
21+		if item.IsText || item.IsURL {
22+			u = item.Value
23 		} else if item.IsURL {
24-			url = string(item.URL)
25+			u = string(item.Value)
26 		}
27 
28-		if url == "" {
29+		if u == "" {
30 			continue
31 		}
32 
33-		urls = append(urls, url)
34+		_, err := url.Parse(string(item.URL))
35+		if err != nil {
36+			logger.Info("invalid url", "url", string(item.URL))
37+			continue
38+		}
39+
40+		urls = append(urls, u)
41 	}
42 
43 	now := time.Now().UTC()
44 	if post.ExpiresAt == nil {
45-		expiresAt := time.Now().AddDate(0, 6, 0)
46+		expiresAt := time.Now().AddDate(0, 12, 0)
47 		post.ExpiresAt = &expiresAt
48 	}
49 	_, err = f.db.UpdatePost(post)
50@@ -199,7 +206,7 @@ func (f *Fetcher) RunPost(logger *slog.Logger, user *db.User, post *db.Post) err
51 		post.Data.Attempts += 1
52 		logger.Error("could not fetch urls", "err", err, "attempts", post.Data.Attempts)
53 
54-		errBody := fmt.Sprintf(`There was an error attempting to fetch your feeds (%d) times.  After (3) attempts we remove the file from our system.  Please check all the URLs and re-upload.
55+		errBody := fmt.Sprintf(`There was an error attempting to fetch your feeds (%d) times.  After (5) attempts we remove the file from our system.  Please check all the URLs and re-upload.
56 Also, we have centralized logs in our pico.sh TUI that will display realtime feed errors so you can debug.
57 
58 
59@@ -217,7 +224,7 @@ Also, we have centralized logs in our pico.sh TUI that will display realtime fee
60 			return err
61 		}
62 
63-		if post.Data.Attempts >= 3 {
64+		if post.Data.Attempts >= 5 {
65 			err = f.db.RemovePosts([]string{post.ID})
66 			if err != nil {
67 				return err
68@@ -410,6 +417,10 @@ func (f *Fetcher) FetchAll(logger *slog.Logger, urls []string, inlineContent boo
69 		return nil, err
70 	}
71 
72+	if len(urls) == 0 {
73+		return nil, fmt.Errorf("feed file does not contain any urls")
74+	}
75+
76 	var allErrors error
77 	for _, url := range urls {
78 		feedTmpl, err := f.Fetch(logger, fp, url, username, feedItems)
feeds/scp_hooks.go link
+33 -14
 1diff --git a/feeds/scp_hooks.go b/feeds/scp_hooks.go
 2index 785bfd5..fb03a8d 100644
 3--- a/feeds/scp_hooks.go
 4+++ b/feeds/scp_hooks.go
 5@@ -1,12 +1,13 @@
 6 package feeds
 7 
 8 import (
 9+	"errors"
10 	"fmt"
11+	"net/url"
12+
13 	"strings"
14 	"time"
15 
16-	"slices"
17-
18 	"github.com/charmbracelet/ssh"
19 	"github.com/picosh/pico/db"
20 	"github.com/picosh/pico/filehandlers"
21@@ -38,23 +39,41 @@ func (p *FeedHooks) FileValidate(s ssh.Session, data *filehandlers.PostMetaData)
22 		return false, err
23 	}
24 
25-	return true, nil
26-}
27-
28-func (p *FeedHooks) FileMeta(s ssh.Session, data *filehandlers.PostMetaData) error {
29-	parsedText := shared.ListParseText(string(data.Text))
30+	// Because we need to support sshfs, sftp runs our Write handler twice
31+	// and on the first pass we do not have access to the file data.
32+	// In that case we should skip the parsing validation
33+	if data.Text == "" {
34+		return true, nil
35+	}
36 
37-	if parsedText.Title == "" {
38-		data.Title = utils.ToUpper(data.Slug)
39-	} else {
40-		data.Title = parsedText.Title
41+	parsed := shared.ListParseText(string(data.Text))
42+	if parsed.Email == "" {
43+		return false, fmt.Errorf("ERROR: no email variable detected for %s, check the format of your file, skipping", data.Filename)
44 	}
45 
46-	data.Description = parsedText.Description
47-	data.Tags = parsedText.Tags
48+	var allErr error
49+	for _, txt := range parsed.Items {
50+		u := ""
51+		if txt.IsText {
52+			u = txt.Value
53+		} else if txt.IsURL {
54+			u = string(txt.URL)
55+		}
56 
57-	data.Hidden = slices.Contains(p.Cfg.HiddenPosts, data.Filename)
58+		_, err := url.Parse(u)
59+		if err != nil {
60+			allErr = errors.Join(allErr, fmt.Errorf("%s: %w", u, err))
61+			continue
62+		}
63+	}
64+	if allErr != nil {
65+		return false, fmt.Errorf("ERROR: some urls provided were invalid check the format of your file, skipping: %w", allErr)
66+	}
67+
68+	return true, nil
69+}
70 
71+func (p *FeedHooks) FileMeta(s ssh.Session, data *filehandlers.PostMetaData) error {
72 	if data.Data.LastDigest == nil {
73 		now := time.Now()
74 		data.Data.LastDigest = &now