dashboard / erock/pico / chore(feeds): better error handling for invalid feed files #41 rss

accepted · opened on 2025-01-10T15:10:28Z by erock
Help
checkout latest patchset:
ssh pr.pico.sh print pr-41 | git am -3
checkout any patchset in a patch request:
ssh pr.pico.sh print ps-X | git am -3
add changes to patch request:
git format-patch main --stdout | ssh pr.pico.sh pr add 41
add review to patch request:
git format-patch main --stdout | ssh pr.pico.sh pr add --review 41
accept PR:
ssh pr.pico.sh pr accept 41
close PR:
ssh pr.pico.sh pr close 41

Logs

erock created pr with ps-87 on 2025-01-10T15:10:28Z
erock added ps-88 on 2025-01-10T15:14:05Z
erock changed status on 2025-01-18T20:21:19Z {"status":"accepted"}

Patchsets

ps-87 by erock on 2025-01-10T15:10:28Z
Range Diff ↕ rd-88
1: 5d5b1f5 ! 1: a6f10fc chore(feeds): better error handling for invalid feed files
ps-88 by erock on 2025-01-10T15:14:05Z

Patchset ps-87

Back to top

chore(feeds): better error handling for invalid feed files

feeds/cron.go link
+20 -9
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
diff --git a/feeds/cron.go b/feeds/cron.go
index 612ab29..5863c37 100644
--- a/feeds/cron.go
+++ b/feeds/cron.go
@@ -9,6 +9,7 @@ import (
 	"log/slog"
 	"math"
 	"net/http"
+	"net/url"
 	"strings"
 	"text/template"
 	"time"
@@ -161,23 +162,29 @@ func (f *Fetcher) RunPost(logger *slog.Logger, user *db.User, post *db.Post) err
 
 	urls := []string{}
 	for _, item := range parsed.Items {
-		url := ""
-		if item.IsText {
-			url = item.Value
+		u := ""
+		if item.IsText || item.IsURL {
+			u = item.Value
 		} else if item.IsURL {
-			url = string(item.URL)
+			u = string(item.Value)
 		}
 
-		if url == "" {
+		if u == "" {
 			continue
 		}
 
-		urls = append(urls, url)
+		_, err := url.Parse(string(item.URL))
+		if err != nil {
+			logger.Info("invalid url", "url", string(item.URL))
+			continue
+		}
+
+		urls = append(urls, u)
 	}
 
 	now := time.Now().UTC()
 	if post.ExpiresAt == nil {
-		expiresAt := time.Now().AddDate(0, 6, 0)
+		expiresAt := time.Now().AddDate(0, 12, 0)
 		post.ExpiresAt = &expiresAt
 	}
 	_, err = f.db.UpdatePost(post)
@@ -199,7 +206,7 @@ func (f *Fetcher) RunPost(logger *slog.Logger, user *db.User, post *db.Post) err
 		post.Data.Attempts += 1
 		logger.Error("could not fetch urls", "err", err, "attempts", post.Data.Attempts)
 
-		errBody := fmt.Sprintf(`There was an error attempting to fetch your feeds (%d) times.  After (3) attempts we remove the file from our system.  Please check all the URLs and re-upload.
+		errBody := fmt.Sprintf(`There was an error attempting to fetch your feeds (%d) times.  After (5) attempts we remove the file from our system.  Please check all the URLs and re-upload.
 Also, we have centralized logs in our pico.sh TUI that will display realtime feed errors so you can debug.
 
 
@@ -217,7 +224,7 @@ Also, we have centralized logs in our pico.sh TUI that will display realtime fee
 			return err
 		}
 
-		if post.Data.Attempts >= 3 {
+		if post.Data.Attempts >= 5 {
 			err = f.db.RemovePosts([]string{post.ID})
 			if err != nil {
 				return err
@@ -410,6 +417,10 @@ func (f *Fetcher) FetchAll(logger *slog.Logger, urls []string, inlineContent boo
 		return nil, err
 	}
 
+	if len(urls) == 0 {
+		return nil, fmt.Errorf("feed file does not contain any urls")
+	}
+
 	var allErrors error
 	for _, url := range urls {
 		feedTmpl, err := f.Fetch(logger, fp, url, username, feedItems)
feeds/scp_hooks.go link
+33 -14
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
diff --git a/feeds/scp_hooks.go b/feeds/scp_hooks.go
index 785bfd5..fb03a8d 100644
--- a/feeds/scp_hooks.go
+++ b/feeds/scp_hooks.go
@@ -1,12 +1,13 @@
 package feeds
 
 import (
+	"errors"
 	"fmt"
+	"net/url"
+
 	"strings"
 	"time"
 
-	"slices"
-
 	"github.com/charmbracelet/ssh"
 	"github.com/picosh/pico/db"
 	"github.com/picosh/pico/filehandlers"
@@ -38,23 +39,41 @@ func (p *FeedHooks) FileValidate(s ssh.Session, data *filehandlers.PostMetaData)
 		return false, err
 	}
 
-	return true, nil
-}
-
-func (p *FeedHooks) FileMeta(s ssh.Session, data *filehandlers.PostMetaData) error {
-	parsedText := shared.ListParseText(string(data.Text))
+	// Because we need to support sshfs, sftp runs our Write handler twice
+	// and on the first pass we do not have access to the file data.
+	// In that case we should skip the parsing validation
+	if data.Text == "" {
+		return true, nil
+	}
 
-	if parsedText.Title == "" {
-		data.Title = utils.ToUpper(data.Slug)
-	} else {
-		data.Title = parsedText.Title
+	parsed := shared.ListParseText(string(data.Text))
+	if parsed.Email == "" {
+		return false, fmt.Errorf("ERROR: no email variable detected for %s, check the format of your file, skipping", data.Filename)
 	}
 
-	data.Description = parsedText.Description
-	data.Tags = parsedText.Tags
+	var allErr error
+	for _, txt := range parsed.Items {
+		u := ""
+		if txt.IsText {
+			u = txt.Value
+		} else if txt.IsURL {
+			u = string(txt.URL)
+		}
 
-	data.Hidden = slices.Contains(p.Cfg.HiddenPosts, data.Filename)
+		_, err := url.Parse(u)
+		if err != nil {
+			allErr = errors.Join(allErr, fmt.Errorf("%s: %w", u, err))
+			continue
+		}
+	}
+	if allErr != nil {
+		return false, fmt.Errorf("ERROR: some urls provided were invalid check the format of your file, skipping: %w", allErr)
+	}
+
+	return true, nil
+}
 
+func (p *FeedHooks) FileMeta(s ssh.Session, data *filehandlers.PostMetaData) error {
 	if data.Data.LastDigest == nil {
 		now := time.Now()
 		data.Data.LastDigest = &now