Skip to content

Commit f0cdc50

Browse files
committed
Added guid for tracking posts, added atom support
1 parent 5290b1d commit f0cdc50

File tree

10 files changed

+168
-22
lines changed

10 files changed

+168
-22
lines changed

.dockerignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@
66
/storage/database/*
77
/storage/framework/*
88
/storage/logs/*
9+
/.github

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ A simple, opinionated, RSS feed aggregator.
88

99
The following features are built into the application:
1010

11+
- Supports RSS and Atom formats.
1112
- Regular auto-fetching of RSS feeds (Every hour).
1213
- Custom feed names and colors.
1314
- Feed-based tags for categorization.
@@ -30,6 +31,8 @@ This is not a list of planned features. Please see the [Low Maintenance Project]
3031
- No customization, extension or plugin system.
3132
- No organisation beyond feed-level tagging.
3233

34+
In addition to the above, it's quite likely you'll come across issues. This project was created to meet a personal need while learning some new technologies. Much of the logic is custom-written instead of using battle-tested libraries.
35+
3336
## Screenshots
3437

3538
TODO

app/Jobs/RefreshFeedJob.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ public function handle(FeedPostFetcher $postFetcher)
4141

4242
foreach ($freshPosts as $post) {
4343
$post = $this->feed->posts()->updateOrCreate(
44-
['url' => $post->url],
44+
['guid' => $post->guid],
4545
$post->getAttributes(),
4646
);
4747

app/Models/Post.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,6 @@ class Post extends Model
99
{
1010
use HasFactory;
1111

12-
protected $fillable = ['url', 'title', 'description', 'published_at'];
13-
protected $hidden = ['id', 'feed_id', 'created_at', 'updated_at'];
12+
protected $fillable = ['url', 'title', 'description', 'published_at', 'guid'];
13+
protected $hidden = ['id', 'feed_id', 'guid', 'created_at', 'updated_at'];
1414
}

app/Rss/FeedPostFetcher.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@ public function fetchForFeed(Feed $feed): array
2424
}
2525

2626
$rssData = ltrim($feedResponse->body());
27-
if (substr($rssData, 0, 6) !== '<?xml ') {
27+
$tagStart = explode(' ', substr($rssData, 0, 20))[0];
28+
$validStarts = ['<?xml', '<feed', '<rss'];
29+
if (!in_array($tagStart, $validStarts)) {
2830
return [];
2931
}
3032

app/Rss/RssParser.php

Lines changed: 47 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,21 +28,23 @@ public function rssDataToPosts(string $rssData): array
2828
*/
2929
public function parseRssDataToPosts(string $rssData): array
3030
{
31+
$rssData = trim($rssData);
32+
3133
$rssXml = new SimpleXMLElement($rssData);
32-
$items = iterator_to_array($rssXml->channel->item, false);
34+
$items = is_iterable($rssXml->channel->item ?? null) ? iterator_to_array($rssXml->channel->item, false) : [];
35+
36+
$isAtom = false;
37+
if (empty($items)) {
38+
$items = is_iterable($rssXml->entry ?? null) ? iterator_to_array($rssXml->entry, false) : [];
39+
$isAtom = true;
40+
}
41+
3342
$posts = [];
3443

3544
foreach ($items as $item) {
45+
$postData = $isAtom ? $this->getPostDataForAtomItem($item) : $this->getPostDataForRssItem($item);
3646

37-
$date = DateTime::createFromFormat('D, d M Y H:i:s T', $item->pubDate ?? '');
38-
$postData = [
39-
'title' => substr(strval($item->title ?? ''), 0, 250),
40-
'description' => $this->formatDescription(strval($item->description) ?: ''),
41-
'url' => strval($item->link ?? ''),
42-
'published_at' => $date ? $date->getTimestamp() : 0,
43-
];
44-
45-
if (!$this->isValidRssData($postData)) {
47+
if (!$this->isValidPostData($postData)) {
4648
continue;
4749
}
4850

@@ -52,23 +54,54 @@ public function parseRssDataToPosts(string $rssData): array
5254
return $posts;
5355
}
5456

57+
protected function getPostDataForRssItem(SimpleXMLElement $item): array
58+
{
59+
$date = DateTime::createFromFormat(DateTime::RSS, $item->pubDate ?? '');
60+
$item = [
61+
'title' => substr(strval($item->title ?? ''), 0, 250),
62+
'description' => $this->formatDescription(strval($item->description) ?: ''),
63+
'url' => strval($item->link ?? ''),
64+
'guid' => strval($item->guid ?? ''),
65+
'published_at' => $date ? $date->getTimestamp() : 0,
66+
];
67+
68+
if (empty($item['guid'])) {
69+
$item['guid'] = $item['url'];
70+
}
71+
72+
return $item;
73+
}
74+
5575
protected function formatDescription(string $description): string
5676
{
57-
$decoded = html_entity_decode(strip_tags($description));
58-
77+
$decoded = trim(html_entity_decode(strip_tags($description)));
78+
$decoded = preg_replace('/\s+/', ' ', $decoded);
79+
5980
if (strlen($decoded) > 200) {
6081
return substr($decoded, 0, 200) . '...';
6182
}
6283

6384
return $decoded;
6485
}
6586

87+
protected function getPostDataForAtomItem(SimpleXMLElement $item): array
88+
{
89+
$date = new DateTime(strval($item->published ?? $item->updated ?? ''));
90+
return [
91+
'title' => html_entity_decode(substr(strval($item->title ?? ''), 0, 250)),
92+
'description' => $this->formatDescription(strval($item->summary) ?: strval($item->content) ?: ''),
93+
'url' => $item->link ? strval($item->link->attributes()['href']) : '',
94+
'guid' => strval($item->id ?? ''),
95+
'published_at' => $date ? $date->getTimestamp() : 0,
96+
];
97+
}
98+
6699
/**
67100
* @param array{title: string, description: string, url: string, guid: string, published_at: int} $item
68101
*/
69-
protected function isValidRssData(array $item): bool
102+
protected function isValidPostData(array $item): bool
70103
{
71-
if (empty($item['title']) || empty($item['url'])) {
104+
if (empty($item['title']) || empty($item['url']) || empty($item['guid'])) {
72105
return false;
73106
}
74107

config/queue.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@
8686

8787
'failed' => [
8888
'driver' => env('QUEUE_FAILED_DRIVER', 'database-uuids'),
89-
'database' => env('DB_CONNECTION', 'mysql'),
89+
'database' => env('DB_CONNECTION', 'sqlite'),
9090
'table' => 'failed_jobs',
9191
],
9292

database/factories/PostFactory.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,14 @@ class PostFactory extends Factory
1717
*/
1818
public function definition()
1919
{
20+
$url = $this->faker->url . '?query=' . random_int(0, 1000);
2021
return [
2122
'feed_id' => Feed::factory(),
2223
'published_at' => now()->subHours(random_int(0, 200))->unix(),
2324
'title' => $this->faker->title,
2425
'description' => $this->faker->words(50, true),
25-
'url' => $this->faker->url . '?query=' . random_int(0, 1000),
26+
'url' => $url,
27+
'guid' => $url,
2628
'thumbnail' => ''
2729
];
2830
}

database/migrations/2022_06_29_124610_create_posts_table.php

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,11 @@ public function up()
2020
$table->string('title', 250);
2121
$table->text('description');
2222
$table->string('url', 250);
23+
$table->string('guid', 250);
2324
$table->string('thumbnail')->default('');
2425
$table->timestamps();
2526

26-
$table->unique(['feed_id', 'url']);
27+
$table->unique(['feed_id', 'guid']);
2728
});
2829
}
2930

tests/Unit/RssParserTest.php

Lines changed: 105 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,11 @@ public function test_it_parses_valid_posts()
4343
</channel>
4444
</rss>
4545
END
46-
);
46+
);
4747

4848
$this->assertCount(2, $posts);
4949
$this->assertEquals('BookStack Release v22.06', $posts[0]->title);
50+
$this->assertEquals('https://www.bookstackapp.com/blog/bookstack-release-v22-06/', $posts[0]->guid);
5051
$this->assertEquals('https://www.bookstackapp.com/blog/bookstack-release-v22-06/', $posts[0]->url);
5152
$this->assertEquals(1656068400, $posts[0]->published_at);
5253
$this->assertEquals('BookStack v22.06 is now here! This release was primarily refinement focused but it does include some great new features that may streamline your usage of the platform.', $posts[0]->description);
@@ -75,6 +76,7 @@ public function test_it_parses_single_post()
7576

7677
$this->assertCount(1, $posts);
7778
$this->assertEquals('BookStack Release v22.06', $posts[0]->title);
79+
$this->assertEquals('https://www.bookstackapp.com/blog/bookstack-release-v22-06/', $posts[0]->guid);
7880
$this->assertEquals('https://www.bookstackapp.com/blog/bookstack-release-v22-06/', $posts[0]->url);
7981
$this->assertEquals(1656068400, $posts[0]->published_at);
8082
$this->assertEquals('BookStack v22.06 is now here! This release was primarily refinement focused but it does include some great new features that may streamline your usage of the platform.', $posts[0]->description);
@@ -156,4 +158,106 @@ public function test_descriptions_in_html_are_parsed()
156158
$this->assertEquals('Some really cool text & with £ entities within', $posts[0]->description);
157159
}
158160

161+
public function test_it_parses_valid_atom_feeds()
162+
{
163+
$parser = new RssParser();
164+
165+
$posts = $parser->rssDataToPosts(<<<END
166+
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
167+
<feed xmlns="http://www.w3.org/2005/Atom">
168+
<title type="text" xml:lang="en">Example Atom Feed</title>
169+
170+
<entry>
171+
<title>Example Post A</title>
172+
<link href="https://example.com/a"/>
173+
<updated>2022-06-09T17:00:00.000Z</updated>
174+
<id>https://example.com/a</id>
175+
<content type="html">
176+
&lt;p&gt;Example Post A&lt;/p&gt;
177+
&lt;p&gt;&lt;a href="https://example/a"&gt;Read the full article&lt;/a&gt;&lt;/p&gt;
178+
</content>
179+
<author>
180+
<name>Example Team</name>
181+
</author>
182+
</entry>
183+
184+
<entry>
185+
<title>Example Post B</title>
186+
<link href="https://example.com/b"/>
187+
<updated>2022-06-08T17:00:00.000Z</updated>
188+
<id>https://example.com/b</id>
189+
<content type="html">
190+
&lt;p&gt;Example Post B&lt;/p&gt;
191+
&lt;p&gt;&lt;a href="https://example/a"&gt;Read the full article&lt;/a&gt;&lt;/p&gt;
192+
</content>
193+
<author>
194+
<name>Example Team</name>
195+
</author>
196+
</entry>
197+
</feed>
198+
END
199+
);
200+
201+
$this->assertCount(2, $posts);
202+
$this->assertEquals('Example Post A', $posts[0]->title);
203+
$this->assertEquals('https://example.com/a', $posts[0]->guid);
204+
$this->assertEquals('https://example.com/a', $posts[0]->url);
205+
$this->assertEquals(1654794000, $posts[0]->published_at);
206+
$this->assertEquals("Example Post A Read the full article", $posts[0]->description);
207+
}
208+
209+
public function test_atom_summary_used_over_content()
210+
{
211+
$parser = new RssParser();
212+
213+
$posts = $parser->rssDataToPosts(<<<END
214+
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
215+
<feed xmlns="http://www.w3.org/2005/Atom">
216+
<title type="text" xml:lang="en">Example Atom Feed</title>
217+
218+
<entry>
219+
<title>Example Post A</title>
220+
<link href="https://example.com/a"/>
221+
<updated>2022-06-09T17:00:00.000Z</updated>
222+
<id>https://example.com/a</id>
223+
<content type="html">&lt;p&gt;Example Post A Content&lt;/p&gt;</content>
224+
<summary type="html">&lt;p&gt;Example Post A Summary&lt;/p&gt;</summary>
225+
<author>
226+
<name>Example Team</name>
227+
</author>
228+
</entry>
229+
</feed>
230+
END
231+
);
232+
233+
$this->assertEquals("Example Post A Summary", $posts[0]->description);
234+
}
235+
236+
public function test_switcher_summary_used_over_content()
237+
{
238+
$parser = new RssParser();
239+
240+
$posts = $parser->rssDataToPosts(<<<END
241+
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
242+
<feed xmlns="http://www.w3.org/2005/Atom">
243+
<title type="text" xml:lang="en">Example Atom Feed</title>
244+
245+
<entry>
246+
<title>Example Post A</title>
247+
<link href="https://example.com/a"/>
248+
<updated>2022-06-09T17:00:00.000Z</updated>
249+
<id>https://example.com/a</id>
250+
<content type="html">&lt;p&gt;Example Post A Content&lt;/p&gt;</content>
251+
<summary type="html">&lt;p&gt;Example Post A Summary&lt;/p&gt;</summary>
252+
<author>
253+
<name>Example Team</name>
254+
</author>
255+
</entry>
256+
</feed>
257+
END
258+
);
259+
260+
$this->assertEquals("Example Post A Summary", $posts[0]->description);
261+
}
262+
159263
}

0 commit comments

Comments
 (0)