Skip to content

Commit 5290b1d

Browse files
committed
Added decoding of RSS HTML descriptions
1 parent 4176ba2 commit 5290b1d

File tree

2 files changed

+35
-1
lines changed

2 files changed

+35
-1
lines changed

app/Rss/RssParser.php

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public function parseRssDataToPosts(string $rssData): array
3737
$date = DateTime::createFromFormat('D, d M Y H:i:s T', $item->pubDate ?? '');
3838
$postData = [
3939
'title' => substr(strval($item->title ?? ''), 0, 250),
40-
'description' => substr(strval($item->description ?? ''), 0, 1000),
40+
'description' => $this->formatDescription(strval($item->description) ?: ''),
4141
'url' => strval($item->link ?? ''),
4242
'published_at' => $date ? $date->getTimestamp() : 0,
4343
];
@@ -52,6 +52,17 @@ public function parseRssDataToPosts(string $rssData): array
5252
return $posts;
5353
}
5454

55+
/**
 * Convert a raw RSS item description into a short plain-text summary.
 *
 * HTML tags are stripped and HTML entities are decoded. When the result
 * is longer than 200 characters it is cut to the first 200 characters
 * and '...' is appended; shorter results are returned unchanged.
 *
 * Length is measured in UTF-8 characters rather than bytes so that a
 * multi-byte character is never split in half at the cut-off point,
 * which would otherwise leave an invalid byte sequence in the output.
 *
 * @param string $description Raw description text taken from the feed item.
 *
 * @return string Plain-text description, at most 203 characters long.
 */
protected function formatDescription(string $description): string
{
    // Strip tags first, then decode, so entity-escaped markup in the
    // source text is not turned into tags that would need stripping again.
    $decoded = html_entity_decode(strip_tags($description));

    if (mb_strlen($decoded, 'UTF-8') > 200) {
        return mb_substr($decoded, 0, 200, 'UTF-8') . '...';
    }

    return $decoded;
}
65+
5566
/**
5667
* @param array{title: string, description: string, url: string, published_at: int} $item
5768
*/

tests/Unit/RssParserTest.php

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,4 +133,27 @@ public function test_invalid_posts_are_not_returned()
133133
$this->assertCount(0, $posts);
134134
}
135135

136+
/**
 * Checks that an item description containing entity-escaped HTML markup
 * comes back as plain text: the <span> wrapper is removed and the
 * escaped "&" and "£" entities appear as literal characters.
 */
public function test_descriptions_in_html_are_parsed()
137+
{
138+
$parser = new RssParser();
139+
140+
// NOTE(review): the parser hunk header earlier in this diff names the method
// parseRssDataToPosts; confirm that rssDataToPosts exists on RssParser.
$posts = $parser->rssDataToPosts(<<<END
141+
<?xml version="1.0" encoding="utf-8" standalone="yes"?>
142+
<rss version="2.0">
143+
<channel>
144+
<item>
145+
<title>BookStack Release v22.06</title>
146+
<link>https://www.bookstackapp.com/blog/bookstack-release-v22-06/</link>
147+
<pubDate>Fri, 24 Jun 2022 11:00:00 +0000</pubDate>
148+
<guid>https://www.bookstackapp.com/blog/bookstack-release-v22-06/</guid>
149+
<description>&lt;span a=&quot;b&quot;&gt;Some really cool text&lt;/span&gt; &amp;amp; with &amp;pound; entities within</description>
150+
</item>
151+
</channel>
152+
</rss>
153+
END
154+
);
155+
156+
// The first returned post should carry the tag-free, entity-decoded description.
$this->assertEquals('Some really cool text & with £ entities within', $posts[0]->description);
157+
}
158+
136159
}

0 commit comments

Comments
 (0)