|
1 | 1 | <?php |
/**
 * Bridge for the French MSN news feed.
 *
 * Reads the RSS feed at FEED_URL and, for every entry whose link carries an
 * MSN article id, replaces the feed summary with the full article data
 * fetched from the JSON detail endpoint at JSON_URL.
 */
class MsnMondeBridge extends FeedExpander {

    const MAINTAINER = 'kranack';
    const NAME = 'MSN Actu Monde';
    const DESCRIPTION = 'Returns the 10 newest posts from MSN Actualités (full text)';
    const URI = 'https://www.msn.com/fr-fr/actualite';
    const FEED_URL = 'https://rss.msn.com/fr-fr';
    const JSON_URL = 'https://assets.msn.com/content/view/v2/Detail/fr-fr/';
    const LIMIT = 10;

    /**
     * @return string Display name of the generated feed.
     */
    public function getName() {
        return 'MSN Actualités';
    }

    /**
     * @return string Landing page of the French MSN news section.
     */
    public function getURI() {
        return self::URI;
    }

    /**
     * Collects at most LIMIT entries from the RSS feed; each entry is routed
     * through parseItem() by the parent feed expander.
     */
    public function collectData() {
        $this->collectExpandableDatas(self::FEED_URL, self::LIMIT);
    }

    /**
     * Enriches one feed entry with the data of the JSON detail endpoint.
     *
     * @param mixed $newsItem Raw feed entry, forwarded untouched to the parent parser.
     * @return array|null The enriched item, or null when the entry link holds
     *                    no article id (presumably skipped by the caller --
     *                    confirm against FeedExpander).
     */
    protected function parseItem($newsItem) {
        $item = parent::parseItem($newsItem);

        // Only links of the form ".../fr-fr/actualite.../ar-<id>?..." can be expanded.
        if (empty($item['uri'])
            || !preg_match('#fr-fr/actualite.*/ar-(?<id>[\w]*)\?#', $item['uri'], $matches)) {
            return;
        }

        $json = json_decode(getContents(self::JSON_URL . $matches['id']), true);

        // The endpoint may answer with something that is not a JSON object;
        // keep the entry as parsed from the feed instead of wiping its fields.
        if (!is_array($json)) {
            return $item;
        }

        if (isset($json['body'])) {
            $item['content'] = $json['body'];
        }

        if (!empty($json['authors']) && is_array($json['authors'])) {
            $firstAuthor = reset($json['authors']);
            if (isset($firstAuthor['name'])) {
                $item['author'] = $firstAuthor['name'];
            }
        }

        // Passed through unchanged, as before (presumably a date string that
        // the item/format layer converts -- TODO confirm).
        if (isset($json['createdDateTime'])) {
            $item['timestamp'] = $json['createdDateTime'];
        }

        if (!empty($json['tags']) && is_array($json['tags'])) {
            foreach ($json['tags'] as $tag) {
                if (isset($tag['label'])) {
                    $item['categories'][] = $tag['label'];
                }
            }
        }

        return $item;
    }
}
0 commit comments