@@ -21,6 +21,7 @@ class FeedProcessor
2121 private ProxyService $ proxyService ;
2222 private EmailService $ emailService ;
2323 private array $ defaultClientConfig ;
24+ private bool $ subscriberTextShow ;
2425
2526 public function __construct (CLImate $ climate )
2627 {
@@ -31,6 +32,11 @@ public function __construct(CLImate $climate)
3132 $ this ->defaultClientConfig = HttpClientConfig::getDefaultConfig ();
3233
3334 $ this ->httpClient = new \GuzzleHttp \Client ($ this ->defaultClientConfig );
35+
36+ $ this ->subscriberTextShow = filter_var (
37+ $ _ENV ['SUBSCRIBER_SHOW_POST ' ] ?? 'false ' ,
38+ FILTER_VALIDATE_BOOLEAN
39+ );
3440 }
3541
3642 public function process (?int $ feedId = null , int $ parallel = 1 ): void
@@ -315,6 +321,7 @@ private function processRssFeed(array $feed): void
315321 $ date = $ item ->get_date ('Y-m-d H:i:s ' ) ?: date ('Y-m-d H:i:s ' );
316322
317323 $ imageUrl = $ this ->extractImageFromUrl ($ url );
324+ $ isVisible = $ this ->subscriberTextShow ? true : !$ this ->isSubstackSubscriberOnly ($ url , $ content );
318325
319326 try {
320327 DB ::insert ('feed_items ' , [
@@ -325,7 +332,8 @@ private function processRssFeed(array $feed): void
325332 'url ' => $ url ,
326333 'image_url ' => $ imageUrl ,
327334 'guid ' => $ guid ,
328- 'published_at ' => $ date
335+ 'published_at ' => $ date ,
336+ 'is_visible ' => $ isVisible ? 1 : 0
329337 ]);
330338 $ count ++;
331339 $ updated = true ;
@@ -415,6 +423,7 @@ private function processPaginatedRssFeed(array $feed, SimplePie $simplePie, &$co
415423 $ date = $ item ->get_date ('Y-m-d H:i:s ' ) ?: date ('Y-m-d H:i:s ' );
416424
417425 $ imageUrl = $ this ->extractImageFromUrl ($ url );
426+ $ isVisible = $ this ->subscriberTextShow ? true : !$ this ->isSubstackSubscriberOnly ($ url , $ content );
418427
419428 try {
420429 DB ::insert ('feed_items ' , [
@@ -425,7 +434,8 @@ private function processPaginatedRssFeed(array $feed, SimplePie $simplePie, &$co
425434 'url ' => $ url ,
426435 'image_url ' => $ imageUrl ,
427436 'guid ' => $ guid ,
428- 'published_at ' => $ date
437+ 'published_at ' => $ date ,
438+ 'is_visible ' => $ isVisible ? 1 : 0
429439 ]);
430440 $ count ++;
431441 $ updated = true ;
@@ -562,17 +572,20 @@ private function processCsvFeed(array $feed): void
562572 $ this ->climate ->whisper ("Processing item: {$ title } ( {$ url }) " );
563573
564574 $ imageUrl = $ this ->extractImageFromUrl ($ url );
575+ $ content = $ contentIndex !== false && isset ($ data [$ contentIndex ]) ? $ data [$ contentIndex ] : null ;
576+ $ isVisible = $ this ->subscriberTextShow ? true : !$ this ->isSubstackSubscriberOnly ($ url , $ content );
565577
566578 try {
567579 DB ::insert ('feed_items ' , [
568580 'feed_id ' => $ feed ['id ' ],
569581 'title ' => $ title ,
570582 'author ' => $ authorIndex !== false && isset ($ data [$ authorIndex ]) ? $ data [$ authorIndex ] : null ,
571- 'content ' => $ contentIndex !== false && isset ( $ data [ $ contentIndex ]) ? $ data [ $ contentIndex ] : null ,
583+ 'content ' => $ content ,
572584 'url ' => $ url ,
573585 'image_url ' => $ imageUrl ,
574586 'guid ' => $ guid ,
575- 'published_at ' => $ dateIndex !== false && isset ($ data [$ dateIndex ]) ? $ data [$ dateIndex ] : date ('Y-m-d H:i:s ' )
587+ 'published_at ' => $ dateIndex !== false && isset ($ data [$ dateIndex ]) ? $ data [$ dateIndex ] : date ('Y-m-d H:i:s ' ),
588+ 'is_visible ' => $ isVisible ? 1 : 0
576589 ]);
577590 $ count ++;
578591 $ updated = true ;
@@ -683,17 +696,20 @@ private function processPaginatedCsvFeed(array $feed, &$count, &$updated, &$last
683696 $ this ->climate ->whisper ("Processing item from page {$ currentPage }: {$ title } ( {$ url }) " );
684697
685698 $ imageUrl = $ this ->extractImageFromUrl ($ url );
699+ $ content = $ contentIndex !== false && isset ($ data [$ contentIndex ]) ? $ data [$ contentIndex ] : null ;
700+ $ isVisible = $ this ->subscriberTextShow ? true : !$ this ->isSubstackSubscriberOnly ($ url , $ content );
686701
687702 try {
688703 DB ::insert ('feed_items ' , [
689704 'feed_id ' => $ feed ['id ' ],
690705 'title ' => $ title ,
691706 'author ' => $ authorIndex !== false && isset ($ data [$ authorIndex ]) ? $ data [$ authorIndex ] : null ,
692- 'content ' => $ contentIndex !== false && isset ( $ data [ $ contentIndex ]) ? $ data [ $ contentIndex ] : null ,
707+ 'content ' => $ content ,
693708 'url ' => $ url ,
694709 'image_url ' => $ imageUrl ,
695710 'guid ' => $ guid ,
696- 'published_at ' => $ dateIndex !== false && isset ($ data [$ dateIndex ]) ? $ data [$ dateIndex ] : date ('Y-m-d H:i:s ' )
711+ 'published_at ' => $ dateIndex !== false && isset ($ data [$ dateIndex ]) ? $ data [$ dateIndex ] : date ('Y-m-d H:i:s ' ),
712+ 'is_visible ' => $ isVisible ? 1 : 0
697713 ]);
698714 $ count ++;
699715 $ pageItemCount ++;
@@ -787,6 +803,7 @@ private function processJsonFeed(array $feed): void
787803 $ this ->climate ->whisper ("Processing JSON item: {$ title } ( {$ url }) " );
788804
789805 $ imageUrl = $ this ->extractImageFromUrl ($ url );
806+ $ isVisible = $ this ->subscriberTextShow ? true : !$ this ->isSubstackSubscriberOnly ($ url , $ content );
790807
791808 try {
792809 DB ::insert ('feed_items ' , [
@@ -797,7 +814,8 @@ private function processJsonFeed(array $feed): void
797814 'url ' => $ url ,
798815 'image_url ' => $ imageUrl ,
799816 'guid ' => $ guid ,
800- 'published_at ' => $ date
817+ 'published_at ' => $ date ,
818+ 'is_visible ' => $ isVisible ? 1 : 0
801819 ]);
802820 $ count ++;
803821 $ updated = true ;
@@ -885,6 +903,7 @@ private function processPaginatedJsonFeed(array $feed, string $nextPageUrl, &$co
885903 $ this ->climate ->whisper ("Processing JSON item from page {$ currentPage }: {$ title } ( {$ url }) " );
886904
887905 $ imageUrl = $ this ->extractImageFromUrl ($ url );
906+ $ isVisible = $ this ->subscriberTextShow ? true : !$ this ->isSubstackSubscriberOnly ($ url , $ content );
888907
889908 try {
890909 DB ::insert ('feed_items ' , [
@@ -895,7 +914,8 @@ private function processPaginatedJsonFeed(array $feed, string $nextPageUrl, &$co
895914 'url ' => $ url ,
896915 'image_url ' => $ imageUrl ,
897916 'guid ' => $ guid ,
898- 'published_at ' => $ date
917+ 'published_at ' => $ date ,
918+ 'is_visible ' => $ isVisible ? 1 : 0
899919 ]);
900920 $ count ++;
901921 $ pageItemCount ++;
@@ -982,6 +1002,7 @@ private function processXmlFeed(array $feed): void
9821002 $ this ->climate ->whisper ("Processing XML item: {$ title } ( {$ url }) " );
9831003
9841004 $ imageUrl = $ this ->extractImageFromUrl ($ url );
1005+ $ isVisible = $ this ->subscriberTextShow ? true : !$ this ->isSubstackSubscriberOnly ($ url , $ content );
9851006
9861007 try {
9871008 DB ::insert ('feed_items ' , [
@@ -992,7 +1013,8 @@ private function processXmlFeed(array $feed): void
9921013 'url ' => $ url ,
9931014 'image_url ' => $ imageUrl ,
9941015 'guid ' => $ guid ,
995- 'published_at ' => $ date
1016+ 'published_at ' => $ date ,
1017+ 'is_visible ' => $ isVisible ? 1 : 0
9961018 ]);
9971019 $ count ++;
9981020 $ updated = true ;
@@ -1098,6 +1120,7 @@ private function processPaginatedXmlFeed(array $feed, \SimpleXMLElement $xml, &$
10981120 $ this ->climate ->whisper ("Processing XML item from page {$ currentPage }: {$ title } ( {$ url }) " );
10991121
11001122 $ imageUrl = $ this ->extractImageFromUrl ($ url );
1123+ $ isVisible = $ this ->subscriberTextShow ? true : !$ this ->isSubstackSubscriberOnly ($ url , $ content );
11011124
11021125 try {
11031126 DB ::insert ('feed_items ' , [
@@ -1108,7 +1131,8 @@ private function processPaginatedXmlFeed(array $feed, \SimpleXMLElement $xml, &$
11081131 'url ' => $ url ,
11091132 'image_url ' => $ imageUrl ,
11101133 'guid ' => $ guid ,
1111- 'published_at ' => $ date
1134+ 'published_at ' => $ date ,
1135+ 'is_visible ' => $ isVisible ? 1 : 0
11121136 ]);
11131137 $ count ++;
11141138 $ pageItemCount ++;
@@ -1154,6 +1178,119 @@ private function processPaginatedXmlFeed(array $feed, \SimpleXMLElement $xml, &$
11541178 }
11551179 $ this ->climate ->out ("Added {$ count } new items from XML feed: {$ feed ['title ' ]}" );
11561180 }
/**
 * Decide whether a feed item looks like a Substack post locked to subscribers.
 *
 * The check is purely textual: the item must point at a substack.com URL and
 * its content must contain one of the known paywall phrases (English, Spanish
 * or Portuguese). Items without content are never flagged.
 *
 * @param string      $url     URL of the feed item.
 * @param string|null $content Item content as stored/fetched; may be null or empty.
 *
 * @return bool True when a paywall phrase is found in a Substack item, false otherwise.
 */
private function isSubstackSubscriberOnly(string $url, ?string $content): bool
{
    // Only Substack-hosted URLs are considered.
    if (stripos($url, 'substack.com') === false) {
        return false;
    }

    // Nothing to inspect (note: empty() also treats the string "0" as empty).
    if (empty($content)) {
        return false;
    }

    $subscriberPatterns = [
        // English
        '/This is exclusive content for subscribers/i',
        '/This post is for paid subscribers/i',
        '/This post is for paying subscribers/i',
        '/Subscribe to keep reading/i',
        '/Subscribe now to continue reading/i',
        '/Upgrade to paid/i',
        '/Subscribe to read the full story/i',
        // Previously written as '/This is a preview/i.*subscribe/i', which is an
        // invalid pattern (everything after the second "/" was parsed as
        // modifiers), so preg_match() warned and never matched. The "s"
        // modifier lets the two phrases sit on different lines of the content.
        '/This is a preview.*subscribe/is',
        '/Get \d+% off for \d+ year/i',
        '/Upgrade your subscription/i',
        '/subscribers only/i',
        '/Subscribe to unlock/i',
        '/Already a paying subscriber/i',
        '/Become a paid subscriber/i',

        // Spanish
        '/Este es contenido exclusivo para suscriptores/i',
        '/Este contenido es para suscriptores/i',
        '/Esta publicación es para suscriptores/i',
        '/Suscríbete para seguir leyendo/i',
        '/Suscríbete para continuar leyendo/i',
        '/Actualiza a suscripción de pago/i',
        '/Suscríbete para leer la historia completa/i',
        '/solo para suscriptores/i',
        '/Suscríbete para desbloquear/i',
        '/Conviértete en suscriptor de pago/i',
        '/Actualiza tu suscripción/i',
        '/contenido exclusivo para suscriptores/i',

        // Portuguese
        '/Este é um conteúdo exclusivo para os assinantes/i',
        '/Este conteúdo é para assinantes/i',
        '/Esta publicação é para assinantes/i',
        '/Assine para continuar lendo/i',
        '/Assine agora para continuar lendo/i',
        '/Atualize para assinatura paga/i',
        '/Assine para ler a história completa/i',
        '/apenas para assinantes/i',
        '/Assine para desbloquear/i',
        '/Torne-se um assinante pago/i',
        '/Atualize sua assinatura/i',
        '/conteúdo exclusivo para assinantes/i',
        '/somente assinantes/i'
    ];

    // First matching phrase wins; log it and stop scanning.
    foreach ($subscriberPatterns as $pattern) {
        if (preg_match($pattern, $content)) {
            $this->climate->whisper("Detected Substack subscriber-only content in: {$url}");
            return true;
        }
    }

    return false;
}
1250+
/**
 * Re-scan every currently visible feed item and hide the ones that look like
 * Substack subscriber-only posts.
 *
 * Reads id, url and content of all rows in feed_items with is_visible = 1,
 * runs each through isSubstackSubscriberOnly(), and sets is_visible = 0 on
 * every match. Progress and a final summary are written to the console.
 *
 * @return void
 */
public function checkSubscriberContent(): void
{
    $this->climate->info("Checking all feed items for subscriber-only content...");

    $rows = DB::query("SELECT id, url, content FROM feed_items WHERE is_visible = 1");

    // Nothing visible means nothing to re-evaluate.
    if (empty($rows)) {
        $this->climate->info("No visible items found to check");
        return;
    }

    $totalItems = count($rows);
    $this->climate->info("Found {$totalItems} visible items to check");

    $hiddenCount = 0;
    $checked = 0;

    foreach ($rows as $row) {
        ++$checked;

        // Report progress once per hundred items.
        if ($checked % 100 === 0) {
            $this->climate->info("Progress: {$checked}/{$totalItems} items checked...");
        }

        // Items that are not paywalled stay visible.
        if (!$this->isSubstackSubscriberOnly($row['url'], $row['content'])) {
            continue;
        }

        DB::update('feed_items', ['is_visible' => 0], 'id=%i', $row['id']);

        ++$hiddenCount;
        $this->climate->whisper("Marked as invisible (ID: {$row['id']}): {$row['url']}");
    }

    $this->climate->green("✓ Process complete!");
    $this->climate->info("Total items checked: {$totalItems}");
    $this->climate->info("Items marked as invisible: {$hiddenCount}");
}
1293+
11571294 private function extractImageFromUrl (string $ url ): ?string
11581295 {
11591296 if (empty ($ url )) {
0 commit comments