@@ -1180,6 +1180,7 @@ private function processPaginatedXmlFeed(array $feed, \SimpleXMLElement $xml, &$
11801180 }
11811181 /**
11821182 * Check if content is Substack subscriber-only
1183+ * Detects the "Read more" link pattern at the end of truncated content
11831184 */
11841185 private function isSubstackSubscriberOnly (string $ url , ?string $ content ): bool
11851186 {
@@ -1191,56 +1192,40 @@ private function isSubstackSubscriberOnly(string $url, ?string $content): bool
11911192 return false ;
11921193 }
11931194
1194- $ subscriberPatterns = [
1195- // English
1196- '/This is exclusive content for subscribers/i ' ,
1197- '/This post is for paid subscribers/i ' ,
1198- '/This post is for paying subscribers/i ' ,
1199- '/Subscribe to keep reading/i ' ,
1200- '/Subscribe now to continue reading/i ' ,
1201- '/Upgrade to paid/i ' ,
1202- '/Subscribe to read the full story/i ' ,
1203- '/This is a preview/i.*subscribe/i ' ,
1204- '/Get \d+% off for \d+ year/i ' ,
1205- '/Upgrade your subscription/i ' ,
1206- '/subscribers only/i ' ,
1207- '/Subscribe to unlock/i ' ,
1208- '/Already a paying subscriber/i ' ,
1209- '/Become a paid subscriber/i ' ,
1210-
1211- // Spanish
1212- '/Este es contenido exclusivo para suscriptores/i ' ,
1213- '/Este contenido es para suscriptores/i ' ,
1214- '/Esta publicación es para suscriptores/i ' ,
1215- '/Suscríbete para seguir leyendo/i ' ,
1216- '/Suscríbete para continuar leyendo/i ' ,
1217- '/Actualiza a suscripción de pago/i ' ,
1218- '/Suscríbete para leer la historia completa/i ' ,
1219- '/solo para suscriptores/i ' ,
1220- '/Suscríbete para desbloquear/i ' ,
1221- '/Conviértete en suscriptor de pago/i ' ,
1222- '/Actualiza tu suscripción/i ' ,
1223- '/contenido exclusivo para suscriptores/i ' ,
1224-
1225- // Portuguese
1226- '/Este é um conteúdo exclusivo para os assinantes/i ' ,
1227- '/Este conteúdo é para assinantes/i ' ,
1228- '/Esta publicação é para assinantes/i ' ,
1229- '/Assine para continuar lendo/i ' ,
1230- '/Assine agora para continuar lendo/i ' ,
1231- '/Atualize para assinatura paga/i ' ,
1232- '/Assine para ler a história completa/i ' ,
1233- '/apenas para assinantes/i ' ,
1234- '/Assine para desbloquear/i ' ,
1235- '/Torne-se um assinante pago/i ' ,
1236- '/Atualize sua assinatura/i ' ,
1237- '/conteúdo exclusivo para assinantes/i ' ,
1238- '/somente assinantes/i '
1195+ // Remove whitespace and get the last 500 characters for checking
1196+ $ trimmedContent = trim ($ content );
1197+ $ endContent = substr ($ trimmedContent , -500 );
1198+
1199+ // Pattern 1: Check for "Read more" link pointing to substack.com at the end
1200+ // Example: <p> <a href="https://example.substack.com/p/post-name"> Read more </a> </p>
1201+ $ readMorePattern = '/<p>\s*<a\s+href=[" \']https?:\/\/[^" \']*\.?substack\.com[^" \']*[" \']>\s*Read more\s*<\/a>\s*<\/p>\s*$/i ' ;
1202+
1203+ if (preg_match ($ readMorePattern , $ endContent )) {
1204+ $ this ->climate ->whisper ("Detected Substack subscriber-only content (Read more link) in: {$ url }" );
1205+ return true ;
1206+ }
1207+
1208+ // Pattern 2: Alternative "Read more" patterns without strict spacing
1209+ $ readMorePattern2 = '/<p[^>]*>\s*<a[^>]+href=[" \']https?:\/\/[^" \']*\.?substack\.com[^" \']*[" \'][^>]*>\s*Read\s+more\s*<\/a>\s*<\/p>\s*$/i ' ;
1210+
1211+ if (preg_match ($ readMorePattern2 , $ endContent )) {
1212+ $ this ->climate ->whisper ("Detected Substack subscriber-only content (Read more link alt) in: {$ url }" );
1213+ return true ;
1214+ }
1215+
1216+ // Pattern 3: Check for common subscriber-only content indicators as fallback
1217+ $ subscriberIndicators = [
1218+ '/Este (?:é um )?conteúdo exclusivo para (?:os )?assinantes/i ' ,
1219+ '/This is (?:a |an )?(?:exclusive )?content for (?:paid )?subscribers/i ' ,
1220+ '/Este(?:s)? (?:es|é) contenido exclusivo para suscriptores/i ' ,
1221+ '/Subscribe (?:now )?to (?:keep |continue )?reading/i ' ,
1222+ '/Assine (?:agora )?para (?:continuar |seguir )?lendo/i ' ,
1223+ '/Suscr[ií]bete (?:ahora )?para (?:seguir |continuar )?leyendo/i '
12391224 ];
12401225
1241- foreach ($ subscriberPatterns as $ pattern ) {
1226+ foreach ($ subscriberIndicators as $ pattern ) {
12421227 if (preg_match ($ pattern , $ content )) {
1243- $ this ->climate ->whisper ("Detected Substack subscriber-only content in: {$ url }" );
1228+ $ this ->climate ->whisper ("Detected Substack subscriber-only content (text indicator) in: {$ url }" );
12441229 return true ;
12451230 }
12461231 }
0 commit comments