Skip to content

Commit 417637b

Browse files
committed
subscriber only check
1 parent aa9fac9 commit 417637b

File tree

3 files changed

+155
-10
lines changed

3 files changed

+155
-10
lines changed

app/.env.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ ADMIN_EMAIL=
1515
PROXY_LIST=
1616

1717
FEED_WORKERS=1
18+
SUBSCRIBER_SHOW_POST=false
1819

1920
SMTP_HOST=smtp.resend.com
2021
SMTP_PORT=587

app/bin/lerama

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,11 @@ switch ($command) {
6969
$processor->checkPausedFeeds();
7070
break;
7171

72+
case 'feed:subscribercheck':
73+
// Check all feed items for subscriber-only content
74+
$processor->checkSubscriberContent();
75+
break;
76+
7277
case 'proxy:update':
7378
// Update proxy list
7479
$climate->info("Updating proxy list...");
@@ -107,6 +112,7 @@ function showUsage(CLImate $climate): void
107112
$climate->out(" php bin/lerama feed:process [--parallel|-p N] Process all feeds (optionally with N parallel workers, max 10)");
108113
$climate->out(" php bin/lerama feed:id {ID_DO_FEED} Process a specific feed by ID");
109114
$climate->out(" php bin/lerama feed:check-status Check and update status of paused feeds");
115+
$climate->out(" php bin/lerama feed:subscribercheck Check all feed items for Substack subscriber-only content");
110116
$climate->out(" php bin/lerama feed:import <CSV_FILE> Import feeds from CSV file (columns: url, tags, category)");
111117
$climate->out(" php bin/lerama proxy:update Update proxy list from PROXY_LIST URL");
112118
$climate->out("");
@@ -115,4 +121,5 @@ function showUsage(CLImate $climate): void
115121
$climate->out(" php bin/lerama feed:process --parallel 5 Process feeds with 5 parallel workers");
116122
$climate->out(" php bin/lerama feed:process -p 3 Process feeds with 3 parallel workers");
117123
$climate->out(" php bin/lerama feed:import feeds.csv Import feeds from CSV file");
124+
$climate->out(" php bin/lerama feed:subscribercheck Check for subscriber-only content");
118125
}

app/src/Commands/FeedProcessor.php

Lines changed: 147 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ class FeedProcessor
2121
private ProxyService $proxyService;
2222
private EmailService $emailService;
2323
private array $defaultClientConfig;
24+
private bool $subscriberTextShow;
2425

2526
public function __construct(CLImate $climate)
2627
{
@@ -31,6 +32,11 @@ public function __construct(CLImate $climate)
3132
$this->defaultClientConfig = HttpClientConfig::getDefaultConfig();
3233

3334
$this->httpClient = new \GuzzleHttp\Client($this->defaultClientConfig);
35+
36+
$this->subscriberTextShow = filter_var(
37+
$_ENV['SUBSCRIBER_SHOW_POST'] ?? 'false',
38+
FILTER_VALIDATE_BOOLEAN
39+
);
3440
}
3541

3642
public function process(?int $feedId = null, int $parallel = 1): void
@@ -315,6 +321,7 @@ private function processRssFeed(array $feed): void
315321
$date = $item->get_date('Y-m-d H:i:s') ?: date('Y-m-d H:i:s');
316322

317323
$imageUrl = $this->extractImageFromUrl($url);
324+
$isVisible = $this->subscriberTextShow ? true : !$this->isSubstackSubscriberOnly($url, $content);
318325

319326
try {
320327
DB::insert('feed_items', [
@@ -325,7 +332,8 @@ private function processRssFeed(array $feed): void
325332
'url' => $url,
326333
'image_url' => $imageUrl,
327334
'guid' => $guid,
328-
'published_at' => $date
335+
'published_at' => $date,
336+
'is_visible' => $isVisible ? 1 : 0
329337
]);
330338
$count++;
331339
$updated = true;
@@ -415,6 +423,7 @@ private function processPaginatedRssFeed(array $feed, SimplePie $simplePie, &$co
415423
$date = $item->get_date('Y-m-d H:i:s') ?: date('Y-m-d H:i:s');
416424

417425
$imageUrl = $this->extractImageFromUrl($url);
426+
$isVisible = $this->subscriberTextShow ? true : !$this->isSubstackSubscriberOnly($url, $content);
418427

419428
try {
420429
DB::insert('feed_items', [
@@ -425,7 +434,8 @@ private function processPaginatedRssFeed(array $feed, SimplePie $simplePie, &$co
425434
'url' => $url,
426435
'image_url' => $imageUrl,
427436
'guid' => $guid,
428-
'published_at' => $date
437+
'published_at' => $date,
438+
'is_visible' => $isVisible ? 1 : 0
429439
]);
430440
$count++;
431441
$updated = true;
@@ -562,17 +572,20 @@ private function processCsvFeed(array $feed): void
562572
$this->climate->whisper("Processing item: {$title} ({$url})");
563573

564574
$imageUrl = $this->extractImageFromUrl($url);
575+
$content = $contentIndex !== false && isset($data[$contentIndex]) ? $data[$contentIndex] : null;
576+
$isVisible = $this->subscriberTextShow ? true : !$this->isSubstackSubscriberOnly($url, $content);
565577

566578
try {
567579
DB::insert('feed_items', [
568580
'feed_id' => $feed['id'],
569581
'title' => $title,
570582
'author' => $authorIndex !== false && isset($data[$authorIndex]) ? $data[$authorIndex] : null,
571-
'content' => $contentIndex !== false && isset($data[$contentIndex]) ? $data[$contentIndex] : null,
583+
'content' => $content,
572584
'url' => $url,
573585
'image_url' => $imageUrl,
574586
'guid' => $guid,
575-
'published_at' => $dateIndex !== false && isset($data[$dateIndex]) ? $data[$dateIndex] : date('Y-m-d H:i:s')
587+
'published_at' => $dateIndex !== false && isset($data[$dateIndex]) ? $data[$dateIndex] : date('Y-m-d H:i:s'),
588+
'is_visible' => $isVisible ? 1 : 0
576589
]);
577590
$count++;
578591
$updated = true;
@@ -683,17 +696,20 @@ private function processPaginatedCsvFeed(array $feed, &$count, &$updated, &$last
683696
$this->climate->whisper("Processing item from page {$currentPage}: {$title} ({$url})");
684697

685698
$imageUrl = $this->extractImageFromUrl($url);
699+
$content = $contentIndex !== false && isset($data[$contentIndex]) ? $data[$contentIndex] : null;
700+
$isVisible = $this->subscriberTextShow ? true : !$this->isSubstackSubscriberOnly($url, $content);
686701

687702
try {
688703
DB::insert('feed_items', [
689704
'feed_id' => $feed['id'],
690705
'title' => $title,
691706
'author' => $authorIndex !== false && isset($data[$authorIndex]) ? $data[$authorIndex] : null,
692-
'content' => $contentIndex !== false && isset($data[$contentIndex]) ? $data[$contentIndex] : null,
707+
'content' => $content,
693708
'url' => $url,
694709
'image_url' => $imageUrl,
695710
'guid' => $guid,
696-
'published_at' => $dateIndex !== false && isset($data[$dateIndex]) ? $data[$dateIndex] : date('Y-m-d H:i:s')
711+
'published_at' => $dateIndex !== false && isset($data[$dateIndex]) ? $data[$dateIndex] : date('Y-m-d H:i:s'),
712+
'is_visible' => $isVisible ? 1 : 0
697713
]);
698714
$count++;
699715
$pageItemCount++;
@@ -787,6 +803,7 @@ private function processJsonFeed(array $feed): void
787803
$this->climate->whisper("Processing JSON item: {$title} ({$url})");
788804

789805
$imageUrl = $this->extractImageFromUrl($url);
806+
$isVisible = $this->subscriberTextShow ? true : !$this->isSubstackSubscriberOnly($url, $content);
790807

791808
try {
792809
DB::insert('feed_items', [
@@ -797,7 +814,8 @@ private function processJsonFeed(array $feed): void
797814
'url' => $url,
798815
'image_url' => $imageUrl,
799816
'guid' => $guid,
800-
'published_at' => $date
817+
'published_at' => $date,
818+
'is_visible' => $isVisible ? 1 : 0
801819
]);
802820
$count++;
803821
$updated = true;
@@ -885,6 +903,7 @@ private function processPaginatedJsonFeed(array $feed, string $nextPageUrl, &$co
885903
$this->climate->whisper("Processing JSON item from page {$currentPage}: {$title} ({$url})");
886904

887905
$imageUrl = $this->extractImageFromUrl($url);
906+
$isVisible = $this->subscriberTextShow ? true : !$this->isSubstackSubscriberOnly($url, $content);
888907

889908
try {
890909
DB::insert('feed_items', [
@@ -895,7 +914,8 @@ private function processPaginatedJsonFeed(array $feed, string $nextPageUrl, &$co
895914
'url' => $url,
896915
'image_url' => $imageUrl,
897916
'guid' => $guid,
898-
'published_at' => $date
917+
'published_at' => $date,
918+
'is_visible' => $isVisible ? 1 : 0
899919
]);
900920
$count++;
901921
$pageItemCount++;
@@ -982,6 +1002,7 @@ private function processXmlFeed(array $feed): void
9821002
$this->climate->whisper("Processing XML item: {$title} ({$url})");
9831003

9841004
$imageUrl = $this->extractImageFromUrl($url);
1005+
$isVisible = $this->subscriberTextShow ? true : !$this->isSubstackSubscriberOnly($url, $content);
9851006

9861007
try {
9871008
DB::insert('feed_items', [
@@ -992,7 +1013,8 @@ private function processXmlFeed(array $feed): void
9921013
'url' => $url,
9931014
'image_url' => $imageUrl,
9941015
'guid' => $guid,
995-
'published_at' => $date
1016+
'published_at' => $date,
1017+
'is_visible' => $isVisible ? 1 : 0
9961018
]);
9971019
$count++;
9981020
$updated = true;
@@ -1098,6 +1120,7 @@ private function processPaginatedXmlFeed(array $feed, \SimpleXMLElement $xml, &$
10981120
$this->climate->whisper("Processing XML item from page {$currentPage}: {$title} ({$url})");
10991121

11001122
$imageUrl = $this->extractImageFromUrl($url);
1123+
$isVisible = $this->subscriberTextShow ? true : !$this->isSubstackSubscriberOnly($url, $content);
11011124

11021125
try {
11031126
DB::insert('feed_items', [
@@ -1108,7 +1131,8 @@ private function processPaginatedXmlFeed(array $feed, \SimpleXMLElement $xml, &$
11081131
'url' => $url,
11091132
'image_url' => $imageUrl,
11101133
'guid' => $guid,
1111-
'published_at' => $date
1134+
'published_at' => $date,
1135+
'is_visible' => $isVisible ? 1 : 0
11121136
]);
11131137
$count++;
11141138
$pageItemCount++;
@@ -1154,6 +1178,119 @@ private function processPaginatedXmlFeed(array $feed, \SimpleXMLElement $xml, &$
11541178
}
11551179
$this->climate->out("Added {$count} new items from XML feed: {$feed['title']}");
11561180
}
1181+
/**
 * Decide whether a feed item looks like a Substack subscriber-only (paywalled) post.
 *
 * Only items whose URL contains "substack.com" (case-insensitive) are inspected;
 * any other URL, or an empty/null content, yields false. The content is then
 * matched against a list of known paywall phrases in English, Spanish and
 * Portuguese; the first hit is logged via whisper and ends the search.
 *
 * @param string      $url     Item URL, used to restrict the check to Substack posts.
 * @param string|null $content Item body/summary to scan for paywall phrases.
 *
 * @return bool True when a paywall phrase is found in a Substack item, false otherwise.
 */
private function isSubstackSubscriberOnly(string $url, ?string $content): bool
{
    if (stripos($url, 'substack.com') === false) {
        return false;
    }

    if (empty($content)) {
        return false;
    }

    $subscriberPatterns = [
        // English
        '/This is exclusive content for subscribers/i',
        '/This post is for paid subscribers/i',
        '/This post is for paying subscribers/i',
        '/Subscribe to keep reading/i',
        '/Subscribe now to continue reading/i',
        '/Upgrade to paid/i',
        '/Subscribe to read the full story/i',
        // Previously written as '/This is a preview/i.*subscribe/i', which PCRE
        // rejects ("unknown modifier") so it never matched. The `s` flag lets
        // `.` span line breaks between the two phrases.
        '/This is a preview.*subscribe/is',
        '/Get \d+% off for \d+ year/i',
        '/Upgrade your subscription/i',
        '/subscribers only/i',
        '/Subscribe to unlock/i',
        '/Already a paying subscriber/i',
        '/Become a paid subscriber/i',

        // Spanish
        '/Este es contenido exclusivo para suscriptores/i',
        '/Este contenido es para suscriptores/i',
        '/Esta publicación es para suscriptores/i',
        '/Suscríbete para seguir leyendo/i',
        '/Suscríbete para continuar leyendo/i',
        '/Actualiza a suscripción de pago/i',
        '/Suscríbete para leer la historia completa/i',
        '/solo para suscriptores/i',
        '/Suscríbete para desbloquear/i',
        '/Conviértete en suscriptor de pago/i',
        '/Actualiza tu suscripción/i',
        '/contenido exclusivo para suscriptores/i',

        // Portuguese
        '/Este é um conteúdo exclusivo para os assinantes/i',
        '/Este conteúdo é para assinantes/i',
        '/Esta publicação é para assinantes/i',
        '/Assine para continuar lendo/i',
        '/Assine agora para continuar lendo/i',
        '/Atualize para assinatura paga/i',
        '/Assine para ler a história completa/i',
        '/apenas para assinantes/i',
        '/Assine para desbloquear/i',
        '/Torne-se um assinante pago/i',
        '/Atualize sua assinatura/i',
        '/conteúdo exclusivo para assinantes/i',
        '/somente assinantes/i'
    ];

    foreach ($subscriberPatterns as $pattern) {
        // preg_match returns 1 on a match, 0 on no match, false on error;
        // only a real match counts as subscriber-only.
        if (preg_match($pattern, $content)) {
            $this->climate->whisper("Detected Substack subscriber-only content in: {$url}");
            return true;
        }
    }

    return false;
}
1250+
1251+
/**
 * Re-scan every currently visible feed item and hide the ones detected as
 * Substack subscriber-only content.
 *
 * Reads id, url and content of all rows in feed_items with is_visible = 1,
 * runs each through isSubstackSubscriberOnly(), and sets is_visible = 0 on
 * the rows that match. Progress and a final summary are written to the CLI.
 */
public function checkSubscriberContent(): void
{
    $this->climate->info("Checking all feed items for subscriber-only content...");

    $visibleItems = DB::query("SELECT id, url, content FROM feed_items WHERE is_visible = 1");

    // Nothing visible means nothing to re-check.
    if (empty($visibleItems)) {
        $this->climate->info("No visible items found to check");
        return;
    }

    $totalItems = count($visibleItems);
    $this->climate->info("Found {$totalItems} visible items to check");

    $processed = 0;
    $markedInvisible = 0;

    foreach ($visibleItems as $item) {
        ++$processed;

        // Emit a progress line once per 100 inspected items.
        if ($processed % 100 === 0) {
            $this->climate->info("Progress: {$processed}/{$totalItems} items checked...");
        }

        // Items that are not subscriber-only stay visible; move on.
        if (!$this->isSubstackSubscriberOnly($item['url'], $item['content'])) {
            continue;
        }

        DB::update('feed_items', ['is_visible' => 0], 'id=%i', $item['id']);
        ++$markedInvisible;

        $this->climate->whisper("Marked as invisible (ID: {$item['id']}): {$item['url']}");
    }

    $this->climate->green("✓ Process complete!");
    $this->climate->info("Total items checked: {$totalItems}");
    $this->climate->info("Items marked as invisible: {$markedInvisible}");
}
1293+
11571294
private function extractImageFromUrl(string $url): ?string
11581295
{
11591296
if (empty($url)) {

0 commit comments

Comments
 (0)