Skip to content

Commit b016782

Browse files
authored
Merge pull request #23952 from abpframework/salihozkara-patch-2
Create workflow to auto-add SEO descriptions
2 parents 87514d4 + cf4289b commit b016782

File tree

3 files changed

+413
-1
lines changed

3 files changed

+413
-1
lines changed
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
import os
import sys
import re
# Third-party: official OpenAI SDK; provides the chat-completions client used below.
from openai import OpenAI

# Fail fast at import time: os.environ[...] raises KeyError if the
# OPENAI_API_KEY secret is not configured in the workflow environment.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])
7+
8+
def has_seo_description(content):
    """Check if content already has an SEO block with a non-empty Description.

    Args:
        content: Full markdown text of a documentation page.

    Returns:
        bool: True only when a ```json //[doc-seo] fenced block exists, parses
        as JSON, and carries a non-empty "Description" value.
    """
    import json

    # Match SEO description block with 3 or more backticks
    pattern = r'```+json\s*//\[doc-seo\]\s*(\{.*?\})\s*```+'
    match = re.search(pattern, content, flags=re.DOTALL)

    if not match:
        return False

    # Check if Description field exists and is not empty
    try:
        seo_data = json.loads(match.group(1))
        # bool() so callers always get True/False rather than the raw
        # description string leaking out of a predicate function.
        return bool(seo_data.get('Description'))
    except json.JSONDecodeError:
        # Malformed JSON inside the block counts as "no usable description".
        return False
26+
27+
def is_content_too_short(content):
    """Return True when the page body (SEO block excluded) is under 200 chars."""
    # Strip any [doc-seo] fenced block (3+ backticks) first so it does not
    # inflate the character count of the actual documentation text.
    body = re.sub(r'```+json\s*//\[doc-seo\].*?```+\s*', '', content, flags=re.DOTALL)
    return len(body.strip()) < 200
34+
35+
def get_content_preview(content, max_length=1000):
    """Return up to *max_length* characters of content for the OpenAI prompt.

    Any existing [doc-seo] fenced block (3+ backticks) is removed first so the
    preview reflects the real page text.
    """
    body = re.sub(r'```+json\s*//\[doc-seo\].*?```+\s*', '', content, flags=re.DOTALL)
    preview = body[:max_length]
    return preview.strip()
42+
43+
def generate_description(content, filename):
    """Generate an SEO meta description for a docs page via the OpenAI API.

    Args:
        content: Full markdown content of the page.
        filename: Base name of the markdown file; interpolated into the prompt
            for context and used to build the fallback description.

    Returns:
        A short description string. On any API error a generic fallback built
        from the filename is returned instead — this function never raises, so
        one bad API call cannot abort the whole batch run.
    """
    try:
        preview = get_content_preview(content)

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": """Create a short and engaging summary (1–2 sentences) for sharing this documentation link on Discord, LinkedIn, Reddit, Twitter and Facebook. Clearly describe what the page explains or teaches.
Highlight the value for developers using ABP Framework.
Be written in a friendly and professional tone.
Stay under 150 characters.
--> https://abp.io/docs/latest <--"""},
                # FIX: interpolate the filename into the prompt — the
                # parameter was previously unused here, leaving a literal
                # placeholder in the text sent to the model.
                {"role": "user", "content": f"""Generate a concise, informative meta description for this documentation page.

File: {filename}
Content Preview:
{preview}

Requirements:
- Maximum 150 characters

Generate only the description text, nothing else:"""}
            ],
            max_tokens=150,
            temperature=0.7
        )

        description = response.choices[0].message.content.strip()

        return description
    except Exception as e:
        print(f"❌ Error generating description: {e}")
        return f"Learn about {os.path.splitext(filename)[0]} in ABP Framework documentation."
77+
78+
def add_seo_description(content, description):
    """Add or update the [doc-seo] JSON block carrying the page description.

    If a ```json //[doc-seo] fenced block already exists and parses as JSON,
    its "Description" field is updated in place (preserving the original
    backtick fence length and other fields). Otherwise a fresh block is
    prepended to the content. If an existing block contains invalid JSON, a
    new block is prepended and the broken block is left untouched.

    Args:
        content: Full markdown content of the page.
        description: Plain-text description to store.

    Returns:
        The updated markdown content.
    """
    import json

    # Escape special characters for JSON (used only on the new-block path;
    # the update path serializes via json.dumps instead).
    escaped_desc = description.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')

    # Check if SEO block already exists. The \1 backreference keeps the
    # opening and closing backtick runs the same length.
    pattern = r'(```+)json\s*//\[doc-seo\]\s*(\{.*?\})\s*\1'
    match = re.search(pattern, content, flags=re.DOTALL)

    if match:
        # SEO block exists, update Description field
        backticks = match.group(1)
        json_str = match.group(2)

        try:
            # Parse existing JSON
            seo_data = json.loads(json_str)
            # Update Description
            seo_data['Description'] = description
            # Convert back to formatted JSON
            updated_json = json.dumps(seo_data, indent=4, ensure_ascii=False)

            # Replace the old block with updated one
            new_block = f'''{backticks}json
//[doc-seo]
{updated_json}
{backticks}'''

            # FIX: use a callable replacement so that backslashes in the
            # serialized JSON (e.g. "\\n" inside the description) are NOT
            # interpreted as regex escapes / group references by re.sub,
            # which previously corrupted the written block.
            return re.sub(pattern, lambda _m: new_block, content, count=1, flags=re.DOTALL)
        except json.JSONDecodeError:
            # If JSON is invalid, fall through and prepend a fresh block
            pass

    # No existing block or invalid JSON, add new block at the beginning
    seo_tag = f'''```json
//[doc-seo]
{{
    "Description": "{escaped_desc}"
}}
```

'''
    return seo_tag + content
123+
124+
def is_file_ignored(filepath, ignored_folders):
    """Return True when any path component of *filepath* is an ignored folder.

    Matches whole path segments (split on '/'), so 'Blog-Posts' does not
    match a folder named 'Blog-Posts-Archive'.
    """
    segments = filepath.split('/')
    return any(folder in segments for folder in ignored_folders)
131+
132+
def main():
    """Add SEO descriptions to changed markdown files and report stats.

    Reads the file list from argv (local runs) or the CHANGED_FILES env var
    (GitHub Actions), skips ignored/too-short/already-described files,
    generates a description for the rest via OpenAI, rewrites each file in
    place, and writes run statistics to /tmp for the next workflow step.
    """
    # Ignored folders from GitHub variable (or default values)
    IGNORED_FOLDERS_STR = os.environ.get('IGNORED_FOLDERS', 'Blog-Posts,Community-Articles,_deleted,_resources')
    IGNORED_FOLDERS = [folder.strip() for folder in IGNORED_FOLDERS_STR.split(',') if folder.strip()]

    # Get changed files from environment or command line
    if len(sys.argv) > 1:
        # Files passed as command line arguments
        changed_files = sys.argv[1:]
    else:
        # Files from environment variable (for GitHub Actions); one path per
        # line, blanks filtered out.
        changed_files_str = os.environ.get('CHANGED_FILES', '')
        changed_files = [f.strip() for f in changed_files_str.strip().split('\n') if f.strip()]

    # Counters for the summary; skipped_count is the total across all skip
    # reasons (ignored + too-short + already-described).
    processed_count = 0
    skipped_count = 0
    skipped_too_short = 0
    skipped_ignored = 0
    updated_files = []  # Track actually updated files

    print("🤖 Processing changed markdown files...\n")
    print(f"🚫 Ignored folders: {', '.join(IGNORED_FOLDERS)}\n")

    for filepath in changed_files:
        # Only markdown files are candidates; everything else is silent.
        if not filepath.endswith('.md'):
            continue

        # Check if file is in ignored folder
        if is_file_ignored(filepath, IGNORED_FOLDERS):
            print(f"📄 Processing: {filepath}")
            print(f" 🚫 Skipped (ignored folder)\n")
            skipped_ignored += 1
            skipped_count += 1
            continue

        print(f"📄 Processing: {filepath}")

        try:
            # Read file
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read()

            # Check if content is too short (less than 200 characters)
            if is_content_too_short(content):
                print(f" ⏭️ Skipped (content less than 200 characters)\n")
                skipped_too_short += 1
                skipped_count += 1
                continue

            # Check if already has SEO description
            if has_seo_description(content):
                print(f" ⏭️ Skipped (already has SEO description)\n")
                skipped_count += 1
                continue

            # Generate description
            filename = os.path.basename(filepath)
            print(f" 🤖 Generating description...")
            description = generate_description(content, filename)
            print(f" 💡 Generated: {description}")

            # Add SEO tag
            updated_content = add_seo_description(content, description)

            # Write back
            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(updated_content)

            print(f" ✅ Updated successfully\n")
            processed_count += 1
            updated_files.append(filepath)  # Track this file as updated

        except Exception as e:
            # Per-file best-effort: log and move on so one bad file does not
            # abort the whole batch.
            print(f" ❌ Error: {e}\n")

    print(f"\n📊 Summary:")
    print(f" ✅ Updated: {processed_count}")
    print(f" ⏭️ Skipped (total): {skipped_count}")
    print(f" ⏭️ Skipped (too short): {skipped_too_short}")
    print(f" 🚫 Skipped (ignored folder): {skipped_ignored}")

    # Save counts and updated files list for next step
    # (consumed by a later workflow step — TODO confirm expected format there)
    with open('/tmp/seo_stats.txt', 'w') as f:
        f.write(f"{processed_count}\n{skipped_count}\n{skipped_too_short}\n{skipped_ignored}")

    # Save updated files list
    with open('/tmp/seo_updated_files.txt', 'w') as f:
        f.write('\n'.join(updated_files))
220+
221+
# Script entry point: lets the module be imported (e.g. for tests) without
# triggering the batch run.
if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)