Skip to content

Commit ebe5b27

Browse files
committed
Merge branch 'dev' into issue-23871
2 parents 8c75745 + 1d54599 commit ebe5b27

File tree

863 files changed

+13308
-1924
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

863 files changed

+13308
-1924
lines changed
Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
import os
2+
import sys
3+
import re
4+
import json
5+
from openai import OpenAI
6+
7+
# Module-level OpenAI client, configured from the environment.
# Raises KeyError at import time when OPENAI_API_KEY is unset —
# failing fast is acceptable here because the script runs in CI.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

# Regex patterns as constants.
# Matches a fenced ```json block tagged with the //[doc-seo] marker and
# captures the JSON object inside it. Callers pass flags=re.DOTALL so the
# object may span multiple lines.
SEO_BLOCK_PATTERN = r'```+json\s*//\[doc-seo\]\s*(\{.*?\})\s*```+'
# Same block, but also captures the opening backtick run (group 1) so the
# closing fence is required to match it exactly via the \1 backreference.
SEO_BLOCK_WITH_BACKTICKS_PATTERN = r'(```+)json\s*//\[doc-seo\]\s*(\{.*?\})\s*\1'
def has_seo_description(content):
    """Return True if *content* already has a doc-seo block with a
    non-empty "Description" field.

    Args:
        content: Full markdown text of a documentation page.

    Returns:
        bool: True only when a doc-seo block exists, its payload parses
        as JSON, and "Description" is present and truthy. A malformed
        block yields False so the caller regenerates it instead of
        crashing.
    """
    match = re.search(SEO_BLOCK_PATTERN, content, flags=re.DOTALL)

    if not match:
        return False

    try:
        seo_data = json.loads(match.group(1))
    except json.JSONDecodeError:
        return False

    # bool(.get()) normalizes the result: the original expression
    # ('Description' in seo_data and seo_data['Description']) leaked the
    # description string itself as the return value of a "has_" predicate.
    return bool(seo_data.get('Description'))
27+
def has_seo_block(content):
    """Return True when *content* contains any doc-seo block,
    whether or not it carries a Description field."""
    found = re.search(SEO_BLOCK_PATTERN, content, flags=re.DOTALL)
    return found is not None
31+
def remove_seo_blocks(content):
    """Strip every doc-seo block (plus any trailing whitespace)
    from *content* and return the cleaned text."""
    pattern = SEO_BLOCK_PATTERN + r'\s*'
    return re.sub(pattern, '', content, flags=re.DOTALL)
35+
def is_content_too_short(content, min_length=200):
    """Return True when the page body — doc-seo blocks excluded — is
    shorter than *min_length* characters after trimming whitespace."""
    body = remove_seo_blocks(content).strip()
    return len(body) < min_length
40+
def get_content_preview(content, max_length=1000):
    """Return up to the first *max_length* characters of the page body
    (doc-seo blocks excluded), trimmed of surrounding whitespace.

    Used to cap how much text is sent to the OpenAI API.
    """
    without_seo = remove_seo_blocks(content)
    preview = without_seo[:max_length]
    return preview.strip()
45+
def escape_json_string(text):
    """Escape *text* so it can be embedded inside a double-quoted JSON
    string literal.

    Delegates to json.dumps for complete coverage: the hand-rolled
    version only handled backslash, double quote and newline, so tabs,
    carriage returns and other control characters in a description
    produced invalid JSON in the generated block.

    Returns:
        str: The escaped text, without the surrounding double quotes.
    """
    # json.dumps wraps the value in quotes; slice them off.
    # ensure_ascii=False keeps non-ASCII characters readable in the file.
    return json.dumps(text, ensure_ascii=False)[1:-1]
49+
def create_seo_block(description):
    """Build a brand-new doc-seo fenced block containing *description*.

    The block is followed by a blank line so it can be prepended
    directly to existing markdown content.
    """
    escaped_desc = escape_json_string(description)
    block_lines = [
        '```json',
        '//[doc-seo]',
        '{',
        f'    "Description": "{escaped_desc}"',
        '}',
        '```',
        '',
        '',
    ]
    return '\n'.join(block_lines)
61+
def generate_description(content, filename):
    """Generate an SEO meta description for a markdown page via OpenAI.

    Args:
        content: Full markdown text; only a capped, SEO-block-free
            preview is sent to the model.
        filename: Base name of the file, used in the prompt and in the
            fallback description.

    Returns:
        str: The model-generated description, or a generic fallback
        sentence when the API call fails for any reason.
    """
    try:
        preview = get_content_preview(content)

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": """Create a short and engaging summary (1–2 sentences) for sharing this documentation link on Discord, LinkedIn, Reddit, Twitter and Facebook. Clearly describe what the page explains or teaches.
Highlight the value for developers using ABP Framework.
Be written in a friendly and professional tone.
Stay under 150 characters.
--> https://abp.io/docs/latest <--"""},
                # NOTE(review): the prompt previously contained the literal
                # text "File: (unknown)" and never used the filename
                # parameter — presumably a corrupted {filename}
                # interpolation; restored here. Confirm against intent.
                {"role": "user", "content": f"""Generate a concise, informative meta description for this documentation page.

File: {filename}
Content Preview:
{preview}

Requirements:
- Maximum 150 characters

Generate only the description text, nothing else:"""}
            ],
            max_tokens=150,
            temperature=0.7
        )

        description = response.choices[0].message.content.strip()
        return description
    except Exception as e:
        # Best-effort: any failure (network, auth, rate limit) degrades to
        # a generic description instead of aborting the whole run.
        print(f"❌ Error generating description: {e}")
        return f"Learn about {os.path.splitext(filename)[0]} in ABP Framework documentation."
95+
def update_seo_description(content, description):
    """Rewrite the Description field inside an existing doc-seo block.

    Args:
        content: Full markdown text expected to contain a doc-seo block.
        description: New description text (raw; json.dumps handles
            quoting/escaping).

    Returns:
        The updated content, or None when no block is found or its JSON
        payload cannot be parsed — the caller then prepends a fresh
        block instead.
    """
    match = re.search(SEO_BLOCK_WITH_BACKTICKS_PATTERN, content, flags=re.DOTALL)

    if not match:
        return None

    # Reuse the original backtick run so the rewritten fence length
    # matches the one we found.
    backticks = match.group(1)
    json_str = match.group(2)

    try:
        seo_data = json.loads(json_str)
        seo_data['Description'] = description
        updated_json = json.dumps(seo_data, indent=4, ensure_ascii=False)

        new_block = f'''{backticks}json
//[doc-seo]
{updated_json}
{backticks}'''

        # BUG FIX: use a callable replacement. Passing new_block as a plain
        # replacement string makes re.sub re-interpret backslash sequences
        # emitted by json.dumps (e.g. "\n", "\t", "\uXXXX"), which corrupts
        # the JSON or raises re.error ("bad escape").
        return re.sub(SEO_BLOCK_WITH_BACKTICKS_PATTERN, lambda _m: new_block,
                      content, count=1, flags=re.DOTALL)
    except json.JSONDecodeError:
        return None
119+
def add_seo_description(content, description):
    """Add or update the doc-seo description in *content*.

    Tries to rewrite an existing block in place first; when there is no
    block (or its JSON is broken), a fresh block is prepended instead.
    """
    # Try to update existing block first
    updated_content = update_seo_description(content, description)
    # Explicit sentinel check: update_seo_description signals failure with
    # None; the original truthiness test would also (incorrectly) discard
    # a legitimately empty result.
    if updated_content is not None:
        return updated_content

    # No existing block or update failed, add new block at the beginning
    return create_seo_block(description) + content
129+
def is_file_ignored(filepath, ignored_folders):
    """Return True when any path component of *filepath* is an ignored folder.

    Matches whole components only: "docs/Blog-Posts/a.md" is ignored for
    "Blog-Posts", but "docs/Blog-Posts-2/a.md" is not. Normalizes both
    '/' and '\\' separators so Windows-style paths are handled too
    (the original split only on '/').

    Args:
        filepath: Relative or absolute file path.
        ignored_folders: Iterable of folder names to skip.
    """
    path_parts = filepath.replace('\\', '/').split('/')
    return any(ignored in path_parts for ignored in ignored_folders)
134+
def get_changed_files():
    """Return the list of changed file paths to process.

    Command-line arguments take precedence; otherwise the newline-
    separated CHANGED_FILES environment variable is used. Blank entries
    are dropped and surrounding whitespace is trimmed.
    """
    cli_args = sys.argv[1:]
    if cli_args:
        return cli_args

    raw = os.environ.get('CHANGED_FILES', '')
    files = []
    for line in raw.strip().split('\n'):
        entry = line.strip()
        if entry:
            files.append(entry)
    return files
142+
def process_file(filepath, ignored_folders):
    """Process a single markdown file end-to-end.

    Reads the file, decides whether it needs an SEO description,
    generates one via OpenAI when needed, and writes the updated content
    back in place. Progress is reported via print() for CI logs.

    Args:
        filepath: Path of the candidate file.
        ignored_folders: Folder names whose files are skipped entirely.

    Returns:
        tuple: (processed, skipped, skip_reason) where processed/skipped
        are bools and skip_reason is 'ignored', 'too_short',
        'has_description' or None. Non-markdown files and errors return
        (False, False, None).
    """
    if not filepath.endswith('.md'):
        return False, False, None

    # Check if file is in ignored folder
    if is_file_ignored(filepath, ignored_folders):
        print(f"📄 Processing: {filepath}")
        print(f" 🚫 Skipped (ignored folder)\n")
        return False, True, 'ignored'

    print(f"📄 Processing: {filepath}")

    try:
        # Read file with original line endings (newline='' disables
        # universal-newline translation so CRLF files round-trip intact).
        with open(filepath, 'r', encoding='utf-8', newline='') as f:
            content = f.read()

        # Check if content is too short
        if is_content_too_short(content):
            print(f" ⏭️ Skipped (content less than 200 characters)\n")
            return False, True, 'too_short'

        # Check if already has SEO description
        if has_seo_description(content):
            print(f" ⏭️ Skipped (already has SEO description)\n")
            return False, True, 'has_description'

        # Generate description
        filename = os.path.basename(filepath)
        print(f" 🤖 Generating description...")
        description = generate_description(content, filename)
        print(f" 💡 Generated: {description}")

        # Add or update SEO description. A block may exist without a
        # Description (has_seo_description False, has_seo_block True) —
        # that case is an update, not an insert.
        if has_seo_block(content):
            print(f" 🔄 Updating existing SEO block...")
        else:
            print(f" ➕ Adding new SEO block...")

        updated_content = add_seo_description(content, description)

        # Write back (preserving line endings)
        with open(filepath, 'w', encoding='utf-8', newline='') as f:
            f.write(updated_content)

        print(f" ✅ Updated successfully\n")
        return True, False, None

    except Exception as e:
        # Best-effort per-file handling: one broken file must not abort
        # the whole batch.
        print(f" ❌ Error: {e}\n")
        return False, False, None
195+
def save_statistics(processed_count, skipped_count, skipped_too_short, skipped_ignored):
    """Persist run statistics to /tmp/seo_stats.txt, one number per line,
    for a later workflow step to read. Failures only warn."""
    stats_payload = f"{processed_count}\n{skipped_count}\n{skipped_too_short}\n{skipped_ignored}"
    try:
        with open('/tmp/seo_stats.txt', 'w') as stats_file:
            stats_file.write(stats_payload)
    except Exception as e:
        print(f"⚠️ Warning: Could not save statistics: {e}")
203+
def save_updated_files(updated_files):
    """Persist the list of updated file paths, one per line, to
    /tmp/seo_updated_files.txt for a later workflow step. Failures only warn."""
    try:
        with open('/tmp/seo_updated_files.txt', 'w') as out_file:
            out_file.write('\n'.join(updated_files))
    except Exception as e:
        print(f"⚠️ Warning: Could not save updated files list: {e}")
211+
def main():
    """Entry point: add/refresh SEO descriptions on changed markdown files.

    Reads the file list (argv or CHANGED_FILES), processes each file,
    prints a summary, and writes statistics/updated-file artifacts under
    /tmp for later workflow steps.
    """
    # Get ignored folders from environment
    IGNORED_FOLDERS_STR = os.environ.get('IGNORED_FOLDERS', 'Blog-Posts,Community-Articles,_deleted,_resources')
    IGNORED_FOLDERS = [folder.strip() for folder in IGNORED_FOLDERS_STR.split(',') if folder.strip()]

    # Get changed files
    changed_files = get_changed_files()

    # Statistics
    processed_count = 0
    skipped_count = 0
    skipped_too_short = 0
    skipped_ignored = 0
    updated_files = []

    print("🤖 Processing changed markdown files...\n")
    # NOTE(review): the original line printed a mojibake character (�);
    # 🚫 matches the "ignored" marker used elsewhere — confirm intended glyph.
    print(f"🚫 Ignored folders: {', '.join(IGNORED_FOLDERS)}\n")

    # Process each file
    for filepath in changed_files:
        processed, skipped, skip_reason = process_file(filepath, IGNORED_FOLDERS)

        if processed:
            processed_count += 1
            updated_files.append(filepath)
        elif skipped:
            skipped_count += 1
            # 'has_description' skips count toward the total only.
            if skip_reason == 'too_short':
                skipped_too_short += 1
            elif skip_reason == 'ignored':
                skipped_ignored += 1

    # Print summary
    print(f"\n📊 Summary:")
    print(f" ✅ Updated: {processed_count}")
    print(f" ⏭️ Skipped (total): {skipped_count}")
    print(f" ⏭️ Skipped (too short): {skipped_too_short}")
    print(f" 🚫 Skipped (ignored folder): {skipped_ignored}")

    # Save statistics
    save_statistics(processed_count, skipped_count, skipped_too_short, skipped_ignored)
    save_updated_files(updated_files)

if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)