|
| 1 | +import os |
| 2 | +import sys |
| 3 | +import re |
| 4 | +from openai import OpenAI |
| 5 | + |
# Module-level OpenAI client shared by all calls below.
# os.environ[...] (not .get) raises KeyError at import time when
# OPENAI_API_KEY is unset — fails fast in CI rather than mid-run.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])
| 7 | + |
def has_seo_description(content):
    """Return a truthy value when *content* already carries a doc-seo block
    with a non-empty ``Description`` field, else False.

    Note: on success this returns the description string itself (truthy),
    matching how callers use it in a boolean context.
    """
    import json

    # The doc-seo block is fenced by three or more backticks.
    block = re.search(r'```+json\s*//\[doc-seo\]\s*(\{.*?\})\s*```+',
                      content, flags=re.DOTALL)
    if block is None:
        return False

    try:
        seo_data = json.loads(block.group(1))
    except json.JSONDecodeError:
        # Malformed JSON inside the block counts as "no description".
        return False
    return 'Description' in seo_data and seo_data['Description']
| 26 | + |
def is_content_too_short(content):
    """Return True when the document body, excluding any doc-seo block,
    is under 200 characters after stripping edge whitespace."""
    # Drop the doc-seo fence (3+ backticks) so it doesn't inflate the count.
    body = re.sub(r'```+json\s*//\[doc-seo\].*?```+\s*', '',
                  content, flags=re.DOTALL)
    return len(body.strip()) < 200
| 34 | + |
def get_content_preview(content, max_length=1000):
    """Return up to *max_length* leading characters of *content* for the
    OpenAI prompt, with any doc-seo block removed and edges stripped."""
    # Remove an existing doc-seo fence (3+ backticks) before slicing.
    body = re.sub(r'```+json\s*//\[doc-seo\].*?```+\s*', '',
                  content, flags=re.DOTALL)
    return body[:max_length].strip()
| 42 | + |
def generate_description(content, filename):
    """Generate an SEO meta description for a documentation page via OpenAI.

    Args:
        content: Full markdown text of the page.
        filename: Base name of the file; included in the prompt for context
            and used to build the fallback description.

    Returns:
        The generated description string, or a generic fallback sentence
        when the API call fails for any reason.
    """
    try:
        preview = get_content_preview(content)

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": """Create a short and engaging summary (1–2 sentences) for sharing this documentation link on Discord, LinkedIn, Reddit, Twitter and Facebook. Clearly describe what the page explains or teaches.
Highlight the value for developers using ABP Framework.
Be written in a friendly and professional tone.
Stay under 150 characters.
--> https://abp.io/docs/latest <--"""},
                # Fix: interpolate the actual filename — the `filename`
                # parameter was previously unused in the prompt ("File: (unknown)").
                {"role": "user", "content": f"""Generate a concise, informative meta description for this documentation page.

File: {filename}
Content Preview:
{preview}

Requirements:
- Maximum 150 characters

Generate only the description text, nothing else:"""}
            ],
            max_tokens=150,
            temperature=0.7
        )

        return response.choices[0].message.content.strip()
    except Exception as e:
        # Broad catch is deliberate: any API/network failure degrades to a
        # generic description instead of aborting the whole batch run.
        print(f"❌ Error generating description: {e}")
        return f"Learn about {os.path.splitext(filename)[0]} in ABP Framework documentation."
| 77 | + |
def add_seo_description(content, description):
    """Add or update the doc-seo description block in *content*.

    If a doc-seo block already exists with valid JSON, only its
    ``Description`` field is updated (other fields are preserved).
    Otherwise a fresh block is prepended to the document.

    Args:
        content: Full markdown text.
        description: Description text to store.

    Returns:
        The updated markdown text.
    """
    import json

    # Escape special characters for embedding in the hand-built JSON literal
    # used when prepending a brand-new block.
    escaped_desc = description.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')

    # Opening and closing fences must use the same number of backticks,
    # hence the \1 backreference.
    pattern = r'(```+)json\s*//\[doc-seo\]\s*(\{.*?\})\s*\1'
    match = re.search(pattern, content, flags=re.DOTALL)

    if match:
        backticks = match.group(1)
        try:
            seo_data = json.loads(match.group(2))
            seo_data['Description'] = description
            updated_json = json.dumps(seo_data, indent=4, ensure_ascii=False)

            new_block = f'''{backticks}json
//[doc-seo]
{updated_json}
{backticks}'''

            # BUGFIX: use a callable replacement. Passing new_block as a plain
            # replacement string made re.sub re-interpret backslashes emitted
            # by json.dumps (e.g. \" or \\ in the description) as regex
            # escapes/group references, corrupting the output or raising
            # re.error. A callable's return value is inserted literally.
            return re.sub(pattern, lambda _m: new_block, content, count=1, flags=re.DOTALL)
        except json.JSONDecodeError:
            # Existing block holds invalid JSON: leave it as-is and fall
            # through to prepend a fresh, valid block (original behavior).
            pass

    # No existing block (or invalid JSON): prepend a new block.
    seo_tag = f'''```json
//[doc-seo]
{{
    "Description": "{escaped_desc}"
}}
```

'''
    return seo_tag + content
| 123 | + |
def is_file_ignored(filepath, ignored_folders):
    """Return True when any '/'-separated segment of *filepath* matches one
    of *ignored_folders* exactly."""
    segments = filepath.split('/')
    return any(folder in segments for folder in ignored_folders)
| 131 | + |
def main():
    """Entry point: add SEO descriptions to changed markdown files.

    File list comes from argv, or from the CHANGED_FILES env var when run
    under GitHub Actions. Files in ignored folders, files shorter than 200
    characters, and files that already have a description are skipped.
    Stats and the list of updated files are written to /tmp for the next
    workflow step.
    """
    # Ignored folders from GitHub variable (or default values).
    ignored_str = os.environ.get('IGNORED_FOLDERS', 'Blog-Posts,Community-Articles,_deleted,_resources')
    ignored_folders = [part.strip() for part in ignored_str.split(',') if part.strip()]

    # CLI arguments take precedence over the environment variable.
    if len(sys.argv) > 1:
        changed_files = sys.argv[1:]
    else:
        raw = os.environ.get('CHANGED_FILES', '')
        changed_files = [line.strip() for line in raw.strip().split('\n') if line.strip()]

    n_updated = 0
    n_skipped = 0
    n_short = 0
    n_ignored = 0
    updated_files = []  # files actually rewritten on disk

    print("🤖 Processing changed markdown files...\n")
    print(f"🚫 Ignored folders: {', '.join(ignored_folders)}\n")

    for filepath in changed_files:
        if not filepath.endswith('.md'):
            continue

        # Both branches of the original printed this line first.
        print(f"📄 Processing: {filepath}")

        if is_file_ignored(filepath, ignored_folders):
            print(" 🚫 Skipped (ignored folder)\n")
            n_ignored += 1
            n_skipped += 1
            continue

        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read()

            if is_content_too_short(content):
                print(" ⏭️ Skipped (content less than 200 characters)\n")
                n_short += 1
                n_skipped += 1
                continue

            if has_seo_description(content):
                print(" ⏭️ Skipped (already has SEO description)\n")
                n_skipped += 1
                continue

            filename = os.path.basename(filepath)
            print(" 🤖 Generating description...")
            description = generate_description(content, filename)
            print(f" 💡 Generated: {description}")

            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(add_seo_description(content, description))

            print(" ✅ Updated successfully\n")
            n_updated += 1
            updated_files.append(filepath)

        except Exception as e:
            # Keep going: one bad file must not abort the batch.
            print(f" ❌ Error: {e}\n")

    print("\n📊 Summary:")
    print(f" ✅ Updated: {n_updated}")
    print(f" ⏭️ Skipped (total): {n_skipped}")
    print(f" ⏭️ Skipped (too short): {n_short}")
    print(f" 🚫 Skipped (ignored folder): {n_ignored}")

    # Persist counts and the updated-files list for the next workflow step.
    with open('/tmp/seo_stats.txt', 'w') as f:
        f.write(f"{n_updated}\n{n_skipped}\n{n_short}\n{n_ignored}")

    with open('/tmp/seo_updated_files.txt', 'w') as f:
        f.write('\n'.join(updated_files))
| 220 | + |
# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
0 commit comments