|
| 1 | +import os |
| 2 | +import sys |
| 3 | +import re |
| 4 | +from openai import OpenAI |
| 5 | + |
# Module-level OpenAI client shared by all calls below.
# os.environ[...] (not .get) raises KeyError at import time when
# OPENAI_API_KEY is unset — fails fast in CI rather than mid-run.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])
| 7 | + |
def has_seo_description(content):
    """Return a truthy value when *content* already carries a doc-seo block
    with a non-empty ``Description`` field, else False.

    Note: on success this returns the description string itself (truthy),
    matching how callers use it in a boolean context.
    """
    import json

    # The doc-seo block is fenced by three or more backticks.
    block = re.search(r'```+json\s*//\[doc-seo\]\s*(\{.*?\})\s*```+',
                      content, flags=re.DOTALL)
    if block is None:
        return False

    try:
        seo_data = json.loads(block.group(1))
    except json.JSONDecodeError:
        # Malformed JSON inside the block counts as "no description".
        return False
    return 'Description' in seo_data and seo_data['Description']
| 26 | + |
def is_content_too_short(content):
    """Return True when the document body, excluding any doc-seo block,
    is under 200 characters after stripping edge whitespace."""
    # Drop the doc-seo fence (3+ backticks) so it doesn't inflate the count.
    body = re.sub(r'```+json\s*//\[doc-seo\].*?```+\s*', '',
                  content, flags=re.DOTALL)
    return len(body.strip()) < 200
| 34 | + |
def get_content_preview(content, max_length=1000):
    """Return up to *max_length* leading characters of *content* for the
    OpenAI prompt, with any doc-seo block removed and edges stripped."""
    # Remove an existing doc-seo fence (3+ backticks) before slicing.
    body = re.sub(r'```+json\s*//\[doc-seo\].*?```+\s*', '',
                  content, flags=re.DOTALL)
    return body[:max_length].strip()
| 42 | + |
def generate_description(content, filename):
    """Generate an SEO meta description for a documentation page via OpenAI.

    Args:
        content: Full markdown text of the page.
        filename: Base name of the file; included in the prompt for context
            and used to build the fallback description.

    Returns:
        The generated description string, or a generic fallback sentence
        when the API call fails for any reason.
    """
    try:
        preview = get_content_preview(content)

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": """Create a short and engaging summary (1–2 sentences) for sharing this documentation link on Discord, LinkedIn, Reddit, Twitter and Facebook. Clearly describe what the page explains or teaches.
Highlight the value for developers using ABP Framework.
Be written in a friendly and professional tone.
Stay under 150 characters.
--> https://abp.io/docs/latest <--"""},
                # Fix: interpolate the actual filename — the `filename`
                # parameter was previously unused in the prompt ("File: (unknown)").
                {"role": "user", "content": f"""Generate a concise, informative meta description for this documentation page.

File: {filename}
Content Preview:
{preview}

Requirements:
- Maximum 150 characters

Generate only the description text, nothing else:"""}
            ],
            max_tokens=150,
            temperature=0.7
        )

        return response.choices[0].message.content.strip()
    except Exception as e:
        # Broad catch is deliberate: any API/network failure degrades to a
        # generic description instead of aborting the whole batch run.
        print(f"❌ Error generating description: {e}")
        return f"Learn about {os.path.splitext(filename)[0]} in ABP Framework documentation."
| 77 | + |
def add_seo_description(content, description):
    """Add or update the doc-seo description block in *content*.

    If a doc-seo block already exists with valid JSON, only its
    ``Description`` field is updated (other fields are preserved).
    Otherwise a fresh block is prepended to the document.

    Args:
        content: Full markdown text.
        description: Description text to store.

    Returns:
        The updated markdown text.
    """
    import json

    # Escape special characters for embedding in the hand-built JSON literal
    # used when prepending a brand-new block.
    escaped_desc = description.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')

    # Opening and closing fences must use the same number of backticks,
    # hence the \1 backreference.
    pattern = r'(```+)json\s*//\[doc-seo\]\s*(\{.*?\})\s*\1'
    match = re.search(pattern, content, flags=re.DOTALL)

    if match:
        backticks = match.group(1)
        try:
            seo_data = json.loads(match.group(2))
            seo_data['Description'] = description
            updated_json = json.dumps(seo_data, indent=4, ensure_ascii=False)

            new_block = f'''{backticks}json
//[doc-seo]
{updated_json}
{backticks}'''

            # BUGFIX: use a callable replacement. Passing new_block as a plain
            # replacement string made re.sub re-interpret backslashes emitted
            # by json.dumps (e.g. \" or \\ in the description) as regex
            # escapes/group references, corrupting the output or raising
            # re.error. A callable's return value is inserted literally.
            return re.sub(pattern, lambda _m: new_block, content, count=1, flags=re.DOTALL)
        except json.JSONDecodeError:
            # Existing block holds invalid JSON: leave it as-is and fall
            # through to prepend a fresh, valid block (original behavior).
            pass

    # No existing block (or invalid JSON): prepend a new block.
    seo_tag = f'''```json
//[doc-seo]
{{
    "Description": "{escaped_desc}"
}}
```

'''
    return seo_tag + content
| 123 | + |
def is_file_ignored(filepath, ignored_folders):
    """Return True when any '/'-separated segment of *filepath* matches one
    of *ignored_folders* exactly."""
    segments = filepath.split('/')
    return any(folder in segments for folder in ignored_folders)
| 131 | + |
def main():
    """Entry point: add SEO descriptions to changed markdown files.

    File list comes from argv, or from the CHANGED_FILES env var when run
    under GitHub Actions. Files in ignored folders, files shorter than 200
    characters, and files that already have a description are skipped.
    Stats and the list of updated files are written to /tmp for the next
    workflow step.
    """
    # Ignored folders from GitHub variable (or default values).
    ignored_str = os.environ.get('IGNORED_FOLDERS', 'Blog-Posts,Community-Articles,_deleted,_resources')
    ignored_folders = [part.strip() for part in ignored_str.split(',') if part.strip()]

    # CLI arguments take precedence over the environment variable.
    if len(sys.argv) > 1:
        changed_files = sys.argv[1:]
    else:
        raw = os.environ.get('CHANGED_FILES', '')
        changed_files = [line.strip() for line in raw.strip().split('\n') if line.strip()]

    n_updated = 0
    n_skipped = 0
    n_short = 0
    n_ignored = 0
    updated_files = []  # files actually rewritten on disk

    print("🤖 Processing changed markdown files...\n")
    print(f"🚫 Ignored folders: {', '.join(ignored_folders)}\n")

    for filepath in changed_files:
        if not filepath.endswith('.md'):
            continue

        # Both branches of the original printed this line first.
        print(f"📄 Processing: {filepath}")

        if is_file_ignored(filepath, ignored_folders):
            print(" 🚫 Skipped (ignored folder)\n")
            n_ignored += 1
            n_skipped += 1
            continue

        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                content = f.read()

            if is_content_too_short(content):
                print(" ⏭️ Skipped (content less than 200 characters)\n")
                n_short += 1
                n_skipped += 1
                continue

            if has_seo_description(content):
                print(" ⏭️ Skipped (already has SEO description)\n")
                n_skipped += 1
                continue

            filename = os.path.basename(filepath)
            print(" 🤖 Generating description...")
            description = generate_description(content, filename)
            print(f" 💡 Generated: {description}")

            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(add_seo_description(content, description))

            print(" ✅ Updated successfully\n")
            n_updated += 1
            updated_files.append(filepath)

        except Exception as e:
            # Keep going: one bad file must not abort the batch.
            print(f" ❌ Error: {e}\n")

    print("\n📊 Summary:")
    print(f" ✅ Updated: {n_updated}")
    print(f" ⏭️ Skipped (total): {n_skipped}")
    print(f" ⏭️ Skipped (too short): {n_short}")
    print(f" 🚫 Skipped (ignored folder): {n_ignored}")

    # Persist counts and the updated-files list for the next workflow step.
    with open('/tmp/seo_stats.txt', 'w') as f:
        f.write(f"{n_updated}\n{n_skipped}\n{n_short}\n{n_ignored}")

    with open('/tmp/seo_updated_files.txt', 'w') as f:
        f.write('\n'.join(updated_files))
| 220 | + |
# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
0 commit comments