# Integration Doc Generator #76
# Workflow name as shown in the Actions UI.
name: Integration Doc Generator

on:
  # Event-based trigger: a repository_dispatch event of type
  # integration_resources_updated (sent when changes are pushed to the
  # integration-resources repo).
  repository_dispatch:
    types:
      - integration_resources_updated

  # Scheduled trigger: minute 0 of every 8th hour (three runs per day).
  schedule:
    - cron: '0 */8 * * *'

  # Manual trigger, with an optional switch to re-check every file.
  workflow_dispatch:
    inputs:
      force_check_all:
        description: 'Force check all files (not just new ones)'
        type: choice
        options:
          - 'true'
          - 'false'
        default: 'false'
        required: false
# Job definitions for this workflow.
jobs:
# generate_docs: detects new/changed integration configs, generates reference
# docs for them and opens a pull request with the result.
generate_docs:
# Runner label the job executes on.
runs-on: ubuntu-latest
# Steps run in order; later steps are gated on flags (files_found,
# changes_found, content_generated) that earlier steps append to GITHUB_ENV.
steps:
# Check out this repository. A personal access token replaces the default
# token here; it is expected to have access to both repos.
- name: Checkout appsmith-docs
  uses: actions/checkout@v4
  with:
    token: ${{ secrets.REPO_ACCESS_TOKEN }}
- name: Create exclusion list for new files
  run: |
    # SaaS integration configs that must not be picked up by new-file
    # detection. Written one file name per line to saas_exclusions.txt.
    printf '%s\n' \
      Asana_uqi_config.json \
      AWS_Lambda_uqi_config.json \
      Google_Docs_uqi_config.json \
      Google_Sheets_uqi_config.json \
      HubSpot_uqi_config.json \
      Jira_uqi_config.json \
      Salesforce_uqi_config.json \
      Slack_uqi_config.json \
      Twilio_uqi_config.json \
      Zendesk_uqi_config.json \
      > saas_exclusions.txt
- name: Ensure scripts directory exists
  run: |
    # Make sure the tracking directory and its two state files exist.
    # Existing files are left untouched; missing ones are seeded with an
    # empty list and an empty JSON object respectively.
    mkdir -p scripts
    [ -f scripts/processed_files.txt ] || touch scripts/processed_files.txt
    [ -f scripts/file_hashes.json ] || echo "{}" > scripts/file_hashes.json
- name: Fetch file list with retry logic
  id: fetch_files
  env:
    # Handed over through the environment so the token is never spliced into
    # the script text itself.
    REPO_ACCESS_TOKEN: ${{ secrets.REPO_ACCESS_TOKEN }}
  run: |
    # Lists the uqi_configs directory of appsmithorg/integration-resources
    # through the GitHub contents API and writes:
    #   latest_files_with_sha.txt  - "<name>\t<sha>" per file
    #   latest_files.txt           - "<name>" per file
    # Sets files_found=true|false in GITHUB_ENV for the following steps.
    CONTENTS_URL="https://api.github.com/repos/appsmithorg/integration-resources/contents/Generic%20UQI%20Creation/uqi_configs"
    MAX_RETRIES=5
    RETRY_COUNT=0
    SUCCESS=false

    while [ "$RETRY_COUNT" -lt "$MAX_RETRIES" ] && [ "$SUCCESS" != "true" ]; do
      echo "Attempt $((RETRY_COUNT + 1)) to fetch files from integration-resources..."

      # Linear back-off before every retry: 5s, 10s, 15s, ...
      if [ "$RETRY_COUNT" -gt 0 ]; then
        SLEEP_TIME=$((RETRY_COUNT * 5))
        echo "Waiting $SLEEP_TIME seconds before retry..."
        sleep "$SLEEP_TIME"
      fi

      # Never report a body left over from an earlier attempt.
      rm -f response.json

      # curl lives inside the `if` condition on purpose: run scripts execute
      # with `bash -e` by default, so a bare curl that times out or cannot
      # connect would abort the step instead of reaching the retry branch.
      # No --fail here: an HTTP error body is kept so it can be logged below.
      if curl -s --max-time 30 \
          -H "Authorization: Bearer $REPO_ACCESS_TOKEN" \
          -H "Accept: application/vnd.github+json" \
          -o response.json \
          "$CONTENTS_URL" \
        && jq -e 'type == "array"' response.json > /dev/null 2>&1; then
        # A directory listing is a JSON array; anything else is an error.
        echo "✅ Successfully fetched file list"
        SUCCESS=true
      else
        echo "❌ Failed to get valid file list. Response:"
        # The file may be missing when curl never received a body.
        cat response.json 2>/dev/null || echo "(no response body)"
        RETRY_COUNT=$((RETRY_COUNT + 1))
      fi
    done

    if [ "$SUCCESS" != "true" ]; then
      echo "❌ Failed to fetch file list after $MAX_RETRIES attempts"
      echo "files_found=false" >> "$GITHUB_ENV"
      # End this step successfully; later steps check files_found themselves.
      exit 0
    fi

    # Keep only regular files; record name + blob SHA and a names-only list.
    jq -r '.[] | select(.type=="file") | [.name, .sha] | @tsv' response.json > latest_files_with_sha.txt
    jq -r '.[] | select(.type=="file") | .name' response.json > latest_files.txt
    echo "files_found=true" >> "$GITHUB_ENV"
- name: Identify new and modified files
  id: detect_changes
  if: env.files_found == 'true'
  env:
    # Workflow input passed via the environment instead of being substituted
    # into the script text. Empty for schedule / repository_dispatch runs.
    FORCE_CHECK_ALL: ${{ github.event.inputs.force_check_all }}
  run: |
    # Builds files_to_process.txt from latest_files.txt and the tracking
    # files in scripts/, then sets changes_found and file_count in GITHUB_ENV.

    # Previously recorded name -> SHA map.
    PREV_HASHES=$(cat scripts/file_hashes.json)

    if [ "$FORCE_CHECK_ALL" == "true" ]; then
      echo "🔄 Force checking all files as requested"
      cat latest_files.txt > files_to_process.txt
    else
      # New files: listed upstream but absent from processed_files.txt,
      # minus the SaaS exclusions. -F -x compares whole lines literally, so
      # the dots in the file names are not treated as regex wildcards.
      # `|| true` because grep exits 1 when it selects no lines.
      NEW_FILES=$(comm -23 <(sort latest_files.txt) <(sort scripts/processed_files.txt) | grep -v -x -F -f saas_exclusions.txt || true)

      # Modified files: already processed, with a recorded SHA that differs
      # from the current one (SaaS integrations included).
      MODIFIED_FILES=""
      while IFS=$'\t' read -r FILE_NAME FILE_SHA; do
        PREV_SHA=$(echo "$PREV_HASHES" | jq -r --arg file "$FILE_NAME" '.[$file] // ""')
        if [ -n "$PREV_SHA" ] && [ "$PREV_SHA" != "$FILE_SHA" ] && grep -F -x -q -- "$FILE_NAME" scripts/processed_files.txt; then
          echo "🔄 File modified: $FILE_NAME (SHA changed)"
          MODIFIED_FILES="$MODIFIED_FILES$FILE_NAME"$'\n'
        fi
      done < latest_files_with_sha.txt

      # Merge both lists and drop blank lines. When there is nothing to
      # process grep selects no lines and exits 1; without `|| true` the
      # default `bash -e` shell would fail the step in that (common) case.
      { echo "$NEW_FILES"; echo "$MODIFIED_FILES"; } | grep -v "^$" > files_to_process.txt || true
    fi

    # Report whether there is any work for the processing step.
    if [ -s files_to_process.txt ]; then
      echo "🆕 Found files to process:"
      cat files_to_process.txt
      echo "changes_found=true" >> "$GITHUB_ENV"
    else
      echo "✅ No new or modified files to process."
      echo "changes_found=false" >> "$GITHUB_ENV"
    fi

    # Number of files queued for processing.
    FILE_COUNT=$(wc -l < files_to_process.txt || echo "0")
    echo "file_count=$FILE_COUNT" >> "$GITHUB_ENV"
# Log-only step for runs with nothing to do. It does not stop the job; the
# processing and PR steps are skipped by their own `if:` conditions.
- name: Exit if no files to process
  if: ${{ !(env.files_found == 'true' && env.changes_found == 'true') }}
  run: |
    printf '%s\n' "No new integration updates found. Workflow completed successfully."
- name: Process files
  if: env.changes_found == 'true'
  env:
    # Secrets reach the script through the environment rather than being
    # substituted into the script text.
    OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
    REPO_ACCESS_TOKEN: ${{ secrets.REPO_ACCESS_TOKEN }}
  run: |
    # For every name in files_to_process.txt:
    #   1. download the config from integration-resources,
    #   2. ask OpenAI to extract the key information,
    #   3. ask OpenAI to turn that into a markdown reference page,
    #   4. store the page under website/docs/connect-data/reference/ and
    #      update the tracking files in scripts/.
    # A file that fails at any stage is skipped and left untracked so a
    # later run picks it up again.
    # Sets processed_count and content_generated in GITHUB_ENV.

    RAW_BASE_URL="https://raw.githubusercontent.com/appsmithorg/integration-resources/main/Generic%20UQI%20Creation/uqi_configs"
    MAX_RETRIES=3

    # call_openai SYSTEM_PROMPT USER_CONTENT MAX_TOKENS TEMPERATURE OUT_FILE
    # Sends one request (up to MAX_RETRIES attempts, 5s apart) and writes the
    # reply text to OUT_FILE. Returns 0 on success, 1 if every attempt failed.
    # gpt-4.1 is a chat model, so the request goes to the Chat Completions
    # endpoint with system/user messages instead of a single prompt string.
    call_openai() {
      local system_prompt=$1 user_content=$2 max_tokens=$3 temperature=$4 out_file=$5
      local payload response attempt=0

      # jq builds the JSON so prompt text is escaped correctly.
      payload=$(jq -n \
        --arg system "$system_prompt" \
        --arg user "$user_content" \
        --argjson max_tokens "$max_tokens" \
        --argjson temperature "$temperature" \
        '{
          model: "gpt-4.1",
          messages: [
            {role: "system", content: $system},
            {role: "user", content: $user}
          ],
          max_tokens: $max_tokens,
          temperature: $temperature
        }')

      while [ "$attempt" -lt "$MAX_RETRIES" ]; do
        # `|| response=""`: a curl failure (e.g. the 60s timeout) must count
        # as a failed attempt, not abort the step under `bash -e`.
        response=$(curl -s --max-time 60 https://api.openai.com/v1/chat/completions \
          -H "Authorization: Bearer $OPENAI_API_KEY" \
          -H "Content-Type: application/json" \
          -d "$payload") || response=""

        if echo "$response" | jq -e '.choices[0].message.content' > /dev/null 2>&1; then
          echo "$response" | jq -r '.choices[0].message.content' > "$out_file"
          return 0
        fi

        attempt=$((attempt + 1))
        echo "⚠️ OpenAI API error, attempt $attempt"
        echo "$response"
        sleep 5
      done
      return 1
    }

    # Scratch copy of the generated pages.
    mkdir -p generated_docs
    # Name -> SHA map, updated in memory and written back after the loop.
    HASHES_JSON=$(cat scripts/file_hashes.json)
    # Number of files that made it through every stage.
    PROCESSED_COUNT=0

    while IFS= read -r FILE_NAME; do
      echo "⏳ Processing: $FILE_NAME"

      # --- Download -------------------------------------------------------
      FILE_URL="$RAW_BASE_URL/$FILE_NAME"
      RETRY_COUNT=0
      DOWNLOAD_SUCCESS=false
      while [ "$RETRY_COUNT" -lt "$MAX_RETRIES" ] && [ "$DOWNLOAD_SUCCESS" != "true" ]; do
        # --fail makes HTTP errors (404, 5xx) count as failures instead of
        # saving the error page as the config. The token is the same PAT the
        # listing step uses, so the download also works if the repo is private.
        if curl -fsSL --max-time 30 \
            -H "Authorization: token $REPO_ACCESS_TOKEN" \
            "$FILE_URL" -o "input_file.json" \
          && [ -s "input_file.json" ]; then
          DOWNLOAD_SUCCESS=true
        else
          echo "⚠️ Failed to download $FILE_NAME, attempt $((RETRY_COUNT + 1))"
          RETRY_COUNT=$((RETRY_COUNT + 1))
          sleep 3
        fi
      done
      if [ "$DOWNLOAD_SUCCESS" != "true" ]; then
        echo "❌ Failed to download $FILE_NAME after $MAX_RETRIES attempts, skipping"
        continue
      fi

      # --- Stage 1: extract information ------------------------------------
      echo "🧠 Extracting information with OpenAI API..."
      # Prompt file is optional; fall back to a built-in instruction.
      SYSTEM_PROMPT=$(cat .github/prompts/extract_prompt.txt 2>/dev/null || echo "Extract the key information from this integration configuration file.")
      USER_CONTENT=$(cat input_file.json)
      if ! call_openai "$SYSTEM_PROMPT" "$USER_CONTENT" 2000 0 "extracted_info.md"; then
        echo "❌ Failed to extract information after $MAX_RETRIES attempts, skipping"
        continue
      fi

      # --- Stage 2: generate documentation ---------------------------------
      echo "📝 Generating documentation..."
      SYSTEM_PROMPT=$(cat .github/prompts/generate_prompt.txt 2>/dev/null || echo "Generate comprehensive markdown documentation based on the extracted information.")
      EXTRACTED_CONTENT=$(cat extracted_info.md)
      if ! call_openai "$SYSTEM_PROMPT" "$EXTRACTED_CONTENT" 4000 0.3 "generated_doc.md"; then
        echo "❌ Failed to generate documentation after $MAX_RETRIES attempts, skipping"
        continue
      fi

      # --- Publish ----------------------------------------------------------
      # "Foo_Bar_uqi_config.json" -> "foo_bar"
      INTEGRATION=$(echo "$FILE_NAME" | sed 's/_uqi_config\.json//' | tr '[:upper:]' '[:lower:]')
      FINAL_PATH="website/docs/connect-data/reference/${INTEGRATION}.md"
      mkdir -p "$(dirname "$FINAL_PATH")"
      cp "generated_doc.md" "$FINAL_PATH"
      cp "generated_doc.md" "generated_docs/${INTEGRATION}.md"

      # --- Tracking ---------------------------------------------------------
      # The SHA is recorded only after the page was written, so a file whose
      # generation failed is still seen as changed on the next run. The
      # lookup matches the name column exactly (no regex, no substrings).
      FILE_SHA=$(awk -F'\t' -v name="$FILE_NAME" '$1 == name { print $2; exit }' latest_files_with_sha.txt)
      HASHES_JSON=$(echo "$HASHES_JSON" | jq --arg file "$FILE_NAME" --arg sha "$FILE_SHA" '.[$file] = $sha')

      # Mark as processed if it is not listed yet (literal whole-line match).
      if ! grep -F -x -q -- "$FILE_NAME" scripts/processed_files.txt; then
        echo "$FILE_NAME" >> scripts/processed_files.txt
      fi

      PROCESSED_COUNT=$((PROCESSED_COUNT + 1))
      echo "✅ Completed processing: $FILE_NAME"
    done < files_to_process.txt

    # Persist the updated hash map.
    echo "$HASHES_JSON" > scripts/file_hashes.json

    # Expose the result to the PR step.
    echo "processed_count=$PROCESSED_COUNT" >> "$GITHUB_ENV"
    if [ "$PROCESSED_COUNT" -gt 0 ]; then
      echo "content_generated=true" >> "$GITHUB_ENV"
      echo "✅ Successfully processed $PROCESSED_COUNT files"
    else
      echo "content_generated=false" >> "$GITHUB_ENV"
      echo "⚠️ No files were successfully processed"
    fi
# Log-only step: runs whenever content_generated is not 'true' and records
# that no pull request will be opened. It does not stop the job.
- name: Exit if no content was generated
  if: ${{ env.content_generated != 'true' }}
  run: |
    printf '%s\n' "No content was generated from OpenAI. Workflow completed successfully without creating a PR."
# Commit the generated pages plus the two tracking files to a per-run branch
# and open a pull request against main. Only the paths listed under
# add-paths are passed to the action for inclusion in the commit.
- name: Commit and open PR
  if: ${{ env.content_generated == 'true' }}
  uses: peter-evans/create-pull-request@v5
  with:
    token: ${{ secrets.REPO_ACCESS_TOKEN }}
    base: main
    branch: 'auto/docs-update-${{ github.run_id }}'
    commit-message: 'docs: add/update integration references'
    title: 'docs: add/update integration references'
    add-paths: |
      website/docs/connect-data/reference/
      scripts/processed_files.txt
      scripts/file_hashes.json
    body: |
      This PR adds or updates integration reference documentation for **${{ env.processed_count }}** integrations.
      Generated from the latest configuration files in the [integration-resources repository](https://github.com/appsmithorg/integration-resources/tree/main/Generic%20UQI%20Creation/uqi_configs).