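# Test Doc Generator
#
# NOTE: the lines "Skip to content" and "Test Doc Generator #9" that preceded
# the workflow were navigation text from the web page this file was copied
# from; they are not part of the workflow definition.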

# Manually triggered workflow that regenerates integration reference docs.
#
# Flow:
#   1. Check out the requested target branch of this repository.
#   2. List the files under "Generic UQI Creation/uqi_configs" in
#      harshilp24/integration-resources-test via the GitHub contents API.
#   3. Compare that list (names + blob SHAs) with the tracking files
#      scripts/processed_files.txt and scripts/file_hashes.json to find new
#      and modified files.
#   4. For each such file, run two OpenAI chat-completion prompts (extract,
#      then generate markdown) and write the result to
#      website/docs/connect-data/reference/<integration>.md.
#   5. Open a pull request against the target branch containing the generated
#      docs and the updated tracking files.
#
# Values that come from workflow inputs or secrets are handed to shell steps
# through `env:` rather than being interpolated into the script text, so they
# cannot be interpreted as shell syntax.
name: Test Doc Generator

on:
  workflow_dispatch:
    inputs:
      target_branch:
        description: 'The branch in appsmith-docs to checkout and create PR against'
        required: true
        default: 'docs-staging'  # Default to docs-staging
        type: string

jobs:
  generate_docs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout appsmith-docs target branch
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.test_REPO_ACCESS_TOKEN }}
          # Checkout the specified branch
          ref: ${{ github.event.inputs.target_branch }}
          # Fetch depth 0 to get all history needed for base branch detection
          # by create-pull-request
          fetch-depth: 0

      # No need for exclusion list step if not used
      # - name: Create exclusion list
      #   run: echo > saas_exclusions.txt

      - name: Ensure scripts directory and tracking files exist
        run: |
          mkdir -p scripts
          # Initialize tracking files if they don't exist in the checked-out branch
          [ -f scripts/processed_files.txt ] || touch scripts/processed_files.txt
          [ -f scripts/file_hashes.json ] || echo "{}" > scripts/file_hashes.json

      - name: Fetch file list from test repo
        id: fetch_files
        env:
          REPO_TOKEN: ${{ secrets.test_REPO_ACCESS_TOKEN }}
        run: |
          echo "Fetching files from source repo..."
          curl -s --max-time 60 \
            -H "Authorization: Bearer $REPO_TOKEN" \
            -H "Accept: application/vnd.github+json" \
            "https://api.github.com/repos/harshilp24/integration-resources-test/contents/Generic%20UQI%20Creation/uqi_configs" \
            -o response.json

          if ! jq -e '.' response.json > /dev/null; then
            echo "Error: Invalid JSON received from GitHub API."
            cat response.json  # Print response for debugging
            exit 1
          fi

          # An array is a directory listing; an object is an error message.
          if jq -e 'type == "array"' response.json > /dev/null; then
            # name<TAB>sha per file, plus a names-only list for set comparison.
            jq -r '.[] | select(.type=="file") | [.name, .sha] | @tsv' response.json > latest_files_with_sha.txt
            jq -r '.[] | select(.type=="file") | .name' response.json > latest_files.txt
            echo "files_found=true" >> "$GITHUB_ENV"
            echo "Files list fetched successfully."
          else
            echo "Warning: Received non-array response from GitHub API (maybe empty dir or error?):"
            cat response.json
            # Create empty files to avoid errors downstream if dir is empty
            touch latest_files_with_sha.txt
            touch latest_files.txt
            echo "files_found=false" >> "$GITHUB_ENV"  # Indicate no files found
          fi

      - name: Identify new and modified files
        id: detect_changes
        # Only run if files were actually found in the source repo
        if: env.files_found == 'true'
        env:
          TARGET_BRANCH: ${{ github.event.inputs.target_branch }}
        run: |
          echo "Identifying changes against branch: $TARGET_BRANCH"

          # Read tracking files FROM THE CHECKED-OUT BRANCH
          PREV_HASHES=$(cat scripts/file_hashes.json)
          # Ensure processed_files.txt exists before sorting
          [ -f scripts/processed_files.txt ] || touch scripts/processed_files.txt

          # New = present in latest_files.txt but not in processed_files.txt
          comm -23 <(sort latest_files.txt) <(sort scripts/processed_files.txt) > new_files.tmp || true
          echo "--- New Files ---"
          cat new_files.tmp
          echo "-----------------"

          MODIFIED_FILES_LIST="modified_files.tmp"
          : > "$MODIFIED_FILES_LIST"
          echo "--- Checking for Modifications ---" >&2  # Debug output to stderr

          # Each line is name<TAB>sha; split on the tab only so names may
          # contain spaces.
          while IFS=$'\t' read -r FILE_NAME FILE_SHA; do
            # Only files already listed in processed_files.txt can be "modified".
            if grep -qxF -- "$FILE_NAME" scripts/processed_files.txt; then
              PREV_SHA=$(echo "$PREV_HASHES" | jq -r --arg file "$FILE_NAME" '.[$file] // ""')
              echo "Checking: $FILE_NAME, Current SHA: $FILE_SHA, Previous SHA: $PREV_SHA" >&2
              # A file with no recorded hash is left alone rather than regenerated.
              if [ -n "$PREV_SHA" ] && [ "$PREV_SHA" != "$FILE_SHA" ]; then
                echo "$FILE_NAME" >> "$MODIFIED_FILES_LIST"
                echo " -> Marked as modified." >&2
              fi
            fi
          done < latest_files_with_sha.txt

          echo "--- Modified Files ---"
          cat "$MODIFIED_FILES_LIST"
          echo "----------------------"

          # Combine new and modified files, de-duplicated, without empty lines.
          # grep exits non-zero when nothing is left, hence `|| true`.
          sort -u new_files.tmp "$MODIFIED_FILES_LIST" | grep -v '^$' > files_to_process.txt || true
          echo "--- Files to Process ---"
          cat files_to_process.txt
          echo "------------------------"

          if [ -s files_to_process.txt ]; then
            echo "changes_found=true" >> "$GITHUB_ENV"
            echo "Changes detected."
          else
            echo "changes_found=false" >> "$GITHUB_ENV"
            echo "No new or modified files detected."
          fi

          # Clean up temporary files
          rm -f new_files.tmp "$MODIFIED_FILES_LIST"

      # Informational only: `exit 0` ends this step, not the job. The later
      # steps are skipped by their own `if:` conditions.
      - name: Exit if no files to process
        if: env.changes_found != 'true'
        env:
          TARGET_BRANCH: ${{ github.event.inputs.target_branch }}
        run: |
          echo "No changes detected in source files relative to branch '$TARGET_BRANCH'. Exiting."
          exit 0

      - name: Process files with OpenAI
        # Reads the initial hashes from the checked-out branch and updates the
        # local tracking files, which are committed by the next step.
        if: env.changes_found == 'true'
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          mkdir -p generated_docs
          # Start from a clean per-run list.
          rm -f processed_files_this_run.txt

          # chat_completion SYSTEM_PROMPT USER_FILE MAX_TOKENS TEMPERATURE
          # Sends one chat-completions request whose user message is the
          # content of USER_FILE and prints the assistant message on stdout.
          # Returns non-zero (after logging to stderr) on a transport failure
          # or when the response carries no non-empty message content, which
          # includes API error objects.
          chat_completion() {
            local response
            # The file content goes through --rawfile and the payload through
            # stdin so large inputs do not hit the command-line length limit.
            if ! response=$(
              jq -n \
                --arg system "$1" \
                --rawfile user "$2" \
                --argjson max_tokens "$3" \
                --argjson temperature "$4" \
                '{
                  model: "gpt-4-1106-preview",
                  messages: [
                    {"role": "system", "content": $system},
                    {"role": "user", "content": $user}
                  ],
                  max_tokens: $max_tokens,
                  temperature: $temperature
                }' |
                curl -sS --max-time 300 https://api.openai.com/v1/chat/completions \
                  -H "Authorization: Bearer $OPENAI_API_KEY" \
                  -H "Content-Type: application/json" \
                  --data-binary @-
            ); then
              echo "OpenAI request failed before a response was received." >&2
              return 1
            fi
            if ! jq -er '.choices[0].message.content | select(type == "string" and length > 0)' <<<"$response"; then
              echo "$response" >&2
              return 1
            fi
          }

          # sha_for NAME
          # Prints the SHA listed for exactly NAME in latest_files_with_sha.txt
          # (prints nothing when NAME is not listed).
          sha_for() {
            local name sha
            while IFS=$'\t' read -r name sha; do
              if [ "$name" = "$1" ]; then
                printf '%s\n' "$sha"
                return 0
              fi
            done < latest_files_with_sha.txt
            return 0
          }

          # Read initial hashes from the checked-out branch state
          HASHES_JSON=$(cat scripts/file_hashes.json)
          PROCESSED_COUNT=0

          # Prompts are the same for every file; fall back to a built-in
          # sentence when the prompt file cannot be read.
          EXTRACT_PROMPT=$(cat .github/prompts/extract_prompt.txt || echo "Extract important integration details.")
          GENERATE_PROMPT=$(cat .github/prompts/generate_prompt.txt || echo "Generate reference documentation in markdown.")

          while IFS= read -r FILE_NAME; do
            # Ensure FILE_NAME is not empty
            [ -n "$FILE_NAME" ] || continue

            echo "⏳ Processing $FILE_NAME"

            # URL encode the filename for the URL
            ENCODED_FILE_NAME=$(printf '%s' "$FILE_NAME" | jq -sRr @uri)
            FILE_URL="https://raw.githubusercontent.com/harshilp24/integration-resources-test/main/Generic%20UQI%20Creation/uqi_configs/$ENCODED_FILE_NAME"
            echo "Fetching content from: $FILE_URL"
            # Test curl directly: the step shell runs with errexit, so a bare
            # failing curl would abort the whole step instead of skipping.
            if ! curl -fsSL --max-time 60 "$FILE_URL" -o input_file.json; then
              echo "Error: Failed to download $FILE_NAME from $FILE_URL" >&2
              continue  # Skip this file if download fails
            fi

            # --- OpenAI Processing Start ---
            # Prompt 1: Extract Info
            if ! chat_completion "$EXTRACT_PROMPT" input_file.json 2000 0 > extracted_info.md; then
              echo "Error during OpenAI Prompt 1 for $FILE_NAME (response logged above)." >&2
              continue  # Skip this file
            fi

            # Prompt 2: Generate Markdown
            if ! chat_completion "$GENERATE_PROMPT" extracted_info.md 4000 0.3 > generated_doc.md; then
              echo "Error during OpenAI Prompt 2 for $FILE_NAME (response logged above)." >&2
              continue  # Skip this file
            fi
            # --- OpenAI Processing End ---

            # Determine output path
            INTEGRATION=$(echo "$FILE_NAME" | sed 's/_uqi_config\.json//' | tr '[:upper:]' '[:lower:]')
            FINAL_PATH="website/docs/connect-data/reference/${INTEGRATION}.md"
            mkdir -p "$(dirname "$FINAL_PATH")"
            cp generated_doc.md "$FINAL_PATH"
            # Optional: Keep a copy in a separate dir if needed for artifacts
            # cp generated_doc.md "generated_docs/${INTEGRATION}.md"

            # Record the new hash only now that the doc has been written, so a
            # file whose generation failed is picked up again on the next run.
            FILE_SHA=$(sha_for "$FILE_NAME")
            if [ -z "$FILE_SHA" ]; then
              echo "Warning: Could not find SHA for $FILE_NAME in latest_files_with_sha.txt. Skipping hash update." >&2
            else
              echo "Updating hash for $FILE_NAME to $FILE_SHA"
              HASHES_JSON=$(echo "$HASHES_JSON" | jq --arg file "$FILE_NAME" --arg sha "$FILE_SHA" '.[$file] = $sha')
            fi

            # Add the successfully processed file to the list for this run
            echo "$FILE_NAME" >> processed_files_this_run.txt
            PROCESSED_COUNT=$((PROCESSED_COUNT + 1))
            echo "✅ Finished processing $FILE_NAME"
          done < files_to_process.txt

          # Update the main tracking files with the results of this run:
          # append newly processed files to the persistent list, sorted and unique.
          if [ -f processed_files_this_run.txt ]; then
            cat processed_files_this_run.txt >> scripts/processed_files.txt
            sort -u scripts/processed_files.txt -o scripts/processed_files.txt
            rm processed_files_this_run.txt
          fi

          # Overwrite the persistent hash file with the updated JSON
          echo "$HASHES_JSON" | jq '.' > scripts/file_hashes.json

          echo "processed_count=$PROCESSED_COUNT" >> "$GITHUB_ENV"
          if [ "$PROCESSED_COUNT" -gt 0 ]; then
            echo "content_generated=true" >> "$GITHUB_ENV"
          else
            echo "content_generated=false" >> "$GITHUB_ENV"
          fi

          # Clean up intermediate files
          rm -f input_file.json extracted_info.md generated_doc.md

      - name: Commit and open PR against target branch
        # Only run if content was actually generated in the previous step
        if: env.content_generated == 'true'
        uses: peter-evans/create-pull-request@v6  # Use v6 for latest features/fixes
        with:
          token: ${{ secrets.test_REPO_ACCESS_TOKEN }}
          # Make title and commit message specific to the target branch
          title: "docs: update integration docs for ${{ github.event.inputs.target_branch }}"
          commit-message: "docs: automated generation for ${{ github.event.inputs.target_branch }}\n\nProcessed files based on changes in harshilp24/integration-resources-test."
          # Create a branch name that includes the target branch for clarity
          branch: "docs-update/${{ github.event.inputs.target_branch }}-${{ github.run_id }}"
          # Set the base branch for the PR to the target branch
          base: ${{ github.event.inputs.target_branch }}
          # Add the generated docs and the UPDATED tracking files
          add-paths: |
            website/docs/connect-data/reference/
            scripts/processed_files.txt
            scripts/file_hashes.json
          # PR body; blank lines keep the Markdown paragraphs separate.
          body: |
            ✅ Automated PR: Generated/updated integration documentation based on changes in the source repository.

            **Target Branch:** `${{ github.event.inputs.target_branch }}`

            **Source Repo:** [harshilp24/integration-resources-test](https://github.com/harshilp24/integration-resources-test/tree/main/Generic%20UQI%20Creation/uqi_configs)

            This PR includes:
            - Updated markdown files in `website/docs/connect-data/reference/`
            - Updated tracking files in `scripts/` to reflect the processed state for this branch.
          # Optional: Add labels, assignees etc.
          # labels: automated-pr, documentation
          # assignees: your-github-username