Skip to content

Commit 12f5bad

Browse files
committed
Added enhanced log collection script and workflow steps for improved debugging of test failures
Signed-off-by: Helber Belmiro <[email protected]>
1 parent 8b394bf commit 12f5bad

File tree

2 files changed: +207 additions, −0 deletions
.github/resources/scripts/collect-enhanced-logs.sh

Lines changed: 127 additions & 0 deletions
Original file line number	Diff line number	Diff line change
@@ -0,0 +1,127 @@
1+
#!/usr/bin/env bash
#
# Collect enhanced diagnostic logs from a Kubernetes namespace to aid
# debugging of e2e test failures.
#
# Usage:
#   collect-enhanced-logs.sh --ns NAMESPACE [--output FILE]
#                            [--test-context CTX] [--start-time ISO8601]

# Fail fast: abort on errors, unset variables, and failed pipeline stages.
set -euo pipefail

NS=""
OUTPUT_FILE="/tmp/enhanced_pod_logs.txt"
TEST_CONTEXT=""
START_TIME=""

# Parse command-line flags; every flag takes a value, and ${2:?...}
# aborts with a message if the value is missing.
while [[ "$#" -gt 0 ]]; do
  case $1 in
    --ns) NS="${2:?--ns requires a value}"; shift ;;
    --output) OUTPUT_FILE="${2:?--output requires a value}"; shift ;;
    --test-context) TEST_CONTEXT="${2:?--test-context requires a value}"; shift ;;
    --start-time) START_TIME="${2:?--start-time requires a value}"; shift ;;
    *) echo "Unknown parameter passed: $1" >&2; exit 1 ;;
  esac
  shift
done

# Ensure the directory that will hold the report exists.
# (Previously this created an unused directory named /tmp/enhanced.log.)
mkdir -p "$(dirname "$OUTPUT_FILE")"

if [[ -z "$NS" ]]; then
  echo "Namespace (--ns) parameter is required." >&2
  exit 1
fi
27+
28+
# Verify that the given namespace exists in the cluster; print an error
# and terminate the script if it does not.
function check_namespace {
  local ns=$1
  if kubectl get namespace "$ns" &>/dev/null; then
    return 0
  fi
  echo "Namespace '$ns' does not exist."
  exit 1
}
34+
35+
#######################################
# Write a comprehensive diagnostic report for a namespace to $OUTPUT_FILE:
# pod overview, Argo workflows, events, and per-pod metadata,
# descriptions, and logs.
# Globals (read): OUTPUT_FILE, TEST_CONTEXT, START_TIME
# Arguments:      $1 - namespace to inspect
# Outputs:        report in $OUTPUT_FILE; completion message on stdout
#######################################
function collect_comprehensive_logs {
  local NAMESPACE=$1

  # Report header (first write truncates the output file).
  {
    echo "===== ENHANCED LOG COLLECTION REPORT ====="
    echo "Collection Time: $(date)"
    echo "Test Context: ${TEST_CONTEXT:-Not specified}"
    echo "Test Start Time: ${START_TIME:-Not specified}"
    echo "Namespace: ${NAMESPACE}"
    echo ""
  } > "$OUTPUT_FILE"

  # 1. All pods with labels and placement details.
  echo "===== POD OVERVIEW WITH LABELS =====" >> "$OUTPUT_FILE"
  kubectl get pods -n "${NAMESPACE}" -o wide --show-labels >> "$OUTPUT_FILE" 2>&1 || echo "Failed to get pod overview" >> "$OUTPUT_FILE"
  echo "" >> "$OUTPUT_FILE"

  # 2. Argo Workflow objects (CRD may be absent; tolerate failure).
  echo "===== ARGO WORKFLOWS =====" >> "$OUTPUT_FILE"
  kubectl get workflows -n "${NAMESPACE}" -o wide --show-labels >> "$OUTPUT_FILE" 2>&1 || echo "No workflows found or failed to get workflows" >> "$OUTPUT_FILE"
  echo "" >> "$OUTPUT_FILE"

  # 3. All namespace events, oldest first.
  echo "===== RECENT EVENTS =====" >> "$OUTPUT_FILE"
  kubectl get events -n "${NAMESPACE}" --sort-by='.lastTimestamp' >> "$OUTPUT_FILE" 2>&1 || echo "Failed to get events" >> "$OUTPUT_FILE"
  echo "" >> "$OUTPUT_FILE"

  # 4. Prefer pods created after START_TIME; the lexicographic awk compare
  #    is valid because both sides are ISO-8601 UTC timestamps. Fall back
  #    to every pod when none match or no start time was given.
  local POD_NAMES
  if [[ -n "$START_TIME" ]]; then
    echo "===== PODS CREATED DURING TEST (after $START_TIME) =====" >> "$OUTPUT_FILE"
    POD_NAMES=$(kubectl get pods -n "${NAMESPACE}" -o jsonpath='{range .items[*]}{.metadata.name}{" "}{.metadata.creationTimestamp}{"\n"}{end}' | awk -v start_time="$START_TIME" '$2 >= start_time {print $1}')
    if [[ -n "$POD_NAMES" ]]; then
      echo "Test-related pods: $POD_NAMES" >> "$OUTPUT_FILE"
    else
      echo "No pods found created after $START_TIME" >> "$OUTPUT_FILE"
      # Fall back to all pods
      POD_NAMES=$(kubectl get pods -n "${NAMESPACE}" -o custom-columns=":metadata.name" --no-headers)
    fi
  else
    POD_NAMES=$(kubectl get pods -n "${NAMESPACE}" -o custom-columns=":metadata.name" --no-headers)
  fi
  echo "" >> "$OUTPUT_FILE"

  if [[ -z "${POD_NAMES}" ]]; then
    echo "No pods found in namespace '${NAMESPACE}'." >> "$OUTPUT_FILE"
    return
  fi

  # 5. Per-pod details and logs. POD_NAMES is intentionally unquoted:
  #    it is a whitespace-separated list of DNS-safe pod names.
  local POD_NAME
  for POD_NAME in ${POD_NAMES}; do
    {
      echo "=========================================="
      echo "POD: ${POD_NAME}"
      echo "=========================================="

      echo "----- POD METADATA -----"
      kubectl get pod "${POD_NAME}" -n "${NAMESPACE}" -o yaml | grep -E "(name:|namespace:|labels:|annotations:|creationTimestamp:|phase:|conditions:)" || echo "Failed to get pod metadata"

      echo ""
      echo "----- POD DESCRIPTION -----"
      kubectl describe pod "${POD_NAME}" -n "${NAMESPACE}" || echo "Failed to describe pod ${POD_NAME}"

      # Fetch logs per container when there is more than one:
      # 'kubectl logs' without -c fails outright on multi-container pods
      # that have no default-container annotation.
      local CONTAINERS
      CONTAINERS=$(kubectl get pod "${POD_NAME}" -n "${NAMESPACE}" -o jsonpath='{.spec.containers[*].name}' 2>/dev/null)
      if [[ $(echo "$CONTAINERS" | wc -w) -gt 1 ]]; then
        echo ""
        echo "----- CONTAINER LOGS -----"
        local CONTAINER
        for CONTAINER in $CONTAINERS; do
          echo "--- Container: $CONTAINER ---"
          kubectl logs "${POD_NAME}" -c "$CONTAINER" -n "${NAMESPACE}" --tail=500 || echo "No logs for container $CONTAINER"
        done
      else
        echo ""
        echo "----- POD LOGS (last 1000 lines) -----"
        kubectl logs "${POD_NAME}" -n "${NAMESPACE}" --tail=1000 || echo "No logs found for pod ${POD_NAME}"
      fi

      echo ""
      echo "=========================================="
      echo ""
    } >> "$OUTPUT_FILE"
  done

  # 6. Pipeline run CRs, if that CRD is installed.
  echo "===== PIPELINE RUNS (if available) =====" >> "$OUTPUT_FILE"
  kubectl get runs -n "${NAMESPACE}" -o wide --show-labels >> "$OUTPUT_FILE" 2>&1 || echo "No pipeline runs found or CRD not available" >> "$OUTPUT_FILE"
  echo "" >> "$OUTPUT_FILE"

  echo "Enhanced log collection completed. Output saved to: $OUTPUT_FILE"
}
125+
126+
# Entry point: validate the target namespace, then produce the report.
check_namespace "$NS"
collect_comprehensive_logs "$NS"

.github/workflows/e2e-test.yml

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,40 @@ jobs:
158158
tls_enabled: ${{ matrix.pod_to_pod_tls_enabled }}
159159
ca_cert_path: ${{ env.CA_CERT_PATH }}
160160

161+
# Gather enhanced pod/workflow diagnostics when the e2e test step fails,
# then publish them as a build artifact.
- name: Collect Pod logs on test failure
  id: collect-failure-logs
  shell: bash
  if: ${{ steps.test-run.outcome == 'failure' }}
  continue-on-error: true
  run: |
    echo "=== Collecting enhanced logs after test failure ==="
    NAMESPACE="${{ steps.configure.outputs.NAMESPACE }}"
    TEST_START_TIME=$(date -u -d '30 minutes ago' '+%Y-%m-%dT%H:%M:%SZ')
    TEST_CONTEXT="${{ matrix.test_label }}_K8s-${{ matrix.k8s_version }}_cache-${{ matrix.cache_enabled }}"

    # Run the enhanced log collection script.
    chmod +x ./.github/resources/scripts/collect-enhanced-logs.sh
    ./.github/resources/scripts/collect-enhanced-logs.sh \
      --ns "$NAMESPACE" \
      --output /tmp/enhanced_failure_logs.txt \
      --test-context "$TEST_CONTEXT" \
      --start-time "$TEST_START_TIME"

    # Append Ginkgo test output if available.
    if [ -f "${{ env.E2E_TESTS_DIR }}/reports/junit.xml" ]; then
      echo "=== GINKGO TEST RESULTS ===" >> /tmp/enhanced_failure_logs.txt
      cat "${{ env.E2E_TESTS_DIR }}/reports/junit.xml" >> /tmp/enhanced_failure_logs.txt 2>/dev/null || true
    fi

- name: Upload Pod logs on test failure
  uses: actions/upload-artifact@v4
  if: ${{ steps.test-run.outcome == 'failure' && steps.collect-failure-logs.outcome == 'success' }}
  continue-on-error: true
  with:
    name: failure-logs-${{ matrix.test_label }}-K8s-${{ matrix.k8s_version }}-cache-${{ matrix.cache_enabled }}-argo-${{ matrix.argo_version }}-proxy-${{ matrix.proxy }}-storage-${{ matrix.storage }}
    path: /tmp/enhanced_failure_logs.txt
    retention-days: 30
194+
161195
- name: Notify test reports
162196
shell: bash
163197
if: ${{ steps.test-run.outcome == 'success' }}
@@ -264,6 +298,52 @@ jobs:
264298
user_namespace: ${{ env.USER_NAMESPACE }}
265299
report_name: "E2EMultiUserTests_K8s=${{ matrix.k8s_version }}_cacheEnabled=${{ matrix.cache_enabled }}_multiUser=${{ matrix.multi_user }}_storage=${{ matrix.storage }}"
266300

301+
# Multi-user variant: gather diagnostics from the pipelines namespace and,
# when it differs, from the user namespace too, then publish as an artifact.
- name: Collect Pod logs on test failure
  id: collect-failure-logs
  shell: bash
  if: ${{ steps.test-run.outcome == 'failure' }}
  continue-on-error: true
  run: |
    echo "=== Collecting enhanced logs after test failure ==="
    NAMESPACE="${{ steps.configure.outputs.NAMESPACE }}"
    TEST_START_TIME=$(date -u -d '30 minutes ago' '+%Y-%m-%dT%H:%M:%SZ')
    TEST_CONTEXT="MultiUser_K8s-${{ matrix.k8s_version }}_cache-${{ matrix.cache_enabled }}_storage-${{ matrix.storage }}"

    # Run the enhanced log collection script.
    chmod +x ./.github/resources/scripts/collect-enhanced-logs.sh
    ./.github/resources/scripts/collect-enhanced-logs.sh \
      --ns "$NAMESPACE" \
      --output /tmp/enhanced_failure_logs.txt \
      --test-context "$TEST_CONTEXT" \
      --start-time "$TEST_START_TIME"

    # Also collect user-namespace logs for multi-user tests.
    USER_NS="${{ env.USER_NAMESPACE }}"
    if [ "$USER_NS" != "$NAMESPACE" ]; then
      echo "=== USER NAMESPACE LOGS ===" >> /tmp/enhanced_failure_logs.txt
      ./.github/resources/scripts/collect-enhanced-logs.sh \
        --ns "$USER_NS" \
        --output /tmp/user_ns_logs.txt \
        --test-context "$TEST_CONTEXT" \
        --start-time "$TEST_START_TIME"
      cat /tmp/user_ns_logs.txt >> /tmp/enhanced_failure_logs.txt 2>/dev/null || true
    fi

    # Append Ginkgo test output if available.
    if [ -f "${{ env.E2E_TESTS_DIR }}/reports/junit.xml" ]; then
      echo "=== GINKGO TEST RESULTS ===" >> /tmp/enhanced_failure_logs.txt
      cat "${{ env.E2E_TESTS_DIR }}/reports/junit.xml" >> /tmp/enhanced_failure_logs.txt 2>/dev/null || true
    fi

- name: Upload Pod logs on test failure
  uses: actions/upload-artifact@v4
  if: ${{ steps.test-run.outcome == 'failure' && steps.collect-failure-logs.outcome == 'success' }}
  continue-on-error: true
  with:
    name: failure-logs-multiuser-K8s-${{ matrix.k8s_version }}-cache-${{ matrix.cache_enabled }}-storage-${{ matrix.storage }}
    path: /tmp/enhanced_failure_logs.txt
    retention-days: 30
346+
267347
- name: Notify test reports
268348
shell: bash
269349
if: ${{ steps.test-run.outcome == 'success' }}

0 commit comments

Comments
 (0)