Skip to content

Commit 1c243e6

Browse files
committed
syncing with version 1.38
2 parents 44ebf0d + bb7fc91 commit 1c243e6

File tree

2,314 files changed

+179044
-43975
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,314 files changed

+179044
-43975
lines changed

.circleci/check-changes.sh

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
#!/bin/bash
2+
# Copyright 2018-2025 contributors to the OpenLineage project
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# Based on the changed files (or if we are on the main branch), generate a yaml file with
6+
# a list of workflow files. Use those files to build a workflow with a union of all tasks
7+
8+
# Trace every command as it runs so the CI log shows exactly what was executed.
set -x

# Fall back to defaults when the caller did not provide these variables:
#   NIGHTLY_RUN      - compared against "active" below to select the nightly set
#   INTEGRATION_TYPE - "*" means every integration; otherwise a single name
: "${NIGHTLY_RUN:=inactive}"
: "${INTEGRATION_TYPE:=*}"
14+
15+
# Schedule workflows when files under a path differ from the main branch.
#
# Arguments:
#   $1  - path to diff against main
#   $2  - "true" to ignore Markdown files (names ending in ".md") when
#         deciding whether anything changed; any other value counts them
#   $3+ - workflow file names under .circleci/workflows/ to schedule, or a
#         single "*" to schedule every file in that directory
# Outputs:
#   Prints "Change found in <path>" when a change is detected and records the
#   workflow paths in workflow_files.txt in the current directory ("*"
#   overwrites the file with the full listing, named workflows are appended).
function check_change () {
  local module=$1
  local exclude_md=$2
  shift 2

  local changed_files
  # "--" marks the argument as a path, so git does not reject a directory that
  # is absent from the working tree as an ambiguous revision.
  if [ "$exclude_md" == "true" ]; then
    # Anchored, escaped pattern: only names ending in ".md" are dropped. An
    # unanchored ".*.md" would also discard paths such as "cmd/main.py".
    changed_files=$(git diff --name-only main -- "$module" | grep -v '\.md$')
  else
    changed_files=$(git diff --name-only main -- "$module")
  fi

  if [ -n "$changed_files" ]; then
    echo "Change found in $module"
    if [ "$1" == "*" ]; then
      ls -d "$PWD/.circleci/workflows/"* > workflow_files.txt
    else
      local workflow
      for workflow in "$@"; do
        echo "$PWD/.circleci/workflows/$workflow" >> workflow_files.txt
      done
    fi
  fi
}
38+
39+
# Add always workflow
echo "$PWD/.circleci/workflows/openlineage-always.yml" >> workflow_files.txt

# Decide which workflow files take part in this pipeline run.
if [ "$NIGHTLY_RUN" == "active" ]; then
  # Nightly build: run only the Spark and Java workflows
  echo "$PWD/.circleci/workflows/openlineage-spark.yml" >> workflow_files.txt
  echo "$PWD/.circleci/workflows/openlineage-java.yml" >> workflow_files.txt
elif [ -n "$CIRCLE_TAG" ]; then
  # If we are on tag, run all of the workflows
  ls -d "$PWD"/.circleci/workflows/* > workflow_files.txt
elif [ "$CIRCLE_BRANCH" == "main" ]; then
  # If we are on the main branch, run all of the workflows;
  # if integration type is not all, we specify only a single integration type in workflow files
  if [ "$INTEGRATION_TYPE" != "*" ]; then
    # Append rather than overwrite: the openlineage-always.yml entry written
    # above must stay in the list, because the merge step makes jobs from every
    # other workflow require "always_run".
    ls -d "$PWD"/.circleci/workflows/openlineage-"$INTEGRATION_TYPE".yml >> workflow_files.txt
  else
    ls -d "$PWD"/.circleci/workflows/* > workflow_files.txt
  fi
else
  # Changes to the spec require all workflows to run
  check_change spec true "*"
  check_change .circleci true "*"
  check_change integration/sql/ true "*"

  # Per-module changes schedule only the workflows that depend on that module
  check_change client/java/ true openlineage-java.yml openlineage-flink.yml openlineage-spark.yml
  check_change integration/spark/ true openlineage-java.yml openlineage-spark.yml
  check_change integration/spark-extension-interfaces/ true openlineage-java.yml openlineage-spark.yml
  check_change integration/flink/ true openlineage-java.yml openlineage-flink.yml
  check_change client/python/ true openlineage-python.yml
  check_change integration/common/ true openlineage-python.yml
  check_change integration/airflow/ true openlineage-python.yml
  check_change integration/dagster/ true openlineage-python.yml
  check_change integration/dbt/ true openlineage-python.yml
  check_change proxy/fluentd/ true openlineage-proxy-fluentd.yml
  # Markdown changes count for the website (exclusion flag is false)
  check_change website false openlineage-website.yml
  check_change integration/hive true openlineage-java.yml openlineage-hive.yml
fi
77+
# Make sure the list exists even when no workflow was selected.
touch workflow_files.txt

# Collect the unique workflow paths into an array so that a path containing
# whitespace or glob characters is passed to yq as a single argument.
FILES=()
while IFS= read -r workflow_file; do
  if [ -n "$workflow_file" ]; then
    FILES+=("$workflow_file")
  fi
done < <(sort -u workflow_files.txt)

# yq eval-all the workflow files specified in the workflow_files.txt file.
# Collect all the jobs from each workflow except for the "workflow_complete" job and
# create a union of all jobs.
# Collect the "workflow_complete" job from each workflow and concatenate the "requires"
# section of each and create a single "workflow_complete" job that is the union of all.
# The output of this is a circleci configuration with a single workflow called "build"
# that contains the union of all jobs plus the "workflow_complete" job that depends on
# all required jobs.
#
# This configuration is piped into yq along with the continue_config.yml file and the
# union of the two files is output to complete_config.yml

# Without pipefail only the last yq stage decides the pipeline status, so a
# failure in an earlier stage would leave a truncated config behind unnoticed.
set -o pipefail

# shellcheck disable=SC2016
if ! yq eval-all '.workflows | . as $wf ireduce({}; . * $wf) | to_entries |
  .[] |= (
    with(select(.key == "openlineage-always"); .) |
    with(select(.key != "openlineage-always"); .value.jobs |= map(select(.[].requires == null) |= .[].requires = ["always_run"]))
  ) | from_entries |
  ((map(.jobs[] | select(has("workflow_complete") | not)) | . as $item ireduce ([]; (. *+ $item) ))
    + [(map(.jobs[] | select(has("workflow_complete"))) | .[] as $item ireduce ({}; . *+ $item))])' "${FILES[@]}" \
  | yq eval-all '{"workflows": {"build": {"jobs": .}}}' - \
  | yq eval-all '. as $wf ireduce({}; . * $wf)' .circleci/continue_config.yml - > complete_config.yml; then
  echo "Failed to generate complete_config.yml" >&2
  exit 1
fi
cat complete_config.yml # to reproduce generated workflow

.circleci/checksum.sh

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,23 @@
11
#!/bin/bash
2+
# Copyright 2018-2025 contributors to the OpenLineage project
3+
# SPDX-License-Identifier: Apache-2.0
4+
25
RESULT_FILE=$1
36
BRANCH_NAME=$2
47

5-
if [ -f $RESULT_FILE ]; then
6-
rm $RESULT_FILE
8+
if [ -f "$RESULT_FILE" ]; then
9+
rm "$RESULT_FILE"
710
fi
8-
touch $RESULT_FILE
11+
touch "$RESULT_FILE"
912

1013
# For dependabot PRs, skip checksum generation to reuse the same cache and reduce storage usage.
11-
if [[ $BRANCH_NAME == dependabot* ]]; then
12-
echo "DEPENDABOT" >> $RESULT_FILE
14+
if [[ "$BRANCH_NAME" == dependabot* ]]; then
15+
echo "DEPENDABOT" >> "$RESULT_FILE"
1316
exit 0
1417
fi
1518

1619
checksum_file() {
17-
echo `openssl md5 $1 | awk '{print $2}'`
20+
openssl md5 "$1" | awk '{print $2}'
1821
}
1922

2023
FILES=()
@@ -23,8 +26,8 @@ while read -r -d ''; do
2326
done < <(find . -name 'build.gradle' -type f -print0)
2427

2528
# Loop through files and append MD5 to result file
26-
for FILE in ${FILES[@]}; do
27-
echo `checksum_file $FILE` >> $RESULT_FILE
29+
for FILE in "${FILES[@]}"; do
30+
checksum_file "$FILE" >> "$RESULT_FILE"
2831
done
2932
# Now sort the file so that it is
30-
sort $RESULT_FILE -o $RESULT_FILE
33+
sort "$RESULT_FILE" -o "$RESULT_FILE"

.circleci/config.yml

Lines changed: 29 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ setup: true
66
# the path of an updated fileset
77
orbs:
88
continuation: circleci/[email protected]
9+
github-cli: circleci/[email protected]
910

1011
# optional parameter when triggering to
1112
# only run a particular type of integration
@@ -48,9 +49,10 @@ jobs:
4849
# workflow
4950
determine_changed_modules:
5051
docker:
51-
- image: cimg/python:3.8
52+
- image: cimg/python:3.9
5253
steps:
5354
- checkout
55+
- github-cli/setup
5456
- run:
5557
name: Install yq
5658
command: |
@@ -59,87 +61,10 @@ jobs:
5961
sudo chmod +x /usr/local/bin/yq
6062
- run:
6163
name: Check changes and merge workflows.
62-
command: |
63-
# Based on the changed files (or if we are on the main branch), generate a yaml file with
64-
# a list of workflow files. Use those files to build a workflow with a union of all tasks
65-
66-
function check_change () {
67-
MOD=$1
68-
shift
69-
if [ $(git diff --name-only main $MOD | grep -v ".*.md" | wc -l) -gt 0 ]; then
70-
echo "Change found in $MOD"
71-
if [ "$1" == "*" ]; then
72-
ls -d $PWD/.circleci/workflows/* > workflow_files.txt
73-
else
74-
for ln in $@; do
75-
echo "$PWD/.circleci/workflows/$ln" >> workflow_files.txt
76-
done
77-
fi
78-
fi
79-
}
80-
81-
# Add always workflow
82-
echo "$PWD/.circleci/workflows/openlineage-always.yml" >> workflow_files.txt
83-
84-
# Nightly build - add workflows to be included
85-
if [ "<< pipeline.parameters.nightly-run >>" == "active" ]; then
86-
# run only Spark within nightly build
87-
echo "$PWD/.circleci/workflows/openlineage-spark.yml" >> workflow_files.txt
88-
echo "$PWD/.circleci/workflows/openlineage-java.yml" >> workflow_files.txt
89-
elif [ -n "$CIRCLE_TAG" ]; then
90-
# If we are on tag, run all of the workflows
91-
ls -d $PWD/.circleci/workflows/* > workflow_files.txt
92-
elif [ "$CIRCLE_BRANCH" == "main" ]; then
93-
# If we are on the main branch, run all of the workflows
94-
# if integration type is not all, we specify only a single integration type in workflow files
95-
if [ "<< pipeline.parameters.integration-type >>" != "*" ]; then
96-
ls -d $PWD/.circleci/workflows/openlineage-integration-<< pipeline.parameters.integration-type >>.yml > workflow_files.txt
97-
else
98-
ls -d $PWD/.circleci/workflows/* > workflow_files.txt
99-
fi
100-
else
101-
# Changes to the spec require all workflows to run
102-
check_change spec "*"
103-
check_change .circleci "*"
104-
check_change integration/sql/ "*"
105-
106-
check_change client/java/ openlineage-java.yml openlineage-flink.yml openlineage-spark.yml
107-
check_change integration/spark/ openlineage-java.yml openlineage-spark.yml
108-
check_change integration/spark-extension-interfaces/ openlineage-java.yml openlineage-spark.yml
109-
check_change integration/flink/ openlineage-java.yml openlineage-flink.yml
110-
check_change client/python/ openlineage-python.yml
111-
check_change integration/common/ openlineage-python.yml
112-
check_change integration/airflow/ openlineage-python.yml
113-
check_change integration/dagster/ openlineage-python.yml
114-
check_change integration/dbt/ openlineage-python.yml
115-
check_change proxy/backend/ openlineage-proxy-backend.yml
116-
check_change proxy/fluentd/ openlineage-proxy-fluentd.yml
117-
check_change website openlineage-website.yml
118-
fi
119-
touch workflow_files.txt
120-
FILES=$(sort workflow_files.txt | uniq | tr "\n" " ")
121-
122-
# yq eval-all the workflow files specified in the workflow_files.txt file.
123-
# Collect all the jobs from each workflow except for the "workflow_complete" job and
124-
# create a union of all jobs.
125-
# Collect the "workflow_complete" job from each workflow and concatenate the "requires"
126-
# section of each and create a single "workflow_complete" job that is the union of all.
127-
# The output of this is a circleci configuration with a single workflow called "build"
128-
# that contains the union of all jobs plus the "workflow_complete" job that depends on
129-
# all required jobs.
130-
#
131-
# This configuration is piped into yq along with the continue_config.yml file and the
132-
# union of the two files is output to complete_config.yml
133-
yq eval-all '.workflows | . as $wf ireduce({}; . * $wf) | to_entries |
134-
.[] |= (
135-
with(select(.key == "openlineage-always"); .) |
136-
with(select(.key != "openlineage-always"); .value.jobs |= map(select(.[].requires == null) |= .[].requires = ["always_run"]))
137-
) | from_entries |
138-
((map(.jobs[] | select(has("workflow_complete") | not)) | . as $item ireduce ([]; (. *+ $item) ))
139-
+ [(map(.jobs[] | select(has("workflow_complete"))) | .[] as $item ireduce ({}; . *+ $item))])' $FILES | \
140-
yq eval-all '{"workflows": {"build": {"jobs": .}}}' - | \
141-
yq eval-all '. as $wf ireduce({}; . * $wf)' .circleci/continue_config.yml - > complete_config.yml
142-
cat complete_config.yml # to reproduce generated workflow
64+
command: .circleci/check-changes.sh
65+
environment:
66+
NIGHTLY_RUN: << pipeline.parameters.nightly-run >>
67+
INTEGRATION_TYPE: << pipeline.parameters.integration-type >>
14368
- unless:
14469
condition:
14570
matches:
@@ -150,26 +75,27 @@ jobs:
15075
name: Remove approval steps if not pull from forks.
15176
command: |
15277
pip install pyyaml==6.0.1
153-
python -c "import yaml
154-
d = yaml.safe_load(open('complete_config.yml'))
155-
for workflow_name, workflow_definition in d['workflows'].items():
156-
jobs = workflow_definition.get('jobs') if isinstance(workflow_definition, dict) else None
157-
if not jobs: continue
158-
159-
# find all approvals
160-
approvals = list(filter(lambda x: isinstance(x, dict) and list(x.values())[0].get('type') == 'approval', jobs))
161-
for approval in approvals:
162-
approval_name = next(iter(approval))
163-
approval_upstreams = approval[approval_name].get('requires')
164-
approval_downstream = list(filter(lambda x: isinstance(x, dict) and approval_name in list(x.values())[0].get('requires', ''), jobs))
165-
# replace approval with its upstream jobs
166-
for job in approval_downstream:
167-
requires = next(iter(job.values()))['requires']
168-
requires.remove(approval_name)
169-
requires.extend(approval_upstreams)
170-
jobs.remove(approval)
171-
with open('complete_config.yml', 'w') as f:
172-
f.write(yaml.dump(d, sort_keys=False))"
78+
python dev/filter_approvals.py
79+
- run: |
80+
export PR_NUM=$(echo $CIRCLE_PULL_REQUEST | cut -d'/' -f7)
81+
export HAS_FULL_TESTS_LABEL=$(gh pr view $CIRCLE_PULL_REQUEST --repo OpenLineage/OpenLineage --json labels | jq 'any(.labels[]; .name == "full-tests")')
82+
if [ "<< pipeline.parameters.nightly-run >>" == "active" ]; then
83+
export IS_FULL_TESTS=1
84+
elif [ "<< pipeline.git.branch >>" == "main" ]; then
85+
export IS_FULL_TESTS=1
86+
elif [ "$HAS_FULL_TESTS_LABEL" == "true" ]; then
87+
export IS_FULL_TESTS=1
88+
else
89+
export IS_FULL_TESTS=0
90+
fi
91+
92+
echo $IS_FULL_TESTS
93+
94+
if [ -z "$IS_FULL_TESTS" ] || [ "$IS_FULL_TESTS" == "0" ]; then
95+
echo "Skipping full tests"
96+
pip install pyyaml==6.0.1
97+
python dev/filter_matrix.py
98+
fi
17399
- when:
174100
condition:
175101
or:
@@ -194,6 +120,7 @@ workflows:
194120
schedule_workflow:
195121
jobs:
196122
- determine_changed_modules:
123+
context: pr
197124
filters:
198125
tags:
199126
only: /^[0-9]+(\.[0-9]+){2}(-rc\.[0-9]+)?$/

0 commit comments

Comments
 (0)