diff --git a/README.md b/README.md
index ae5cc8eb858..e6b57383869 100644
--- a/README.md
+++ b/README.md
@@ -36,8 +36,7 @@ more. Openverse is live at [openverse.org](https://openverse.org).
 
 This repository also contains the following directories.
 
-- [Brand](brand/) | Brand assets for Openverse such as logo and icon and
-  guidelines for using these assets
+- [Brand](brand/) | Brand assets for Openverse such as logo, icon, and guidelines for using these assets.
 - [Templates](templates/) | Jinja templates that can be rendered into common
   scaffolding code for the project
 
diff --git a/catalog/dags/common/refresh.py b/catalog/dags/common/refresh.py
new file mode 100644
index 00000000000..2723e9f7ab7
--- /dev/null
+++ b/catalog/dags/common/refresh.py
@@ -0,0 +1,13 @@
+# catalog/dags/common/refresh.py
+
+from airflow.operators.python import PythonOperator
+
+def refresh_provider_data(provider_name):
+    def _refresh():
+        print(f"Refreshing data for provider: {provider_name}")
+        # TODO: actual refresh logic (reuse from old ingestion file)
+
+    return PythonOperator(
+        task_id=f"refresh_{provider_name}_data",
+        python_callable=_refresh
+    )
\ No newline at end of file
diff --git a/catalog/dags/data_refresh/flickr_refresh.py b/catalog/dags/data_refresh/flickr_refresh.py
new file mode 100644
index 00000000000..771a99f2935
--- /dev/null
+++ b/catalog/dags/data_refresh/flickr_refresh.py
@@ -0,0 +1,14 @@
+# catalog/dags/data_refresh/flickr_refresh.py
+
+from airflow import DAG
+from datetime import datetime, timedelta
+from common.refresh import refresh_provider_data
+
+with DAG(
+    dag_id="refresh_flickr_data",
+    schedule_interval="@daily",
+    start_date=datetime(2023, 1, 1),
+    catchup=False,
+    tags=["refresh", "flickr"]
+) as dag:
+    refresh_task = refresh_provider_data("flickr")
diff --git a/catalog/dags/providers/flickr.py b/catalog/dags/providers/flickr.py
new file mode 100644
index 00000000000..7143cb02317
--- /dev/null
+++ b/catalog/dags/providers/flickr.py
@@ -0,0 +1,14 @@
+# catalog/dags/providers/flickr.py
+
+from airflow import DAG
+from datetime import datetime
+from providers.shared import ingest_flickr_data  # assume you have this
+
+with DAG(
+    dag_id="ingest_flickr_data",
+    schedule_interval="@daily",
+    start_date=datetime(2023, 1, 1),
+    catchup=False,
+    tags=["ingestion", "flickr"]
+) as dag:
+    ingest_task = ingest_flickr_data()