Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ more. Openverse is live at [openverse.org](https://openverse.org).

This repository also contains the following directories.

- [Brand](brand/) | Brand assets for Openverse such as logo and icon and
guidelines for using these assets
- [Brand](brand/) | Brand assets for Openverse such as logo, icon, and guidelines for using these assets.
- [Templates](templates/) | Jinja templates that can be rendered into common
scaffolding code for the project

Expand Down
13 changes: 13 additions & 0 deletions catalog/dags/common/refresh.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# catalog/dags/common/refresh.py

from airflow.operators.python import PythonOperator

def refresh_provider_data(provider_name: str) -> PythonOperator:
    """Build a PythonOperator that refreshes data for a single provider.

    Args:
        provider_name: Provider slug (e.g. ``"flickr"``); interpolated into
            the task_id and reported by the refresh callable.

    Returns:
        A ``PythonOperator`` with task_id ``refresh_<provider_name>_data``.
        The operator is picked up by whichever DAG context is active when
        this factory is called.
    """

    def _refresh() -> None:
        # Use the logging module instead of print() so the message lands in
        # Airflow's task logs with a proper level and timestamp.
        import logging

        logging.getLogger(__name__).info(
            "Refreshing data for provider: %s", provider_name
        )
        # TODO: actual refresh logic (reuse from old ingestion file)

    return PythonOperator(
        task_id=f"refresh_{provider_name}_data",
        python_callable=_refresh,
    )
14 changes: 14 additions & 0 deletions catalog/dags/data_refresh/flickr_refresh.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# catalog/dags/data_refresh/flickr_refresh.py

from airflow import DAG
from datetime import datetime, timedelta
from common.refresh import refresh_provider_data

# Daily DAG that refreshes Flickr data via the shared refresh-task factory.
flickr_refresh_dag = DAG(
    dag_id="refresh_flickr_data",
    schedule_interval="@daily",
    start_date=datetime(2023, 1, 1),
    catchup=False,  # do not backfill missed runs since the start date
    tags=["refresh", "flickr"],
)

with flickr_refresh_dag:
    # Single task: construction is delegated to the shared helper so the
    # task_id convention stays consistent across providers.
    refresh_task = refresh_provider_data("flickr")
14 changes: 14 additions & 0 deletions catalog/dags/providers/flickr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# catalog/dags/providers/flickr.py

from airflow import DAG
from datetime import datetime
from providers.shared import ingest_flickr_data # assume you have this

# Daily ingestion DAG for Flickr media records.
with DAG(
    "ingest_flickr_data",
    schedule_interval="@daily",
    start_date=datetime(2023, 1, 1),
    catchup=False,  # skip backfill for runs before deployment
    tags=["ingestion", "flickr"],
) as flickr_ingestion_dag:
    # The shared provider helper builds the actual ingestion task.
    ingest_task = ingest_flickr_data()
Loading