OpenMetadata/ingestion/examples/sample_data/pipelines/pipelines.json
Suman Maharana dfd2bd5167
Fixes #25717: Airflow Task Instance link does not correctly filter by DAG (#25829)
* Fix: Airflow Task Instance link does not correctly filter by DAG

* fix airflow pipeline source url
2026-02-17 11:37:01 +05:30

356 lines
No EOL
13 KiB
JSON

{
"pipelines": [{
"name": "presto_etl",
"displayName": "Presto ETL",
"description": "Presto ETL pipeline",
"sourceUrl": "http://localhost:8080/tree?dag_id=presto_etl",
"scheduleInterval": "* * * * *",
"tasks": [
{
"name": "presto_task",
"displayName": "Presto Task",
"description": "Airflow operator to perform ETL on presto tables",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=assert_table_exists",
"downstreamTasks": ["assert_table_exists"],
"taskType": "PrestoOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}
]
},
{
"name": "dim_address_etl",
"displayName": "dim_address etl",
"description": "dim_address ETL pipeline",
"sourceUrl": "http://localhost:8080/tree?dag_id=dim_address_etl",
"scheduleInterval": "5 * * * *",
"tasks": [{
"name": "dim_address_task",
"displayName": "dim_address Task",
"description": "Airflow operator to perform ETL and generate dim_address table",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=dim_address_task",
"downstreamTasks": ["assert_table_exists"],
"taskType": "PrestoOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}
]
},
{
"name": "dim_user_etl",
"displayName": "dim_user etl",
"description": "dim_user ETL pipeline",
"sourceUrl": "http://localhost:8080/tree?dag_id=dim_user_etl",
"tasks": [{
"name": "dim_user_task",
"displayName": "dim_user Task",
"description": "Airflow operator to perform ETL and generate dim_user table",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=dim_user_task",
"downstreamTasks": ["assert_table_exists"],
"taskType": "PrestoOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}
]
},
{
"name": "dim_location_etl",
"displayName": "dim_location etl",
"description": "dim_location ETL pipeline",
"sourceUrl": "http://localhost:8080/tree?dag_id=dim_location_etl",
"tasks": [{
"name": "dim_location_task",
"displayName": "dim_location Task",
"description": "Airflow operator to perform ETL and generate dim_location table",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=dim_location_task",
"downstreamTasks": ["assert_table_exists"],
"taskType": "PrestoOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}
]
},
{
"name": "dim_product_etl",
"displayName": "dim_product etl",
"description": "dim_product ETL pipeline",
"sourceUrl": "http://localhost:8080/tree?dag_id=dim_product_etl",
"tasks": [{
"name": "dim_product_task",
"displayName": "dim_product Task",
"description": "Airflow operator to perform ETL and generate dim_product table",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=dim_product_task",
"downstreamTasks": ["assert_table_exists"],
"taskType": "PrestoOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}
]
},
{
"name": "trino_etl",
"displayName": "Trino ETL",
"description": "Trino ETL pipeline",
"sourceUrl": "http://localhost:8080/tree?dag_id=trino_etl",
"scheduleInterval": "@once",
"tasks": [{
"name": "trino_task",
"displayName": "Trino Task",
"description": "Airflow operator to perform ETL on trino tables",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=assert_table_exists",
"downstreamTasks": ["assert_table_exists"],
"taskType": "TrinoOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}
]
},
{
"name": "hive_etl",
"displayName": "Hive ETL",
"description": "Hive ETL pipeline",
"sourceUrl": "http://localhost:8080/tree?dag_id=hive_etl",
"tasks": [{
"name": "hive_create_table",
"displayName": "Hive Create Table",
"description": "Hive Create Table Task",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=hive_create_table",
"downstreamTasks": ["assert_table_exists"],
"taskType": "HiveOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}
]
},
{
"name": "snowflake_etl",
"displayName": "Snowflake ETL",
"description": "Snowflake ETL pipeline",
"sourceUrl": "http://localhost:8080/tree?dag_id=snowflake_etl",
"tasks": [{
"name": "snowflake_task",
"displayName": "Snowflake Task",
"description": "Airflow operator to perform ETL on snowflake tables",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=assert_table_exists",
"downstreamTasks": ["assert_table_exists"],
"taskType": "SnowflakeOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}]
},
{
"name": "real_time_metrics",
"displayName": "Real-time Metrics Aggregation",
"description": "Real-time metrics aggregation pipeline running every 15 minutes",
"sourceUrl": "http://localhost:8080/tree?dag_id=real_time_metrics",
"scheduleInterval": "*/15 * * * *",
"tasks": [{
"name": "metrics_aggregation_task",
"displayName": "Metrics Aggregation Task",
"description": "Airflow operator to aggregate streaming metrics",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=metrics_aggregation_task",
"downstreamTasks": ["assert_table_exists"],
"taskType": "PythonOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}]
},
{
"name": "ml_feature_pipeline",
"displayName": "ML Feature Engineering Pipeline",
"description": "Machine learning feature engineering pipeline running twice daily",
"sourceUrl": "http://localhost:8080/tree?dag_id=ml_feature_pipeline",
"scheduleInterval": "0 2,14 * * *",
"tasks": [{
"name": "feature_engineering_task",
"displayName": "Feature Engineering Task",
"description": "Airflow operator to generate ML features from order data",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=feature_engineering_task",
"downstreamTasks": ["assert_table_exists"],
"taskType": "PythonOperator"
},
{
"name": "assert_table_exists",
"displayName": "Assert Table Exists",
"description": "Assert if a table exists",
"sourceUrl": "http://localhost:8080/taskinstance/list/?_flt_3_dag_id=assert_table_exists",
"downstreamTasks": [],
"taskType": "HiveOperator"
}]
},
{
"name": "dbt_staging_shopify",
"displayName": "DBT Staging Shopify",
"description": "DBT pipeline for staging shopify raw data - runs daily",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_staging_shopify",
"scheduleInterval": "0 1 * * *",
"service": "sample_dbtcloud",
"tasks": [{
"name": "dbt_run_staging",
"displayName": "DBT Run Staging Models",
"description": "Execute dbt run for staging models",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_staging_shopify",
"downstreamTasks": ["dbt_test_staging"],
"taskType": "dbtRunTask"
},
{
"name": "dbt_test_staging",
"displayName": "DBT Test Staging Models",
"description": "Execute dbt tests for staging models",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_staging_shopify",
"downstreamTasks": [],
"taskType": "dbtTestTask"
}]
},
{
"name": "dbt_transform_orders",
"displayName": "DBT Transform Orders",
"description": "DBT pipeline for transforming order data into fact tables - runs daily",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_transform_orders",
"scheduleInterval": "0 3 * * *",
"service": "sample_dbtcloud",
"tasks": [{
"name": "dbt_run_orders",
"displayName": "DBT Run Order Transformations",
"description": "Execute dbt run for order fact tables",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_transform_orders",
"downstreamTasks": ["dbt_test_orders"],
"taskType": "dbtRunTask"
},
{
"name": "dbt_test_orders",
"displayName": "DBT Test Order Models",
"description": "Execute dbt tests for order models",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_transform_orders",
"downstreamTasks": [],
"taskType": "dbtTestTask"
}]
},
{
"name": "dbt_analytics_customers",
"displayName": "DBT Customer Analytics",
"description": "DBT pipeline for customer analytics and aggregations - runs daily",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_analytics_customers",
"scheduleInterval": "0 4 * * *",
"service": "sample_dbtcloud",
"tasks": [{
"name": "dbt_build_analytics",
"displayName": "DBT Build Customer Analytics",
"description": "Execute dbt build for customer analytics models",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_analytics_customers",
"downstreamTasks": [],
"taskType": "dbtBuildTask"
}]
},
{
"name": "dbt_snapshot_inventory",
"displayName": "DBT Snapshot Inventory",
"description": "DBT snapshot pipeline for tracking inventory changes - runs hourly",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_snapshot_inventory",
"scheduleInterval": "0 * * * *",
"service": "sample_dbtcloud",
"tasks": [{
"name": "dbt_snapshot_task",
"displayName": "DBT Snapshot",
"description": "Execute dbt snapshot for inventory tracking",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_snapshot_inventory",
"downstreamTasks": [],
"taskType": "dbtSnapshotTask"
}]
},
{
"name": "dbt_test_data_quality",
"displayName": "DBT Data Quality Tests",
"description": "DBT data quality testing pipeline - runs 4 times daily",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_test_data_quality",
"scheduleInterval": "0 */6 * * *",
"service": "sample_dbtcloud",
"tasks": [{
"name": "dbt_test_all",
"displayName": "DBT Test All Models",
"description": "Execute comprehensive dbt tests across all models",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_test_data_quality",
"downstreamTasks": [],
"taskType": "dbtTestTask"
}]
},
{
"name": "dbt_ml_features",
"displayName": "DBT ML Feature Generation",
"description": "DBT pipeline for generating ML features - runs twice daily",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_ml_features",
"scheduleInterval": "0 2,14 * * *",
"service": "sample_dbtcloud",
"tasks": [{
"name": "dbt_run_ml_features",
"displayName": "DBT Run ML Features",
"description": "Execute dbt run for ML feature generation",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_ml_features",
"downstreamTasks": ["dbt_test_ml_features"],
"taskType": "dbtRunTask"
},
{
"name": "dbt_test_ml_features",
"displayName": "DBT Test ML Features",
"description": "Execute dbt tests for ML feature models",
"sourceUrl": "https://cloud.getdbt.com/deploy/123456/projects/654321/runs/dbt_ml_features",
"downstreamTasks": [],
"taskType": "dbtTestTask"
}]
}
]
}