Airflow 3.x API based connector (#26624)

* Add Airflow Connector with API integration

* Add Airflow Connector with API integration

* Update generated TypeScript types

* Add Airflow Connector with API integration improvements

* fix: username password flow for airflow 3, example yaml file, & sidebar docs

* fix type in UI

* Fix integration tests, fixed UI rendering and docs, improved OpenLineageResolver

* Fix pytests

* move connector

* Update generated TypeScript types

* fix: response parsing for astronomer airflow

* feat: added service account auth for airflow rest connection when composer managed airflow along with token

* fix: airflow rest api connection class converter and airflow.md

* feat: add mwaa config support for authentication

* s3 & column lineage

* Update generated TypeScript types

* fix: test airflow mwaa client

* fix: removed unused method, and extra code for parsing response

* fix: git pr checks

* fix: removed airflowapi integration tests that requires real host instance and added test with mocking

* fix test

* improve test coverage

* push coverage

* fix: gitar comments

* fix: removed redundant files

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Keshav Mohta <68001229+keshavmohta09@users.noreply.github.com>
Co-authored-by: Keshav Mohta <keshavmohta09@gmail.com>
Co-authored-by: ulixius9 <mayursingal9@gmail.com>
This commit is contained in:
Sriharsha Chintalapani 2026-03-26 09:15:41 -07:00 committed by GitHub
parent f3bbfc7b75
commit b7797fe3ef
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
55 changed files with 9417 additions and 459 deletions

View file

@ -512,6 +512,9 @@ services:
AIRFLOW__CORE__EXECUTOR: LocalExecutor
AIRFLOW__LOGGING__LOGGING_LEVEL: ${AIRFLOW_LOGGING_LEVEL:-DEBUG}
AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/opt/airflow/dag_generated_configs"
# OpenLineage transport config (optional - enable for lineage via OL)
# AIRFLOW__OPENLINEAGE__TRANSPORT: '{"type": "http", "url": "http://openmetadata-server:8585/api/v1/openlineage/", "endpoint": "lineage", "auth": {"type": "api_key", "api_key": "<OM_JWT_TOKEN>"}}'
# AIRFLOW__OPENLINEAGE__NAMESPACE: local_airflow
DB_HOST: ${AIRFLOW_DB_HOST:-mysql}
DB_PORT: ${AIRFLOW_DB_PORT:-3306}
AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db}

View file

@ -44,6 +44,7 @@ class AWSServices(Enum):
REDSHIFT = "redshift"
REDSHIFT_SERVERLESS = "redshift-serverless"
LAKE_FORMATION = "lakeformation"
MWAA = "mwaa"
def _get_valid_aws_regions() -> set:
@ -277,3 +278,6 @@ class AWSClient:
def get_redshift_serverless_client(self):
    """Return the client that ``get_client`` builds for the Redshift Serverless service name."""
    service_name = AWSServices.REDSHIFT_SERVERLESS.value
    return self.get_client(service_name)
def get_mwaa_client(self):
    """Return the client that ``get_client`` builds for the MWAA service name."""
    service_name = AWSServices.MWAA.value
    return self.get_client(service_name)

View file

@ -0,0 +1,122 @@
# Copyright 2025 Collate
# Licensed under the Collate Community License, Version 1.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Auth helper functions for the Airflow REST API client.
"""
import base64
import traceback
from datetime import datetime, timedelta, timezone
from typing import Callable, Optional, Tuple
import requests
from metadata.utils.credentials import (
get_gcp_impersonate_credentials,
set_google_credentials,
)
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
# A token callback takes no arguments and returns a (token, expiry) pair.
# The callbacks in this module return either a number or a datetime as the
# second element.
# NOTE(review): how the consuming REST client interprets that value is not
# visible here - confirm before changing either constant below.
TokenCallback = Callable[[], Tuple[str, object]]
# Value returned next to a freshly exchanged JWT.
_JWT_REFRESH_INTERVAL_SECONDS = (
25 * 60
) # re-fetch every 25 min, well within Airflow's ~30-60 min TTL
# Value returned next to the Basic-auth fallback header.
_BASIC_AUTH_TTL_SECONDS = (
7 * 24 * 3600
) # basic auth doesn't expire; skip retry for 7 days
def try_exchange_jwt(
    host: str, username: str, password: str, verify: bool
) -> Optional[str]:
    """
    Try to trade username/password for a JWT by POSTing to ``{host}/auth/token``.

    Args:
        host: base URL of the Airflow server.
        username: login name sent in the JSON body.
        password: password sent in the JSON body.
        verify: forwarded to ``requests.post`` as the ``verify`` argument.

    Returns:
        The ``access_token`` field of the JSON response, or None when the
        request, the status check, or the response parsing fails for any reason.
    """
    try:
        credentials = {"username": username, "password": password}
        response = requests.post(
            f"{host}/auth/token", json=credentials, timeout=10, verify=verify
        )
        response.raise_for_status()
        payload = response.json()
        return payload.get("access_token")
    except Exception:
        # Best effort: any failure is logged at debug level and reported as None.
        logger.debug(
            "JWT token exchange failed (likely Airflow 2.x): %s", traceback.format_exc()
        )
        return None
def build_access_token_callback(token: str) -> TokenCallback:
    """
    Wrap a fixed token in a callback.

    The returned callable always yields ``(token, 0)``.
    NOTE(review): the 0 is presumably read by the client as "no expiry" - confirm.
    """

    def _static_token() -> Tuple[str, object]:
        return token, 0

    return _static_token
def build_basic_auth_callback(
    host: str, username: str, password: str, verify: bool
) -> Tuple[TokenCallback, None]:
    """
    Build the token callback for username/password authentication.

    Returns:
        ``(callback, None)``. The second element is the auth token mode; None
        is returned because the callback already embeds the ``Bearer`` or
        ``Basic`` prefix in the value it produces.

    Each time the callback is invoked it calls ``try_exchange_jwt`` again, so a
    JWT is freshly issued on every refresh. When no JWT can be obtained
    (for example on Airflow 2.x servers) it falls back to a Basic auth header.
    """

    def _callback() -> Tuple[str, object]:
        bearer = try_exchange_jwt(host, username, password, verify)
        if not bearer:
            raw_credentials = f"{username}:{password}".encode()
            encoded = base64.b64encode(raw_credentials).decode()
            return f"Basic {encoded}", _BASIC_AUTH_TTL_SECONDS
        return f"Bearer {bearer}", _JWT_REFRESH_INTERVAL_SECONDS

    return _callback, None
def build_gcp_token_callback(gcp_credentials) -> TokenCallback:
    """
    Build a token callback backed by Google credentials.

    ``set_google_credentials`` is called once up front with the supplied
    configuration. The returned callback then, on every invocation:

    - obtains impersonated credentials via ``get_gcp_impersonate_credentials``
      when ``gcpImpersonateServiceAccount.impersonateServiceAccount`` is set,
      otherwise uses ``google.auth.default``;
    - refreshes those credentials;
    - returns ``(token, expiry)``, using now + 55 minutes (UTC) when the
      credentials expose no expiry.
    """
    set_google_credentials(gcp_credentials)
    impersonate = gcp_credentials.gcpImpersonateServiceAccount

    def _callback() -> Tuple[str, datetime]:
        # Imported lazily, as in the rest of this callback's history.
        import google.auth
        from google.auth.transport.requests import Request as AuthRequest

        scopes = ["https://www.googleapis.com/auth/cloud-platform"]
        use_impersonation = impersonate and impersonate.impersonateServiceAccount
        if use_impersonation:
            credentials = get_gcp_impersonate_credentials(
                impersonate_service_account=impersonate.impersonateServiceAccount,
                scopes=scopes,
                lifetime=impersonate.lifetime,
            )
        else:
            credentials, _ = google.auth.default(scopes=scopes)

        credentials.refresh(AuthRequest())

        expiry = getattr(credentials, "expiry", None)
        if not expiry:
            expiry = datetime.now(timezone.utc) + timedelta(minutes=55)
        return (credentials.token, expiry)

    return _callback

View file

@ -0,0 +1,345 @@
# Copyright 2025 Collate
# Licensed under the Collate Community License, Version 1.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Client to interact with the Airflow REST API
"""
import traceback
from typing import List, Optional
from urllib.parse import quote
from requests.exceptions import ConnectionError as RequestsConnectionError
from requests.exceptions import HTTPError
from metadata.generated.schema.entity.services.connections.pipeline.airflowConnection import (
AirflowConnection,
)
from metadata.generated.schema.entity.utils.common.accessTokenConfig import AccessToken
from metadata.generated.schema.entity.utils.common.basicAuthConfig import BasicAuth
from metadata.generated.schema.entity.utils.common.gcpCredentialsConfig import (
GcpServiceAccount,
)
from metadata.generated.schema.entity.utils.common.mwaaAuthConfig import (
MwaaAuthentication,
)
from metadata.ingestion.connections.source_api_client import TrackedREST
from metadata.ingestion.ometa.client import ClientConfig
from metadata.ingestion.source.pipeline.airflow.api.auth import (
build_access_token_callback,
build_basic_auth_callback,
build_gcp_token_callback,
)
from metadata.ingestion.source.pipeline.airflow.api.models import (
AirflowApiDagDetails,
AirflowApiDagRun,
AirflowApiTask,
AirflowApiTaskInstance,
)
from metadata.ingestion.source.pipeline.airflow.api.mwaa import MWAAClient
from metadata.utils.helpers import clean_uri
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
class AirflowApiClient:
"""
Client to interact with the Airflow REST API (v1 for Airflow 2.x, v2 for Airflow 3.x)
"""
def __init__(self, config: AirflowConnection):
    """
    Set up either an MWAA-backed client or a token-authenticated REST client.

    Exactly one of ``self.mwaa_client`` / ``self.client`` is populated; the
    other is set to None. The choice depends on the type of
    ``config.connection.authConfig``.
    """
    self.config = config
    self._detected_version: Optional[str] = None

    rest_config = config.connection
    auth_config = rest_config.authConfig

    if isinstance(auth_config, MwaaAuthentication):
        # AWS-managed Airflow: requests go through MWAAClient, no TrackedREST needed.
        mwaa_config = auth_config.mwaaConfig
        self.mwaa_client = MWAAClient(
            mwaa_config.awsConfig, mwaa_config.mwaaEnvironmentName
        )
        self.client = None
        return

    # Every other auth type goes through the standard REST client.
    self.mwaa_client = None
    auth_token_mode = "Bearer"
    auth_token_fn = None

    if isinstance(auth_config, AccessToken):
        auth_token_fn = build_access_token_callback(
            auth_config.token.get_secret_value()
        )
    elif isinstance(auth_config, BasicAuth):
        # The basic-auth builder also decides the token mode.
        auth_token_fn, auth_token_mode = build_basic_auth_callback(
            host=clean_uri(str(config.hostPort)),
            username=auth_config.username,
            password=auth_config.password.get_secret_value(),
            verify=rest_config.verifySSL,
        )
    elif isinstance(auth_config, GcpServiceAccount):
        auth_token_fn = build_gcp_token_callback(auth_config.credentials)

    client_config = ClientConfig(
        base_url=clean_uri(str(config.hostPort)),
        api_version="api",
        auth_header="Authorization" if auth_token_fn else None,
        auth_token=auth_token_fn,
        auth_token_mode=auth_token_mode,
        verify=rest_config.verifySSL,
    )
    self.client = TrackedREST(client_config, source_name="airflow_api")
@property
def api_version(self) -> str:
    """
    API version string ("v1"/"v2"), resolved once and cached.

    Resolution order: cached value, then "v1" when an MWAA client is in use,
    then the explicitly configured ``apiVersion``, and finally auto-detection
    when the configured value is missing or "auto".
    """
    if not self._detected_version:
        if self.mwaa_client:
            # MWAA handles versioning internally
            self._detected_version = "v1"
        else:
            setting = self.config.connection.apiVersion
            configured = str(setting.value) if setting else "auto"
            if configured == "auto":
                self._detected_version = self._detect_api_version()
            else:
                self._detected_version = configured
    return self._detected_version
def _detect_api_version(self) -> str:
for version in ("v2", "v1"):
try:
self.client.get(f"/{version}/version")
return version
except HTTPError as exc:
if exc.response is not None and exc.response.status_code in (401, 403):
raise
logger.debug(traceback.format_exc())
except (RequestsConnectionError, TimeoutError, OSError):
raise
except Exception:
logger.debug(traceback.format_exc())
logger.warning("Could not detect Airflow API version, defaulting to v1")
return "v1"
@property
def _prefix(self) -> str:
return f"/{self.api_version}"
@property
def _date_field(self) -> str:
return "logical_date" if self.api_version == "v2" else "execution_date"
def _parse_response(self, response):
"""Parse response, handling both dict and Response objects"""
if hasattr(response, "json"):
try:
return response.json()
except Exception as exc:
logger.warning(f"Failed to parse JSON response: {exc}")
logger.warning(
f"Response content type: {response.headers.get('content-type')}"
)
logger.debug(f"Response status code: {response.status_code}")
logger.debug(f"Response text: {response.text[:500]}")
return {}
return response
def get_version(self) -> dict:
    """Return the version payload, from the MWAA client when present, else from ``{prefix}/version``."""
    mwaa = self.mwaa_client
    if mwaa:
        return mwaa.get_version()
    return self._parse_response(self.client.get(f"{self._prefix}/version"))
def list_dags(self, limit: int = 100, offset: int = 0) -> dict:
    """Fetch one page of DAGs (``limit``/``offset``), delegating to the MWAA client when present."""
    mwaa = self.mwaa_client
    if mwaa:
        return mwaa.list_dags(limit=limit, offset=offset)
    endpoint = f"{self._prefix}/dags?limit={limit}&offset={offset}"
    return self._parse_response(self.client.get(endpoint))
def get_dag_tasks(self, dag_id: str) -> dict:
    """Fetch the task list of a DAG; the DAG id is fully percent-encoded in the path."""
    mwaa = self.mwaa_client
    if mwaa:
        return mwaa.get_dag_tasks(dag_id)
    encoded_dag = quote(dag_id, safe="")
    endpoint = f"{self._prefix}/dags/{encoded_dag}/tasks"
    return self._parse_response(self.client.get(endpoint))
def list_dag_runs(self, dag_id: str, limit: int = 10) -> dict:
    """
    Fetch up to ``limit`` runs of a DAG, newest first.

    Ordering uses the version-dependent date field in descending order.
    Delegates to the MWAA client when one is configured.
    """
    mwaa = self.mwaa_client
    if mwaa:
        return mwaa.list_dag_runs(dag_id, limit=limit)
    encoded_dag = quote(dag_id, safe="")
    endpoint = (
        f"{self._prefix}/dags/{encoded_dag}/dagRuns"
        f"?limit={limit}&order_by=-{self._date_field}"
    )
    return self._parse_response(self.client.get(endpoint))
def get_task_instances(self, dag_id: str, dag_run_id: str) -> dict:
    """Fetch the raw task-instance payload of one DAG run (single request, ids percent-encoded)."""
    mwaa = self.mwaa_client
    if mwaa:
        return mwaa.get_task_instances(dag_id, dag_run_id)
    encoded_dag = quote(dag_id, safe="")
    encoded_run = quote(dag_run_id, safe="")
    endpoint = (
        f"{self._prefix}/dags/{encoded_dag}/dagRuns/{encoded_run}/taskInstances"
    )
    return self._parse_response(self.client.get(endpoint))
def _paginate(self, path: str, key: str, limit: int = 100) -> List[dict]:
result: List[dict] = []
offset = 0
total = limit
while offset < total:
separator = "&" if "?" in path else "?"
response = self.client.get(
f"{path}{separator}limit={limit}&offset={offset}"
)
response = self._parse_response(response)
if not response:
break
page = response.get(key, [])
if not page:
break
result.extend(page)
total = response.get("total_entries", len(result))
offset += limit
return result
def get_all_dags(self) -> List[dict]:
    """Return every DAG: from the MWAA client when present, otherwise by paginating ``{prefix}/dags``."""
    mwaa = self.mwaa_client
    if mwaa:
        return mwaa.get_all_dags()
    dags_path = f"{self._prefix}/dags"
    return self._paginate(dags_path, key="dags")
def build_dag_details(self, dag_data: dict) -> AirflowApiDagDetails:
    """
    Turn one raw DAG dict from the API into an ``AirflowApiDagDetails``.

    Delegates to the MWAA client when one is configured. Otherwise it
    normalises tags (dicts with a ``name`` key or plain strings), picks the
    schedule field by API version, and fetches the DAG's tasks. A failure
    while fetching tasks is logged and results in an empty task list.

    Raises:
        KeyError: when ``dag_data`` has no ``dag_id`` (or a task has no ``task_id``).
    """
    if self.mwaa_client:
        return self.mwaa_client.build_dag_details(dag_data)

    dag_id = dag_data["dag_id"]

    tags = []
    for raw_tag in dag_data.get("tags") or []:
        if isinstance(raw_tag, dict):
            label = raw_tag.get("name")
        elif isinstance(raw_tag, str):
            label = raw_tag
        else:
            # Unsupported tag shape: ignore it.
            label = None
        if label:
            tags.append(str(label))

    owners = dag_data.get("owners") or []

    schedule_key = (
        "timetable_summary" if self.api_version == "v2" else "schedule_interval"
    )
    schedule = dag_data.get(schedule_key)
    if isinstance(schedule, dict):
        schedule = schedule.get("value")

    try:
        tasks_data = self.get_dag_tasks(dag_id).get("tasks", [])
    except Exception as exc:
        logger.warning(f"Could not fetch tasks for DAG {dag_id}: {exc}")
        tasks_data = []

    tasks = []
    for task_data in tasks_data:
        tasks.append(
            AirflowApiTask(
                task_id=task_data["task_id"],
                downstream_task_ids=task_data.get("downstream_task_ids"),
                owner=task_data.get("owner"),
                doc_md=task_data.get("doc_md"),
                start_date=task_data.get("start_date"),
                end_date=task_data.get("end_date"),
                class_ref=task_data.get("class_ref"),
            )
        )

    return AirflowApiDagDetails(
        dag_id=dag_id,
        description=dag_data.get("description"),
        fileloc=dag_data.get("fileloc") or dag_data.get("file_loc"),
        is_paused=dag_data.get("is_paused"),
        owners=owners,
        tags=tags,
        schedule_interval=schedule,
        max_active_runs=dag_data.get("max_active_runs"),
        start_date=dag_data.get("start_date"),
        tasks=tasks,
    )
def get_dag_runs(self, dag_id: str, limit: int = 10) -> List[AirflowApiDagRun]:
    """
    Return up to ``limit`` runs of a DAG as ``AirflowApiDagRun`` models.

    Delegates to the MWAA client when present. If listing the runs fails,
    a warning is logged and an empty list is returned. The run date is taken
    from ``logical_date`` and falls back to ``execution_date``.
    """
    if self.mwaa_client:
        return self.mwaa_client.get_dag_runs(dag_id, limit=limit)
    try:
        runs_data = self.list_dag_runs(dag_id, limit=limit).get("dag_runs", [])
    except Exception as exc:
        logger.warning(f"Could not fetch dag runs for {dag_id}: {exc}")
        return []
    return [
        AirflowApiDagRun(
            dag_run_id=run.get("dag_run_id", ""),
            state=run.get("state"),
            execution_date=run.get("logical_date") or run.get("execution_date"),
            start_date=run.get("start_date"),
            end_date=run.get("end_date"),
        )
        for run in runs_data
    ]
def get_task_instances_for_run(
    self, dag_id: str, dag_run_id: str
) -> List[AirflowApiTaskInstance]:
    """
    Return all task instances of one DAG run as ``AirflowApiTaskInstance`` models.

    Delegates to the MWAA client when present; otherwise the paginated
    ``taskInstances`` endpoint is read in full. If fetching fails, a warning
    is logged and an empty list is returned.
    """
    if self.mwaa_client:
        return self.mwaa_client.get_task_instances_for_run(dag_id, dag_run_id)
    try:
        encoded_dag = quote(dag_id, safe="")
        encoded_run = quote(dag_run_id, safe="")
        instances_data = self._paginate(
            f"{self._prefix}/dags/{encoded_dag}/dagRuns/{encoded_run}/taskInstances",
            key="task_instances",
        )
    except Exception as exc:
        logger.warning(
            f"Could not fetch task instances for {dag_id}/{dag_run_id}: {exc}"
        )
        return []

    parsed = []
    for entry in instances_data:
        parsed.append(
            AirflowApiTaskInstance(
                task_id=entry.get("task_id", ""),
                state=entry.get("state"),
                start_date=entry.get("start_date"),
                end_date=entry.get("end_date"),
            )
        )
    return parsed

View file

@ -0,0 +1,63 @@
# Copyright 2025 Collate
# Licensed under the Collate Community License, Version 1.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Pydantic models for Airflow REST API responses
"""
from datetime import datetime
from typing import Dict, List, Optional
from pydantic import BaseModel, ConfigDict
class AirflowApiTask(BaseModel):
    """One task of a DAG as read from the Airflow REST API; unknown fields are kept (``extra="allow"``)."""

    model_config = ConfigDict(extra="allow")

    # Only the task id is mandatory; everything else may be absent.
    task_id: str
    downstream_task_ids: Optional[List[str]] = None
    owner: Optional[str] = None
    doc_md: Optional[str] = None
    # Dates are kept as the raw strings delivered by the API.
    start_date: Optional[str] = None
    end_date: Optional[str] = None
    class_ref: Optional[Dict[str, str]] = None
class AirflowApiDagDetails(BaseModel):
    """A DAG plus its tasks, as assembled from REST API responses; unknown fields are kept (``extra="allow"``)."""

    model_config = ConfigDict(extra="allow")

    # Only the DAG id is mandatory.
    dag_id: str
    description: Optional[str] = None
    fileloc: Optional[str] = None
    is_paused: Optional[bool] = None
    owners: Optional[List[str]] = None
    tags: Optional[List[str]] = None
    schedule_interval: Optional[str] = None
    max_active_runs: Optional[int] = None
    start_date: Optional[datetime] = None
    tasks: List[AirflowApiTask] = []
class AirflowApiDagRun(BaseModel):
    """One execution of a DAG; unknown fields are kept (``extra="allow"``)."""

    model_config = ConfigDict(extra="allow")

    dag_run_id: str
    state: Optional[str] = None
    # Timestamps are parsed into datetimes by the model.
    execution_date: Optional[datetime] = None
    start_date: Optional[datetime] = None
    end_date: Optional[datetime] = None
class AirflowApiTaskInstance(BaseModel):
    """One task execution inside a DAG run; unknown fields are kept (``extra="allow"``)."""

    model_config = ConfigDict(extra="allow")

    task_id: str
    state: Optional[str] = None
    # Timestamps are parsed into datetimes by the model.
    start_date: Optional[datetime] = None
    end_date: Optional[datetime] = None

View file

@ -0,0 +1,254 @@
# Copyright 2025 Collate
# Licensed under the Collate Community License, Version 1.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
MWAA (Managed Workflows for Apache Airflow) REST API implementation
Uses AWS MWAA invoke_rest_api for direct API calls without token management
"""
import json
import traceback
from typing import Dict, List, Optional
from urllib.parse import quote
from metadata.clients.aws_client import AWSClient
from metadata.generated.schema.security.credentials.awsCredentials import AWSCredentials
from metadata.ingestion.source.pipeline.airflow.api.models import (
AirflowApiDagDetails,
AirflowApiDagRun,
AirflowApiTask,
AirflowApiTaskInstance,
)
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
class MWAAClient:
"""
MWAA client that uses AWS MWAA invoke_rest_api for direct Airflow REST API calls.
This approach bypasses token management and uses AWS IAM permissions directly.
"""
def __init__(self, aws_credentials: AWSCredentials, environment_name: str):
    """
    Store the connection settings and create the MWAA service client.

    Args:
        aws_credentials: credentials handed to ``AWSClient``.
        environment_name: MWAA environment name sent as ``Name`` on every call.
    """
    self.environment_name = environment_name
    self.aws_credentials = aws_credentials
    aws_client = AWSClient(aws_credentials)
    self._aws_client = aws_client
    self._mwaa_client = aws_client.get_mwaa_client()
def _invoke_rest_api(
self,
path: str,
method: str = "GET",
body: Optional[Dict] = None,
query: Optional[Dict] = None,
) -> Dict:
"""
Invoke MWAA REST API using AWS MWAA invoke_rest_api method.
Args:
path: API path (e.g., "/dags")
method: HTTP method (GET, POST, etc.)
body: Request body for POST/PUT requests
query: Query parameters
Returns:
Response from Airflow REST API
"""
try:
params = {"Name": self.environment_name, "Path": path, "Method": method}
if body:
params["Body"] = json.dumps(body) if isinstance(body, dict) else body
if query:
params["QueryParameters"] = query
response = self._mwaa_client.invoke_rest_api(**params)
rest_api_response = response.get("RestApiResponse", {})
# Handle different response formats
if isinstance(rest_api_response, str):
try:
return json.loads(rest_api_response)
except json.JSONDecodeError:
logger.warning(
f"Failed to parse MWAA response as JSON: {rest_api_response}"
)
return {"raw_response": rest_api_response}
return rest_api_response
except Exception as e:
logger.error(f"MWAA REST API call failed for {path}: {e}")
logger.debug(traceback.format_exc())
raise
def get_version(self) -> Dict:
    """Return a fixed placeholder payload; no request is sent to the environment."""
    # MWAA doesn't expose version endpoint, so report a static marker instead.
    placeholder = {"version": "MWAA", "status": "connected"}
    return placeholder
def list_dags(self, limit: int = 100, offset: int = 0) -> Dict:
    """Fetch one page of DAGs; ``limit`` and ``offset`` are sent as string query parameters."""
    paging = {"limit": str(limit), "offset": str(offset)}
    return self._invoke_rest_api("/dags", query=paging)
def get_dag_tasks(self, dag_id: str) -> Dict:
    """Fetch the task list of a DAG; the DAG id is fully percent-encoded in the path."""
    encoded_dag = quote(dag_id, safe="")
    return self._invoke_rest_api(f"/dags/{encoded_dag}/tasks")
def list_dag_runs(self, dag_id: str, limit: int = 10) -> Dict:
    """
    List the runs of a DAG ordered by start date, newest first.

    Args:
        dag_id: DAG identifier; percent-encoded into the path.
        limit: maximum number of runs; pass None to omit the parameter.

    Returns:
        The raw response returned by ``_invoke_rest_api``.

    The ordering and limit are passed through the ``query`` argument, the same
    way ``list_dags`` and ``_paginate`` pass theirs, instead of being
    concatenated into the path.
    """
    query = {"order_by": "-start_date"}
    if limit is not None:
        query["limit"] = str(limit)
    return self._invoke_rest_api(
        f"/dags/{quote(dag_id, safe='')}/dagRuns",
        query=query,
    )
def get_task_instances(self, dag_id: str, dag_run_id: str) -> Dict:
    """Fetch the raw task-instance payload of one DAG run (ids percent-encoded in the path)."""
    encoded_dag = quote(dag_id, safe="")
    encoded_run = quote(dag_run_id, safe="")
    endpoint = f"/dags/{encoded_dag}/dagRuns/{encoded_run}/taskInstances"
    return self._invoke_rest_api(endpoint)
def _paginate(self, path: str, key: str, limit: int = 100) -> List[Dict]:
"""Paginate through API results"""
result: List[Dict] = []
offset = 0
total = limit
while offset < total:
query = {"limit": str(limit), "offset": str(offset)}
response = self._invoke_rest_api(path, query=query)
if not response:
break
page = response.get(key, [])
if not page:
break
result.extend(page)
total = response.get("total_entries", len(result))
offset += limit
return result
def get_all_dags(self) -> List[Dict]:
    """Return every DAG by paginating the ``/dags`` endpoint."""
    all_dags = self._paginate("/dags", key="dags")
    return all_dags
def build_dag_details(self, dag_data: Dict) -> AirflowApiDagDetails:
    """
    Turn one raw DAG dict from the API into an ``AirflowApiDagDetails``.

    Tags may be dicts with a ``name`` key or plain strings; other shapes and
    empty names are dropped. The schedule is read from ``schedule_interval``
    (its ``value`` entry when it is a dict). A failure while fetching the
    DAG's tasks is logged and results in an empty task list.

    Raises:
        KeyError: when ``dag_data`` has no ``dag_id`` (or a task has no ``task_id``).
    """
    dag_id = dag_data["dag_id"]

    tags = []
    for raw_tag in dag_data.get("tags") or []:
        if isinstance(raw_tag, dict):
            label = raw_tag.get("name")
        elif isinstance(raw_tag, str):
            label = raw_tag
        else:
            # Unsupported tag shape: ignore it.
            label = None
        if label:
            tags.append(str(label))

    owners = dag_data.get("owners") or []

    schedule = dag_data.get("schedule_interval")
    if isinstance(schedule, dict):
        schedule = schedule.get("value")

    try:
        tasks_data = self.get_dag_tasks(dag_id).get("tasks", [])
    except Exception as exc:
        logger.warning(f"Could not fetch tasks for DAG {dag_id}: {exc}")
        tasks_data = []

    tasks = []
    for task_data in tasks_data:
        tasks.append(
            AirflowApiTask(
                task_id=task_data["task_id"],
                downstream_task_ids=task_data.get("downstream_task_ids"),
                owner=task_data.get("owner"),
                doc_md=task_data.get("doc_md"),
                start_date=task_data.get("start_date"),
                end_date=task_data.get("end_date"),
                class_ref=task_data.get("class_ref"),
            )
        )

    return AirflowApiDagDetails(
        dag_id=dag_id,
        description=dag_data.get("description"),
        fileloc=dag_data.get("fileloc") or dag_data.get("file_loc"),
        is_paused=dag_data.get("is_paused"),
        owners=owners,
        tags=tags,
        schedule_interval=schedule,
        max_active_runs=dag_data.get("max_active_runs"),
        start_date=dag_data.get("start_date"),
        tasks=tasks,
    )
def get_dag_runs(self, dag_id: str, limit: int = 10) -> List[AirflowApiDagRun]:
    """
    Return up to ``limit`` runs of a DAG as ``AirflowApiDagRun`` models.

    If listing the runs fails, a warning is logged and an empty list is
    returned. The run date is taken from ``logical_date`` and falls back to
    ``execution_date``.
    """
    try:
        runs_data = self.list_dag_runs(dag_id, limit=limit).get("dag_runs", [])
    except Exception as exc:
        logger.warning(f"Could not fetch dag runs for {dag_id}: {exc}")
        return []
    return [
        AirflowApiDagRun(
            dag_run_id=run.get("dag_run_id", ""),
            state=run.get("state"),
            execution_date=run.get("logical_date") or run.get("execution_date"),
            start_date=run.get("start_date"),
            end_date=run.get("end_date"),
        )
        for run in runs_data
    ]
def get_task_instances_for_run(
    self, dag_id: str, dag_run_id: str
) -> List[AirflowApiTaskInstance]:
    """
    Return all task instances of one DAG run as ``AirflowApiTaskInstance`` models.

    The paginated ``taskInstances`` endpoint is read in full. If fetching
    fails, a warning is logged and an empty list is returned.
    """
    try:
        encoded_dag = quote(dag_id, safe="")
        encoded_run = quote(dag_run_id, safe="")
        instances_data = self._paginate(
            f"/dags/{encoded_dag}/dagRuns/{encoded_run}/taskInstances",
            key="task_instances",
        )
    except Exception as exc:
        logger.warning(
            f"Could not fetch task instances for {dag_id}/{dag_run_id}: {exc}"
        )
        return []

    parsed = []
    for entry in instances_data:
        parsed.append(
            AirflowApiTaskInstance(
                task_id=entry.get("task_id", ""),
                state=entry.get("state"),
                start_date=entry.get("start_date"),
                end_date=entry.get("end_date"),
            )
        )
    return parsed

View file

@ -0,0 +1,271 @@
# Copyright 2025 Collate
# Licensed under the Collate Community License, Version 1.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Airflow REST API source to extract metadata via Airflow REST API
"""
import traceback
from typing import Iterable, List, Optional
from urllib.parse import quote
from metadata.generated.schema.api.data.createPipeline import CreatePipelineRequest
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
from metadata.generated.schema.entity.data.pipeline import (
Pipeline,
PipelineState,
PipelineStatus,
StatusType,
Task,
TaskStatus,
)
from metadata.generated.schema.entity.services.connections.pipeline.airflowConnection import (
AirflowConnection,
)
from metadata.generated.schema.entity.services.ingestionPipelines.status import (
StackTraceError,
)
from metadata.generated.schema.metadataIngestion.workflow import (
Source as WorkflowSource,
)
from metadata.generated.schema.type.basic import (
EntityName,
FullyQualifiedEntityName,
Markdown,
SourceUrl,
Timestamp,
)
from metadata.ingestion.api.models import Either
from metadata.ingestion.api.steps import InvalidSourceException
from metadata.ingestion.models.ometa_classification import OMetaTagAndClassification
from metadata.ingestion.models.pipeline_status import OMetaPipelineStatus
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.source.pipeline.airflow.api.models import AirflowApiDagDetails
from metadata.ingestion.source.pipeline.pipeline_service import PipelineServiceSource
from metadata.utils import fqn
from metadata.utils.helpers import clean_uri, datetime_to_ts
from metadata.utils.logger import ingestion_logger
from metadata.utils.tag_utils import get_ometa_tag_and_classification, get_tag_labels
logger = ingestion_logger()
# Classification name passed to get_tag_labels for tags read from DAGs.
AIRFLOW_TAG_CATEGORY = "AirflowTags"
# Translation from Airflow state strings to StatusType values.
# Both "queued" and "running" map to Pending, and "upstream_failed" maps to Failed.
# States missing from this table are resolved by the callers' own default.
STATUS_MAP = {
"success": StatusType.Successful.value,
"failed": StatusType.Failed.value,
"queued": StatusType.Pending.value,
"skipped": StatusType.Skipped.value,
"running": StatusType.Pending.value,
"upstream_failed": StatusType.Failed.value,
}
class AirflowApiSource(PipelineServiceSource):
"""
Implements the necessary methods to extract
Pipeline metadata from Airflow's REST API
"""
@classmethod
def create(
    cls, config_dict, metadata: OpenMetadata, pipeline_name: Optional[str] = None
) -> "AirflowApiSource":
    """
    Validate the workflow config and build the source.

    Raises:
        InvalidSourceException: when the service connection is not an
            ``AirflowConnection``.
    """
    config: WorkflowSource = WorkflowSource.model_validate(config_dict)
    connection: AirflowConnection = config.serviceConnection.root.config
    if isinstance(connection, AirflowConnection):
        return cls(config, metadata)
    raise InvalidSourceException(
        f"Expected AirflowConnection, but got {connection}"
    )
def get_pipelines_list(self) -> Iterable[AirflowApiDagDetails]:
    """
    Yield the details of every DAG known to the connection.

    A DAG whose details cannot be built is skipped: the traceback is logged
    at debug level and a warning names the DAG id.
    """
    for raw_dag in self.connection.get_all_dags():
        try:
            yield self.connection.build_dag_details(raw_dag)
        except Exception as exc:
            logger.debug(traceback.format_exc())
            logger.warning(
                f"Error building DAG details for {raw_dag.get('dag_id')}: {exc}"
            )
def get_pipeline_name(self, pipeline_details: AirflowApiDagDetails) -> str:
    """The pipeline name is the DAG id."""
    dag_id = pipeline_details.dag_id
    return dag_id
def get_pipeline_state(
    self, pipeline_details: AirflowApiDagDetails
) -> Optional[PipelineState]:
    """
    Map the DAG's paused flag to a pipeline state.

    Returns:
        None when the flag is unknown, ``Inactive`` for a paused DAG and
        ``Active`` otherwise.
    """
    if pipeline_details.is_paused is None:
        return None
    if pipeline_details.is_paused:
        return PipelineState.Inactive
    return PipelineState.Active
def _get_task_source_url(self, dag_id: str, task_id: str) -> str:
    """
    Build the UI link for a task.

    On API v2 the link is ``{host}/dags/{dag}/tasks/{task}``; otherwise it is
    the task-instance list filtered by DAG id and task id.
    """
    host = clean_uri(self.service_connection.hostPort)
    quoted_dag = quote(dag_id)
    quoted_task = quote(task_id)
    if self.connection.api_version != "v2":
        return (
            f"{host}/taskinstance/list/"
            f"?_flt_3_dag_id={quoted_dag}&_flt_3_task_id={quoted_task}"
        )
    return f"{host}/dags/{quoted_dag}/tasks/{quoted_task}"
def _get_dag_source_url(self, dag_id: str) -> str:
    """Build the UI link for a DAG: ``{host}/dags/{dag}`` on API v2, with a ``/grid`` suffix otherwise."""
    host = clean_uri(self.service_connection.hostPort)
    dag_url = f"{host}/dags/{quote(dag_id)}"
    if self.connection.api_version == "v2":
        return dag_url
    return f"{dag_url}/grid"
def _build_tasks(self, dag_details: AirflowApiDagDetails) -> List[Task]:
    """
    Convert the DAG's API tasks into ``Task`` entities.

    The task type is the ``class_name`` entry of ``class_ref`` when present;
    a missing downstream list becomes an empty list.
    """
    built: List[Task] = []
    for api_task in dag_details.tasks:
        description = Markdown(api_task.doc_md) if api_task.doc_md else None
        source_url = SourceUrl(
            self._get_task_source_url(dag_details.dag_id, api_task.task_id)
        )
        task_type = (
            api_task.class_ref.get("class_name") if api_task.class_ref else None
        )
        built.append(
            Task(
                name=api_task.task_id,
                description=description,
                sourceUrl=source_url,
                downstreamTasks=api_task.downstream_task_ids or [],
                startDate=api_task.start_date,
                endDate=api_task.end_date,
                taskType=task_type,
            )
        )
    return built
def yield_pipeline(
    self, pipeline_details: AirflowApiDagDetails
) -> Iterable[Either[CreatePipelineRequest]]:
    """
    Yield the create request for one DAG.

    After the request is yielded it is registered and the names of its tasks
    are stored in the context as ``task_names``. On any error ``task_names``
    is reset to an empty set and a ``StackTraceError`` is yielded instead.
    """
    try:
        dag_id = pipeline_details.dag_id
        description = (
            Markdown(pipeline_details.description)
            if pipeline_details.description
            else None
        )
        start_date = (
            pipeline_details.start_date.isoformat()
            if pipeline_details.start_date
            else None
        )
        pipeline_request = CreatePipelineRequest(
            name=EntityName(dag_id),
            description=description,
            sourceUrl=SourceUrl(self._get_dag_source_url(dag_id)),
            state=self.get_pipeline_state(pipeline_details),
            concurrency=pipeline_details.max_active_runs,
            pipelineLocation=pipeline_details.fileloc,
            startDate=start_date,
            tasks=self._build_tasks(pipeline_details),
            service=FullyQualifiedEntityName(self.context.get().pipeline_service),
            scheduleInterval=pipeline_details.schedule_interval,
            tags=get_tag_labels(
                metadata=self.metadata,
                tags=pipeline_details.tags or [],
                classification_name=AIRFLOW_TAG_CATEGORY,
                include_tags=self.source_config.includeTags,
            ),
        )
        yield Either(right=pipeline_request)
        self.register_record(pipeline_request=pipeline_request)
        # Remember which tasks exist so status extraction can filter on them.
        self.context.get().task_names = {
            task.name for task in pipeline_request.tasks or []
        }
    except Exception as exc:
        self.context.get().task_names = set()
        yield Either(
            left=StackTraceError(
                name=pipeline_details.dag_id,
                error=f"Error building pipeline from {pipeline_details.dag_id}: {exc}",
                stackTrace=traceback.format_exc(),
            )
        )
def yield_pipeline_status(
    self, pipeline_details: AirflowApiDagDetails
) -> Iterable[Either[OMetaPipelineStatus]]:
    """
    Emit one ``OMetaPipelineStatus`` per recent DAG run of the given DAG.

    At most ``numberOfStatus`` runs (10 when unset) are requested from the
    client. A run is skipped when it has no ``dag_run_id``, when no task
    names were recorded for the pipeline, or when none of
    ``execution_date`` / ``start_date`` / ``end_date`` yields a timestamp.

    The timestamp is resolved *before* the task instances are requested so
    that a run which is going to be skipped does not cost a client call.

    Any exception is reported as a single ``StackTraceError``.
    """
    try:
        run_limit = self.service_connection.numberOfStatus or 10
        dag_runs = self.connection.get_dag_runs(
            pipeline_details.dag_id, limit=run_limit
        )
        for dag_run in dag_runs:
            known_tasks = self.context.get().task_names
            if not dag_run.dag_run_id or not known_tasks:
                continue

            # First usable timestamp wins: execution date, then start, then end.
            # ``map`` is lazy, so later candidates are only converted on demand.
            candidates = (
                dag_run.execution_date,
                dag_run.start_date,
                dag_run.end_date,
            )
            timestamp = next(
                (ts for ts in map(datetime_to_ts, candidates) if ts is not None),
                None,
            )
            if timestamp is None:
                logger.debug(
                    "Skipping DAG run %s for %s — no timestamp available",
                    dag_run.dag_run_id,
                    pipeline_details.dag_id,
                )
                continue

            task_instances = self.connection.get_task_instances_for_run(
                pipeline_details.dag_id, dag_run.dag_run_id
            )
            # Only report tasks that were ingested with the pipeline.
            task_statuses = [
                TaskStatus(
                    name=ti.task_id,
                    executionStatus=STATUS_MAP.get(
                        ti.state, StatusType.Pending.value
                    ),
                    startTime=datetime_to_ts(ti.start_date),
                    endTime=datetime_to_ts(ti.end_date),
                )
                for ti in task_instances
                if ti.task_id in known_tasks
            ]
            pipeline_status = PipelineStatus(
                executionId=dag_run.dag_run_id,
                taskStatus=task_statuses,
                executionStatus=STATUS_MAP.get(
                    dag_run.state, StatusType.Pending.value
                ),
                timestamp=Timestamp(timestamp),
            )
            pipeline_fqn = fqn.build(
                metadata=self.metadata,
                entity_type=Pipeline,
                service_name=self.context.get().pipeline_service,
                pipeline_name=self.context.get().pipeline,
            )
            yield Either(
                right=OMetaPipelineStatus(
                    pipeline_fqn=pipeline_fqn,
                    pipeline_status=pipeline_status,
                )
            )
    except Exception as exc:
        yield Either(
            left=StackTraceError(
                name=f"{pipeline_details.dag_id} Pipeline Status",
                error=f"Error extracting status for DAG {pipeline_details.dag_id}: {exc}",
                stackTrace=traceback.format_exc(),
            )
        )
def yield_pipeline_lineage_details(
    self, pipeline_details: AirflowApiDagDetails
) -> Iterable[Either[AddLineageRequest]]:
    """Return no lineage requests: this method always yields an empty list."""
    no_lineage: list = []
    return no_lineage
def yield_tag(
    self, pipeline_details: AirflowApiDagDetails
) -> Iterable[Either[OMetaTagAndClassification]]:
    """
    Emit the tag/classification entities for the tags attached to a DAG.

    All tags are filed under the ``AIRFLOW_TAG_CATEGORY`` classification and
    the call honours the ``includeTags`` source-config switch.
    """
    dag_tags = pipeline_details.tags or []
    tag_requests = get_ometa_tag_and_classification(
        tags=dag_tags,
        classification_name=AIRFLOW_TAG_CATEGORY,
        tag_description="Airflow Tag",
        classification_description="Tags associated with airflow entities.",
        include_tags=self.source_config.includeTags,
    )
    yield from tag_requests

View file

@ -188,10 +188,21 @@ def _(airflow_connection: SQLiteConnection) -> Engine:
return get_sqlite_connection(airflow_connection)
def get_connection(connection: AirflowConnection) -> Engine:
def get_connection(connection: AirflowConnection):
"""
Create connection
"""
from metadata.generated.schema.entity.utils.airflowRestApiConnection import ( # pylint: disable=import-outside-toplevel
AirflowRestApiConnection,
)
if isinstance(connection.connection, AirflowRestApiConnection):
from metadata.ingestion.source.pipeline.airflow.api.client import ( # pylint: disable=import-outside-toplevel
AirflowApiClient,
)
return AirflowApiClient(connection)
try:
return _get_connection(connection.connection)
except Exception as exc:
@ -211,9 +222,30 @@ class AirflowTaskDetailsAccessError(Exception):
"""
def _test_api_connection(
    metadata: OpenMetadata,
    client,
    service_connection: AirflowConnection,
    automation_workflow: Optional[AutomationWorkflow] = None,
    timeout_seconds: Optional[int] = THREE_MIN,
) -> TestConnectionResult:
    """
    Run the test-connection steps against an Airflow REST API client.

    Steps:
    - ``CheckAccess`` calls ``client.get_version``.
    - ``PipelineDetailsAccess`` lists a single DAG.
    - ``TaskDetailAccess`` is a no-op that always returns ``True``.
    """
    api_steps = {}
    api_steps["CheckAccess"] = client.get_version
    api_steps["PipelineDetailsAccess"] = lambda: client.list_dags(limit=1)
    # No dedicated probe exists here for task details; the step always passes.
    api_steps["TaskDetailAccess"] = lambda: True

    result = test_connection_steps(
        metadata=metadata,
        test_fn=api_steps,
        service_type=service_connection.type.value,
        automation_workflow=automation_workflow,
        timeout_seconds=timeout_seconds,
    )
    return result
def test_connection(
metadata: OpenMetadata,
engine: Engine,
connection_obj,
service_connection: AirflowConnection,
automation_workflow: Optional[AutomationWorkflow] = None,
timeout_seconds: Optional[int] = THREE_MIN,
@ -222,8 +254,20 @@ def test_connection(
Test connection. This can be executed either as part
of a metadata workflow or during an Automation Workflow
"""
from metadata.generated.schema.entity.utils.airflowRestApiConnection import ( # pylint: disable=import-outside-toplevel
AirflowRestApiConnection,
)
session_maker = sessionmaker(bind=engine)
if isinstance(service_connection.connection, AirflowRestApiConnection):
return _test_api_connection(
metadata,
connection_obj,
service_connection,
automation_workflow,
timeout_seconds,
)
session_maker = sessionmaker(bind=connection_obj)
session = session_maker()
def test_pipeline_details_access(session):
@ -252,7 +296,7 @@ def test_connection(
raise AirflowTaskDetailsAccessError(f"Task details access error : {e}")
test_fn = {
"CheckAccess": partial(test_connection_engine_step, engine),
"CheckAccess": partial(test_connection_engine_step, connection_obj),
"PipelineDetailsAccess": partial(test_pipeline_details_access, session),
"TaskDetailAccess": partial(test_task_detail_access, session),
}

View file

@ -200,13 +200,23 @@ class AirflowSource(PipelineServiceSource):
@classmethod
def create(
    cls, config_dict, metadata: OpenMetadata, pipeline_name: Optional[str] = None
):
    """
    Validate the workflow config and instantiate the matching Airflow source.

    Raises ``InvalidSourceException`` when the service connection is not an
    ``AirflowConnection``. When the nested connection is an
    ``AirflowRestApiConnection`` an ``AirflowApiSource`` is returned;
    otherwise an instance of ``cls`` is returned.
    """
    from metadata.generated.schema.entity.utils.airflowRestApiConnection import (  # pylint: disable=import-outside-toplevel
        AirflowRestApiConnection,
    )

    config: WorkflowSource = WorkflowSource.model_validate(config_dict)
    connection: AirflowConnection = config.serviceConnection.root.config
    if not isinstance(connection, AirflowConnection):
        raise InvalidSourceException(
            f"Expected AirflowConnection, but got {connection}"
        )

    if not isinstance(connection.connection, AirflowRestApiConnection):
        return cls(config, metadata)

    # REST-backed connections are served by the API source; it is imported
    # only when this branch is taken.
    from metadata.ingestion.source.pipeline.airflow.api.source import (  # pylint: disable=import-outside-toplevel
        AirflowApiSource,
    )

    return AirflowApiSource(config, metadata)
@property

View file

@ -0,0 +1,993 @@
# Copyright 2025 Collate
# Licensed under the Collate Community License, Version 1.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Comprehensive mocked integration test for Airflow API connector.
This test validates the complete Airflow integration flow without requiring
real Airflow or OpenMetadata services, making it suitable for CI/CD environments.
Tests covered:
- Airflow API client functionality with all authentication methods
- DAG metadata extraction and parsing
- Task extraction and relationship mapping
- DAG run status processing
- Pipeline entity creation in OpenMetadata
- Error handling and edge cases
- OpenLineage integration scenarios
"""
import uuid
from datetime import datetime, timezone
from unittest.mock import MagicMock, Mock, patch
import pytest
import requests
from metadata.generated.schema.entity.services.connections.pipeline.airflowConnection import (
AirflowConnection,
)
from metadata.generated.schema.entity.utils.airflowRestApiConnection import (
AirflowRestApiConnection,
)
from metadata.generated.schema.entity.utils.common import (
accessTokenConfig,
basicAuthConfig,
)
from metadata.ingestion.source.pipeline.airflow.api.client import AirflowApiClient
from metadata.workflow.metadata import MetadataWorkflow
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Dotted path passed to ``unittest.mock.patch`` to replace the ``TrackedREST``
# name inside the Airflow API client module.
_TRACKED_REST_PATH = "metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST"
# Dotted path passed to ``patch`` to replace ``build_basic_auth_callback`` inside
# the Airflow API client module (used by the basic-auth test).
_BASIC_AUTH_CALLBACK_PATH = (
"metadata.ingestion.source.pipeline.airflow.api.client.build_basic_auth_callback"
)
def _make_access_token_config(token: str = "test_token") -> AirflowRestApiConnection:
    """Helper: build a RestAPI connection config that authenticates with a static access token."""
    token_auth = accessTokenConfig.AccessToken(token=token)
    return AirflowRestApiConnection(type="RestAPI", authConfig=token_auth)
def _make_airflow_connection(token: str = "test_token") -> AirflowConnection:
    """Helper: build a full AirflowConnection for localhost:8080 backed by a static access token."""
    rest_config = _make_access_token_config(token)
    return AirflowConnection(hostPort="http://localhost:8080", connection=rest_config)
class TestAirflowApiMockedIntegration:
"""Comprehensive mocked integration tests for Airflow API connector."""
@pytest.fixture
def mock_airflow_responses(self):
"""
Canned payloads standing in for Airflow REST API responses.

Top-level keys:
- "version": version payload.
- "dags": DAG listing with two DAGs and ``total_entries``.
- "tasks": task listing keyed by DAG id (only "sample_etl_dag").
- "dag_runs": DAG-run listing keyed by DAG id (only "sample_etl_dag").
- "task_instances": task-instance listing keyed by DAG id, then run id.

NOTE(review): the payloads are described as Airflow 3.x structures, but
several keys (e.g. ``execution_date``, ``schedule_interval``, ``is_subdag``)
look like older-API fields - confirm against the Airflow 3 API schema.
"""
return {
# --- version payload ---
"version": {"version": "3.0.1", "git_version": "abc123def456"},
# --- DAG listing (two DAGs) ---
"dags": {
"dags": [
{
"dag_id": "sample_etl_dag",
"description": "Sample ETL pipeline",
"fileloc": "/opt/airflow/dags/sample_etl.py",
"file_token": "abc123def456",
"is_paused": False,
"is_active": True,
"is_subdag": False,
"owners": ["data_team"],
"tags": [{"name": "etl"}, {"name": "daily"}],
"schedule_interval": {
"__type": "CronExpression",
"value": "@daily",
},
"timetable_summary": "At 00:00 every day",
"catchup": True,
"max_active_runs": 1,
"max_consecutive_failed_dag_runs": 5,
"has_task_concurrency_limits": False,
"has_import_errors": False,
"next_dagrun": "2024-01-02T00:00:00Z",
"next_dagrun_data_interval_start": "2024-01-01T00:00:00Z",
"next_dagrun_data_interval_end": "2024-01-02T00:00:00Z",
"next_dagrun_create_after": "2024-01-02T00:00:00Z",
"doc_md": "Sample ETL pipeline documentation",
"default_view": "graph",
"orientation": "LR",
"dataset_triggers": [],
"params": {"env": "production"},
"start_date": "2024-01-01T00:00:00Z",
},
{
"dag_id": "ml_training_pipeline",
"description": "ML model training pipeline",
"fileloc": "/opt/airflow/dags/ml_training.py",
"file_token": "def456ghi789",
"is_paused": True,
"is_active": True,
"is_subdag": False,
"owners": ["ml_team"],
"tags": [{"name": "ml"}, {"name": "weekly"}],
"schedule_interval": {
"__type": "CronExpression",
"value": "0 0 * * 1",
},
"timetable_summary": "At 00:00 on Monday",
"catchup": False,
"max_active_runs": 2,
"max_consecutive_failed_dag_runs": 3,
"has_task_concurrency_limits": True,
"has_import_errors": False,
"next_dagrun": None,
"next_dagrun_data_interval_start": None,
"next_dagrun_data_interval_end": None,
"next_dagrun_create_after": None,
"doc_md": None,
"default_view": "graph",
"orientation": "TB",
"dataset_triggers": [
{
"uri": "s3://ml-data/training/",
"extra": {"bucket": "ml-data", "prefix": "training/"},
}
],
"params": {"model_type": "xgboost"},
"start_date": "2024-01-01T00:00:00Z",
},
],
"total_entries": 2,
},
# --- task listing, keyed by DAG id: extract -> transform -> load ---
"tasks": {
"sample_etl_dag": {
"tasks": [
{
"task_id": "extract_data",
"task_display_name": "Extract Data from Source",
"operator_name": "S3KeySensor",
"operator_class_name": "airflow.providers.amazon.aws.sensors.s3.S3KeySensor",
"downstream_task_ids": ["transform_data"],
"upstream_task_ids": [],
"owner": "data_team",
"start_date": "2024-01-01T00:00:00Z",
"end_date": None,
"depends_on_past": False,
"wait_for_downstream": False,
"retries": 3,
"retry_delay": {
"__type": "TimeDelta",
"days": 0,
"seconds": 300,
},
"retry_exponential_backoff": False,
"max_retry_delay": None,
"priority_weight": 1,
"weight_rule": "downstream",
"queue": "default",
"pool": "default_pool",
"pool_slots": 1,
"execution_timeout": {
"__type": "TimeDelta",
"days": 0,
"seconds": 3600,
},
"trigger_rule": "all_success",
"ui_color": "#f0ede4",
"ui_fgcolor": "#000000",
"template_fields": ["bucket_key", "bucket_name"],
"doc_md": "Extracts data from S3 source",
"params": {"bucket_name": "data-lake", "timeout": 3600},
"extra_links": [],
"owner_links": {},
},
{
"task_id": "transform_data",
"task_display_name": "Transform Data with dbt",
"operator_name": "DbtRunOperator",
"operator_class_name": "airflow_dbt.operators.dbt_run_operator.DbtRunOperator",
"downstream_task_ids": ["load_data"],
"upstream_task_ids": ["extract_data"],
"owner": "data_team",
"start_date": "2024-01-01T00:00:00Z",
"end_date": None,
"depends_on_past": True,
"wait_for_downstream": False,
"retries": 2,
"retry_delay": {
"__type": "TimeDelta",
"days": 0,
"seconds": 600,
},
"retry_exponential_backoff": False,
"max_retry_delay": None,
"priority_weight": 5,
"weight_rule": "absolute",
"queue": "dbt_queue",
"pool": "dbt_pool",
"pool_slots": 2,
"execution_timeout": {
"__type": "TimeDelta",
"days": 0,
"seconds": 7200,
},
"trigger_rule": "all_success",
"ui_color": "#8194C7",
"ui_fgcolor": "#FFFFFF",
"template_fields": ["models", "vars"],
"doc_md": "Transforms data using dbt models",
"params": {
"models": "staging",
"vars": {"run_date": "{{ ds }}"},
},
"extra_links": [],
"owner_links": {},
},
{
"task_id": "load_data",
"task_display_name": "Load Data to Warehouse",
"operator_name": "SnowflakeOperator",
"operator_class_name": "airflow.providers.snowflake.operators.snowflake.SnowflakeOperator",
"downstream_task_ids": [],
"upstream_task_ids": ["transform_data"],
"owner": "data_team",
"start_date": "2024-01-01T00:00:00Z",
"end_date": None,
"depends_on_past": False,
"wait_for_downstream": False,
"retries": 1,
"retry_delay": {
"__type": "TimeDelta",
"days": 0,
"seconds": 300,
},
"retry_exponential_backoff": False,
"max_retry_delay": None,
"priority_weight": 3,
"weight_rule": "downstream",
"queue": "warehouse_queue",
"pool": "snowflake_pool",
"pool_slots": 1,
"execution_timeout": {
"__type": "TimeDelta",
"days": 0,
"seconds": 1800,
},
"trigger_rule": "all_success",
"ui_color": "#EDEDED",
"ui_fgcolor": "#000000",
"template_fields": ["sql"],
"doc_md": "Loads transformed data to Snowflake warehouse",
"params": {"database": "analytics", "schema": "public"},
"extra_links": [],
"owner_links": {},
},
]
}
},
# --- DAG-run listing, keyed by DAG id: one finished run, one still running ---
"dag_runs": {
"sample_etl_dag": {
"dag_runs": [
{
"dag_run_id": "scheduled__2024-01-01T00:00:00+00:00",
"dag_id": "sample_etl_dag",
"logical_date": "2024-01-01T00:00:00Z",
"execution_date": "2024-01-01T00:00:00Z",
"start_date": "2024-01-01T00:01:00Z",
"end_date": "2024-01-01T00:15:00Z",
"data_interval_start": "2024-01-01T00:00:00Z",
"data_interval_end": "2024-01-02T00:00:00Z",
"last_scheduling_decision": "2024-01-01T00:00:30Z",
"run_type": "scheduled",
"state": "success",
"external_trigger": False,
"triggering_dataset_events": [],
"conf": {},
"note": "Completed successfully",
},
{
"dag_run_id": "scheduled__2024-01-02T00:00:00+00:00",
"dag_id": "sample_etl_dag",
"logical_date": "2024-01-02T00:00:00Z",
"execution_date": "2024-01-02T00:00:00Z",
"start_date": "2024-01-02T00:01:00Z",
"end_date": None,
"data_interval_start": "2024-01-02T00:00:00Z",
"data_interval_end": "2024-01-03T00:00:00Z",
"last_scheduling_decision": "2024-01-02T00:00:30Z",
"run_type": "scheduled",
"state": "running",
"external_trigger": False,
"triggering_dataset_events": [],
"conf": {},
"note": "Currently running",
},
],
"total_entries": 2,
}
},
# --- task instances, keyed by DAG id and then by DAG-run id ---
"task_instances": {
"sample_etl_dag": {
"scheduled__2024-01-01T00:00:00+00:00": {
"task_instances": [
{
"task_id": "extract_data",
"dag_id": "sample_etl_dag",
"dag_run_id": "scheduled__2024-01-01T00:00:00+00:00",
"logical_date": "2024-01-01T00:00:00Z",
"execution_date": "2024-01-01T00:00:00Z",
"start_date": "2024-01-01T00:01:00Z",
"end_date": "2024-01-01T00:05:00Z",
"duration": 240.0,
"state": "success",
"try_number": 1,
"max_tries": 3,
"hostname": "worker-1",
"unixname": "airflow",
"job_id": 12345,
"pool": "default_pool",
"pool_slots": 1,
"queue": "default",
"priority_weight": 1,
"operator": "S3KeySensor",
"operator_class": "airflow.providers.amazon.aws.sensors.s3.S3KeySensor",
"queued_dttm": "2024-01-01T00:01:00Z",
"queued_by_job_id": None,
"pid": 1234,
"executor": "CeleryExecutor",
"executor_config": {},
"sla_miss": None,
"rendered_fields": {
"bucket_name": "data-lake",
"bucket_key": "raw/2024-01-01/",
},
"test_mode": False,
"trigger": None,
"triggerer_job": None,
"note": "Successfully detected new files",
},
{
"task_id": "transform_data",
"dag_id": "sample_etl_dag",
"dag_run_id": "scheduled__2024-01-01T00:00:00+00:00",
"logical_date": "2024-01-01T00:00:00Z",
"execution_date": "2024-01-01T00:00:00Z",
"start_date": "2024-01-01T00:05:00Z",
"end_date": "2024-01-01T00:10:00Z",
"duration": 300.0,
"state": "success",
"try_number": 1,
"max_tries": 2,
"hostname": "worker-2",
"unixname": "airflow",
"job_id": 12346,
"pool": "dbt_pool",
"pool_slots": 2,
"queue": "dbt_queue",
"priority_weight": 5,
"operator": "DbtRunOperator",
"operator_class": "airflow_dbt.operators.dbt_run_operator.DbtRunOperator",
"queued_dttm": "2024-01-01T00:05:00Z",
"queued_by_job_id": 12345,
"pid": 1235,
"executor": "CeleryExecutor",
"executor_config": {},
"sla_miss": None,
"rendered_fields": {
"models": "staging",
"vars": {"run_date": "2024-01-01"},
},
"test_mode": False,
"trigger": None,
"triggerer_job": None,
"note": "dbt models executed successfully",
},
{
"task_id": "load_data",
"dag_id": "sample_etl_dag",
"dag_run_id": "scheduled__2024-01-01T00:00:00+00:00",
"logical_date": "2024-01-01T00:00:00Z",
"execution_date": "2024-01-01T00:00:00Z",
"start_date": "2024-01-01T00:10:00Z",
"end_date": "2024-01-01T00:15:00Z",
"duration": 300.0,
"state": "success",
"try_number": 1,
"max_tries": 1,
"hostname": "worker-1",
"unixname": "airflow",
"job_id": 12347,
"pool": "snowflake_pool",
"pool_slots": 1,
"queue": "warehouse_queue",
"priority_weight": 3,
"operator": "SnowflakeOperator",
"operator_class": "airflow.providers.snowflake.operators.snowflake.SnowflakeOperator",
"queued_dttm": "2024-01-01T00:10:00Z",
"queued_by_job_id": 12346,
"pid": 1236,
"executor": "CeleryExecutor",
"executor_config": {},
"sla_miss": None,
"rendered_fields": {
"sql": "INSERT INTO analytics.public.fact_table SELECT * FROM staging.transformed_data"
},
"test_mode": False,
"trigger": None,
"triggerer_job": None,
"note": "Data loaded to Snowflake successfully",
},
]
}
}
},
}
@pytest.fixture
def mock_openmetadata_client(self):
    """
    Stand-in OpenMetadata client.

    ``health_check`` returns ``True``; both ``create_or_update`` and
    ``get_by_name`` return the same mocked service entity, which carries a
    random UUID as ``id.root`` and ``"airflow_service"`` as
    ``fullyQualifiedName.root``.
    """
    # Mocked service entity returned by every create/lookup call
    service_entity = MagicMock()
    service_entity.id = MagicMock()
    service_entity.id.root = str(uuid.uuid4())
    service_entity.fullyQualifiedName = MagicMock()
    service_entity.fullyQualifiedName.root = "airflow_service"

    om_client = MagicMock()
    om_client.health_check.return_value = True
    om_client.create_or_update.return_value = service_entity
    om_client.get_by_name.return_value = service_entity
    return om_client
# ------------------------------------------------------------------
# Helpers
# ------------------------------------------------------------------
@staticmethod
def _fake_rest(mock_tracked_rest_cls, responses):
"""
Configure the mock TrackedREST instance's .get() to return *responses*.
*responses* can be:
- a single value always returns that value
- a list returns items one-by-one (side_effect)
- an exception raises it on every call
"""
mock_instance = mock_tracked_rest_cls.return_value
if isinstance(responses, list):
mock_instance.get.side_effect = responses
elif isinstance(responses, Exception):
mock_instance.get.side_effect = responses
else:
mock_instance.get.return_value = responses
return mock_instance
def test_airflow_client_token_authentication(self, mock_airflow_responses):
    """The client built from a static-token connection reports the mocked version."""
    connection = _make_airflow_connection(token="test_token_123")
    version_payload = mock_airflow_responses["version"]

    with patch(_TRACKED_REST_PATH) as rest_cls:
        # Two GETs are queued: the first is meant for API-version detection
        # (/v2/version), the second for the explicit get_version() call.
        self._fake_rest(rest_cls, [version_payload, version_payload])

        client = AirflowApiClient(connection)
        reported = client.get_version()

        assert reported["version"] == "3.0.1"
        rest_cls.return_value.get.assert_called()
def test_airflow_client_basic_authentication(self, mock_airflow_responses):
    """The client built from a username/password connection reports the mocked version."""
    basic_auth = basicAuthConfig.BasicAuth(username="admin", password="admin123")
    connection = AirflowConnection(
        hostPort="http://localhost:8080",
        connection=AirflowRestApiConnection(type="RestAPI", authConfig=basic_auth),
    )

    # The real build_basic_auth_callback would attempt a JWT exchange over
    # HTTP, so it is patched to hand back a canned (callback, None) tuple.
    def dummy_callback():
        return ("Basic YWRtaW46YWRtaW4xMjM=", 7 * 24 * 3600)

    version_payload = mock_airflow_responses["version"]
    with (
        patch(_BASIC_AUTH_CALLBACK_PATH, return_value=(dummy_callback, None)),
        patch(_TRACKED_REST_PATH) as rest_cls,
    ):
        # One response for version detection, one for get_version().
        self._fake_rest(rest_cls, [version_payload, version_payload])

        client = AirflowApiClient(connection)
        reported = client.get_version()

        assert reported["version"] == "3.0.1"
        rest_cls.return_value.get.assert_called()
def test_airflow_api_version_detection(self, mock_airflow_responses):
    """The version payload survives detection and exposes ``git_version``."""
    connection = _make_airflow_connection()
    version_payload = mock_airflow_responses["version"]

    with patch(_TRACKED_REST_PATH) as rest_cls:
        # First response feeds version detection, second feeds get_version().
        self._fake_rest(rest_cls, [version_payload, version_payload])

        client = AirflowApiClient(connection)
        reported = client.get_version()

        assert reported["version"] == "3.0.1"
        assert "git_version" in reported
def test_dag_metadata_extraction_and_parsing(self, mock_airflow_responses):
    """DAG listing and ``build_dag_details`` expose the mocked DAG metadata and tasks."""
    connection = _make_airflow_connection()
    queued_responses = [
        mock_airflow_responses["version"],  # version detection
        mock_airflow_responses["dags"],  # DAG listing, page 1
        mock_airflow_responses["tasks"]["sample_etl_dag"],  # tasks for build_dag_details
    ]

    with patch(_TRACKED_REST_PATH) as rest_cls:
        self._fake_rest(rest_cls, queued_responses)
        client = AirflowApiClient(connection)

        # --- DAG listing ---
        listed = client.get_all_dags()
        assert len(listed) == 2
        assert listed[0]["dag_id"] == "sample_etl_dag"
        assert listed[1]["dag_id"] == "ml_training_pipeline"

        # --- raw fields present on the first DAG ---
        etl_dag = listed[0]
        for expected_key in (
            "file_token",
            "is_active",
            "has_task_concurrency_limits",
            "has_import_errors",
            "timetable_summary",
            "dataset_triggers",
            "params",
        ):
            assert expected_key in etl_dag

        # --- schedule payload ---
        schedule = etl_dag["schedule_interval"]
        assert schedule["__type"] == "CronExpression"
        assert schedule["value"] == "@daily"

        # --- parsed details (fetches the DAG's tasks) ---
        details = client.build_dag_details(etl_dag)

        assert details.dag_id == "sample_etl_dag"
        assert details.description == "Sample ETL pipeline"
        assert details.fileloc == "/opt/airflow/dags/sample_etl.py"
        assert details.is_paused == False
        assert details.owners == ["data_team"]

        # Tags are flattened to their names
        assert "etl" in details.tags
        assert "daily" in details.tags

        # All three tasks are extracted
        assert len(details.tasks) == 3
        extracted_ids = [task.task_id for task in details.tasks]
        assert "extract_data" in extracted_ids
        assert "transform_data" in extracted_ids
        assert "load_data" in extracted_ids

        # Downstream relationship of the first task
        extract_task = next(
            task for task in details.tasks if task.task_id == "extract_data"
        )
        assert hasattr(extract_task, "downstream_task_ids")
        assert "transform_data" in extract_task.downstream_task_ids
def test_dag_runs_and_status_processing(self, mock_airflow_responses):
    """
    ``get_dag_runs`` returns the two mocked runs with their ids and states.

    The returned items are read through attribute access (they are model
    objects, not raw dicts).
    """
    connection = _make_airflow_connection()
    queued_responses = [
        mock_airflow_responses["version"],  # version detection
        mock_airflow_responses["dag_runs"]["sample_etl_dag"],  # DAG-run listing
    ]

    with patch(_TRACKED_REST_PATH) as rest_cls:
        self._fake_rest(rest_cls, queued_responses)
        client = AirflowApiClient(connection)

        runs = client.get_dag_runs("sample_etl_dag", limit=10)
        assert len(runs) == 2

        finished, in_flight = runs[0], runs[1]

        assert finished.dag_run_id == "scheduled__2024-01-01T00:00:00+00:00"
        assert finished.state == "success"
        # The execution date must have been populated from the payload
        assert finished.execution_date is not None

        assert in_flight.dag_run_id == "scheduled__2024-01-02T00:00:00+00:00"
        assert in_flight.state == "running"
        assert in_flight.execution_date is not None
def test_task_instance_extraction(self, mock_airflow_responses):
    """
    ``get_task_instances_for_run`` returns the three mocked task instances.

    The returned items are read through attribute access (they are model
    objects, not raw dicts).
    """
    connection = _make_airflow_connection()
    run_id = "scheduled__2024-01-01T00:00:00+00:00"
    instances_payload = mock_airflow_responses["task_instances"]["sample_etl_dag"][
        run_id
    ]

    with patch(_TRACKED_REST_PATH) as rest_cls:
        # Response 1: version detection. Response 2: task instances, page 1.
        self._fake_rest(
            rest_cls, [mock_airflow_responses["version"], instances_payload]
        )
        client = AirflowApiClient(connection)

        instances = client.get_task_instances_for_run("sample_etl_dag", run_id)
        assert len(instances) == 3

        extract_instance = next(
            instance for instance in instances if instance.task_id == "extract_data"
        )
        assert extract_instance.state == "success"
        assert extract_instance.start_date is not None
        assert extract_instance.end_date is not None
def test_error_handling_and_edge_cases(self):
    """A connection failure propagates; the client works again once GETs succeed."""
    connection = _make_airflow_connection()

    with patch(_TRACKED_REST_PATH) as rest_cls:
        rest = rest_cls.return_value

        # Phase 1: every GET fails, so get_version() must surface the error.
        rest.get.side_effect = requests.exceptions.ConnectionError(
            "Connection refused"
        )
        client = AirflowApiClient(connection)
        with pytest.raises(requests.exceptions.ConnectionError):
            client.get_version()

        # Phase 2: GETs succeed again with a valid version payload.
        rest.get.side_effect = None
        rest.get.return_value = {"version": "3.0.1"}

        # Pin the detected API version to "v1" on the client's private
        # attribute (this sets a value; it does not clear the cache).
        client._detected_version = "v1"

        recovered = client.get_version()
        assert recovered["version"] == "3.0.1"
def test_full_workflow_integration(
    self, mock_airflow_responses, mock_openmetadata_client
):
    """
    Run a ``MetadataWorkflow`` end to end against mocked Airflow and
    OpenMetadata clients and check that entities were sent to OpenMetadata.
    """
    source_section = {
        "type": "airflow",
        "serviceName": "test_airflow_service",
        "serviceConnection": {
            "config": {
                "type": "Airflow",
                "hostPort": "http://localhost:8080",
                "numberOfStatus": 5,
                "connection": {
                    "type": "RestAPI",
                    "authConfig": {"token": "test_token"},
                },
            }
        },
        "sourceConfig": {"config": {"type": "PipelineMetadata"}},
    }
    server_section = {
        "loggerLevel": "INFO",
        "openMetadataServerConfig": {
            "hostPort": "http://localhost:8585/api",
            "authProvider": "openmetadata",
            "securityConfig": {"jwtToken": "test-jwt-token"},
        },
    }
    workflow_config = {
        "source": source_section,
        "sink": {"type": "metadata-rest", "config": {}},
        "workflowConfig": server_section,
    }

    # Responses are consumed in call order by the mocked REST client.
    queued_responses = [
        mock_airflow_responses["version"],  # version detection
        mock_airflow_responses["dags"],  # DAG listing, page 1
        mock_airflow_responses["tasks"]["sample_etl_dag"],  # DAG tasks
        mock_airflow_responses["dag_runs"]["sample_etl_dag"],  # DAG runs
        mock_airflow_responses["task_instances"]["sample_etl_dag"][
            "scheduled__2024-01-01T00:00:00+00:00"
        ],  # task instances, page 1
    ]

    with (
        patch(
            "metadata.workflow.base.create_ometa_client",
            return_value=mock_openmetadata_client,
        ),
        patch(
            "metadata.ingestion.source.pipeline.pipeline_service.PipelineServiceSource.test_connection"
        ),
        patch(_TRACKED_REST_PATH) as rest_cls,
    ):
        self._fake_rest(rest_cls, queued_responses)

        workflow = MetadataWorkflow.create(workflow_config)
        workflow.execute()
        workflow.stop()

        assert mock_openmetadata_client.create_or_update.called
        recorded_calls = mock_openmetadata_client.create_or_update.call_args_list
        assert len(recorded_calls) > 0
def test_openlineage_integration_scenarios(self):
    """
    Post a sample OpenLineage COMPLETE event to a mocked endpoint.

    NOTE(review): the call goes straight to the patched ``requests.post``
    mock, so this only checks the canned response - no connector code runs.
    """
    run_id = str(uuid.uuid4())
    ol_event = {
        "eventType": "COMPLETE",
        "eventTime": datetime.now(timezone.utc).isoformat(),
        "producer": "https://airflow.apache.org",
        "schemaURL": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/definitions/RunEvent",
        "run": {"runId": run_id},
        "job": {"namespace": "airflow", "name": "sample_etl_dag"},
        "inputs": [{"namespace": "postgres", "name": "public.source_table"}],
        "outputs": [{"namespace": "postgres", "name": "public.target_table"}],
    }
    request_headers = {
        "Authorization": "Bearer test",
        "Content-Type": "application/json",
    }

    with patch("requests.post") as mock_post:
        canned_response = Mock()
        canned_response.status_code = 200
        canned_response.json.return_value = {
            "status": "success",
            "lineageEdgesCreated": 1,
        }
        mock_post.return_value = canned_response

        response = mock_post(
            "http://localhost:8585/api/v1/openlineage/lineage",
            headers=request_headers,
            json=ol_event,
        )
        payload = response.json()

        assert payload["status"] == "success"
        assert payload["lineageEdgesCreated"] == 1
def test_airflow_3x_compatibility(self, mock_airflow_responses):
    """The raw DAG listing keeps dataset triggers, schedule and timetable fields."""
    connection = _make_airflow_connection()
    queued_responses = [
        mock_airflow_responses["version"],  # version detection
        mock_airflow_responses["version"],  # get_version()
        mock_airflow_responses["dags"],  # DAG listing, page 1
    ]

    with patch(_TRACKED_REST_PATH) as rest_cls:
        self._fake_rest(rest_cls, queued_responses)
        client = AirflowApiClient(connection)

        # Version
        reported = client.get_version()
        assert reported["version"] == "3.0.1"

        # Locate the ML pipeline in the listing
        listed = client.get_all_dags()
        ml_dag = next(
            entry for entry in listed if entry["dag_id"] == "ml_training_pipeline"
        )

        # Dataset triggers
        assert "dataset_triggers" in ml_dag
        triggers = ml_dag["dataset_triggers"]
        assert len(triggers) == 1
        assert triggers[0]["uri"] == "s3://ml-data/training/"

        # Schedule payload
        assert "schedule_interval" in ml_dag
        assert ml_dag["schedule_interval"]["__type"] == "CronExpression"
        assert ml_dag["schedule_interval"]["value"] == "0 0 * * 1"

        # Timetable summary
        assert ml_dag["timetable_summary"] == "At 00:00 on Monday"

        # Remaining metadata keys
        for expected_key in (
            "file_token",
            "has_task_concurrency_limits",
            "has_import_errors",
            "next_dagrun_create_after",
        ):
            assert expected_key in ml_dag
def test_pagination_handling(self, mock_airflow_responses):
    """``get_all_dags`` stitches two pages (100 + 50 DAGs) into one list of 150."""
    connection = _make_airflow_connection()

    def make_dag(index):
        """Build the listing entry for the DAG with the given index."""
        return {
            "dag_id": f"dag_{index}",
            "description": f"DAG {index}",
            "file_token": f"token_{index}",
            "is_active": True,
            "tags": [],
            "schedule_interval": {
                "__type": "CronExpression",
                "value": "@daily",
            },
            "timetable_summary": "At 00:00 every day",
            "dataset_triggers": [],
        }

    def make_page(indices):
        """Wrap the entries for *indices* in a page reporting 150 total entries."""
        return {"dags": [make_dag(i) for i in indices], "total_entries": 150}

    page1_response = make_page(range(100))
    page2_response = make_page(range(100, 150))

    with patch(_TRACKED_REST_PATH) as rest_cls:
        # Version detection first, then the two listing pages in order.
        self._fake_rest(
            rest_cls,
            [mock_airflow_responses["version"], page1_response, page2_response],
        )
        client = AirflowApiClient(connection)

        all_dags = client.get_all_dags()

        assert len(all_dags) == 150
        assert all_dags[0]["dag_id"] == "dag_0"
        assert all_dags[-1]["dag_id"] == "dag_149"
        assert "file_token" in all_dags[0]
        assert "timetable_summary" in all_dags[0]
def test_special_character_handling(self, mock_airflow_responses):
    """Special characters in DAG ids, descriptions, paths and tags are preserved.

    Checks both the raw listing from ``get_all_dags`` and the object returned
    by ``build_dag_details``.
    """
    dag_entry = {
        "dag_id": "etl-pipeline_with.special@chars",
        "description": "ETL with special chars: <>\"'&",
        "fileloc": "/opt/airflow/dags/special chars/dag file.py",
        "file_token": "special_token_123",
        "is_active": True,
        "is_paused": False,
        "owners": ["data-team"],
        "tags": [{"name": "special-tag_with.chars"}],
        "schedule_interval": {
            "__type": "CronExpression",
            "value": "@daily",
        },
        "timetable_summary": "At 00:00 every day",
        "dataset_triggers": [],
        "params": {},
    }
    special_dag_response = {"dags": [dag_entry], "total_entries": 1}
    config = _make_airflow_connection()

    # Responses are consumed in call order by the fake REST client.
    canned_responses = [
        mock_airflow_responses["version"],  # _detect_api_version
        special_dag_response,  # _paginate page 1
        {"tasks": []},  # build_dag_details → get_dag_tasks
    ]

    with patch(_TRACKED_REST_PATH) as mock_tracked_rest_cls:
        self._fake_rest(mock_tracked_rest_cls, canned_responses)

        airflow_client = AirflowApiClient(config)
        dag = airflow_client.get_all_dags()[0]

        # Raw listing keeps the identifiers verbatim.
        assert dag["dag_id"] == "etl-pipeline_with.special@chars"
        assert "special chars:" in dag["description"]
        first_tag = dag["tags"][0]
        assert first_tag["name"] == "special-tag_with.chars"

        # Test DAG details building
        dag_details = airflow_client.build_dag_details(dag)
        assert dag_details.dag_id == "etl-pipeline_with.special@chars"
        assert "special-tag_with.chars" in dag_details.tags
# Run specific test methods
if __name__ == "__main__":
    # pytest.main returns the run's exit code; raise SystemExit with it so that
    # invoking this file directly fails (non-zero status) when the test fails.
    raise SystemExit(
        pytest.main(
            [
                __file__
                + "::TestAirflowApiMockedIntegration::test_full_workflow_integration",
                "-v",
            ]
        )
    )

View file

@ -0,0 +1,52 @@
"""
DAG that triggers OpenLineage events with inlets/outlets for lineage testing.
Uses Airflow 3.x native OpenLineage support.
"""
from datetime import datetime
from airflow import DAG
from airflow.lineage.entities import Table as LineageTable
from airflow.operators.bash import BashOperator
# Defaults handed to the DAG below via ``default_args``.
default_args = dict(owner="test_owner", depends_on_past=False, retries=0)

# Both lineage tables share cluster and database; only the table name differs.
_TABLE_LOCATION = {"cluster": "default", "database": "test_db"}
inlet_table = LineageTable(name="source_table", **_TABLE_LOCATION)
outlet_table = LineageTable(name="target_table", **_TABLE_LOCATION)

with DAG(
    dag_id="lineage_etl",
    description="ETL pipeline with lineage for E2E testing",
    default_args=default_args,
    start_date=datetime(2024, 1, 1),
    schedule=None,
    catchup=False,
    tags=["e2e_test", "lineage"],
) as dag:
    # First task: declares the source table as its inlet.
    extract = BashOperator(
        bash_command="echo extracting data from source",
        task_id="extract",
        inlets=[inlet_table],
    )

    # Middle task: no lineage metadata attached.
    transform = BashOperator(
        bash_command="echo transforming data",
        task_id="transform",
    )

    # Last task: declares the target table as its outlet.
    load = BashOperator(
        bash_command="echo loading data to target",
        task_id="load",
        outlets=[outlet_table],
    )

    # Linear dependency chain: extract, then transform, then load.
    extract.set_downstream(transform)
    transform.set_downstream(load)

View file

@ -0,0 +1,40 @@
"""
DAG with OpenLineage Dataset inlets/outlets for E2E lineage testing.
When this DAG runs with the OL provider installed and transport configured,
Airflow 3.x emits COMPLETE events with these datasets as inputs/outputs.
The OM OpenLineage endpoint resolves them to existing sample_data tables.
"""
from datetime import datetime, timezone
from airflow.decorators import dag, task
from openlineage.client.event_v2 import Dataset
# OpenLineage datasets used as the task's inlet and outlet; both sit in the
# "sample_data" namespace.
_SAMPLE_NAMESPACE = "sample_data"
RAW_ORDER = Dataset(name="ecommerce_db.shopify.raw_order", namespace=_SAMPLE_NAMESPACE)
FACT_ORDER = Dataset(
    name="ecommerce_db.shopify.fact_order", namespace=_SAMPLE_NAMESPACE
)


# Single-task DAG, created paused and without a schedule.
@dag(
    dag_id="ol_lineage_etl",
    description="ETL with OpenLineage inlets/outlets for E2E lineage testing",
    start_date=datetime(2024, 1, 1, tzinfo=timezone.utc),
    schedule=None,
    catchup=False,
    is_paused_upon_creation=True,
    tags=["e2e_test", "openlineage", "lineage"],
)
def ol_lineage_etl():
    # The only task: reads RAW_ORDER (inlet) and writes FACT_ORDER (outlet).
    @task(inlets=[RAW_ORDER], outlets=[FACT_ORDER])
    def transform():
        print("Transforming raw_order -> fact_order")

    transform()


# Instantiate the DAG at import time.
ol_lineage_etl()

View file

@ -0,0 +1,29 @@
"""
Sample branching DAG for AirflowApi connector E2E testing.
Tests that parallel task structures are captured correctly.
"""
from datetime import datetime
from airflow import DAG
from airflow.operators.bash import BashOperator
# Defaults handed to the DAG below via ``default_args``.
default_args = dict(owner="test_owner", depends_on_past=False, retries=0)

with DAG(
    dag_id="sample_branching",
    description="Branching pipeline for E2E testing",
    default_args=default_args,
    start_date=datetime(2024, 1, 1),
    schedule=None,
    catchup=False,
    tags=["e2e_test"],
) as dag:
    start = BashOperator(bash_command="echo start", task_id="start")
    branch_a = BashOperator(bash_command="echo branch_a", task_id="branch_a")
    branch_b = BashOperator(bash_command="echo branch_b", task_id="branch_b")
    join = BashOperator(bash_command="echo join", task_id="join")

    # Fan out from ``start`` to both branches, then fan back in to ``join``.
    parallel_branches = [branch_a, branch_b]
    start.set_downstream(parallel_branches)
    join.set_upstream(parallel_branches)

View file

@ -0,0 +1,28 @@
"""
Sample ETL DAG for AirflowApi connector E2E testing.
A simple 3-task DAG: extract -> transform -> load
"""
from datetime import datetime, timedelta
from airflow import DAG
from airflow.operators.bash import BashOperator
# Defaults handed to the DAG below via ``default_args``.
default_args = dict(owner="test_owner", depends_on_past=False, retries=0)

with DAG(
    dag_id="sample_etl",
    description="Sample ETL pipeline for E2E testing",
    default_args=default_args,
    start_date=datetime(2024, 1, 1),
    schedule=timedelta(days=1),
    catchup=False,
    tags=["e2e_test", "etl"],
) as dag:
    extract = BashOperator(bash_command="echo extracting", task_id="extract")
    transform = BashOperator(bash_command="echo transforming", task_id="transform")
    load = BashOperator(bash_command="echo loading", task_id="load")

    # Linear dependency chain: extract, then transform, then load.
    extract.set_downstream(transform)
    transform.set_downstream(load)

View file

@ -0,0 +1,316 @@
# Copyright 2025 Collate
# Licensed under the Collate Community License, Version 1.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Integration test: OpenLineage events to OM lineage resolution.
Verifies that OL COMPLETE events with input/output datasets are resolved
to existing OM table entities and lineage edges are created.
Prerequisites:
- OM server running at localhost:8585
- Sample data ingested (tables exist in sample_data service)
- OpenLineage settings: enabled=true, eventTypeFilter includes COMPLETE
"""
import json
import uuid
import pytest
import requests
from metadata.generated.schema.entity.data.pipeline import Pipeline
from metadata.generated.schema.entity.data.table import Table
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
OpenMetadataConnection,
)
from metadata.generated.schema.security.client.openMetadataJWTClientConfig import (
OpenMetadataJWTClientConfig,
)
from metadata.ingestion.ometa.ometa_api import OpenMetadata
# Base URL of the OpenMetadata server the tests talk to, and its API root.
OM_HOST = "http://localhost:8585"
OM_API = f"{OM_HOST}/api"
# JWT sent as the bearer token on every request and handed to the OpenMetadata
# client fixture. Its payload names the "admin" subject.
# NOTE(review): presumably the default token of a local dev deployment;
# confirm it is not a credential for any shared environment.
OM_JWT = (
"eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGci"
"OiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcm"
"ciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7"
"HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7"
"P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVK"
"wEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfd"
"QllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"
)
# Endpoint that OpenLineage run events are POSTed to.
OL_ENDPOINT = f"{OM_HOST}/api/v1/openlineage/lineage"
# Headers shared by every raw ``requests`` call in this module.
AUTH_HEADERS = {
"Authorization": f"Bearer {OM_JWT}",
"Content-Type": "application/json",
}
def _om_reachable() -> bool:
    """Report whether the OM version endpoint answers with HTTP 200.

    Any exception raised while probing is treated as "not reachable".
    """
    try:
        probe = requests.get(f"{OM_API}/v1/system/version", timeout=5)
        reachable = probe.status_code == 200
    except Exception:
        reachable = False
    return reachable
def _sample_data_exists() -> bool:
    """Report whether the sample ``raw_order`` table can be fetched by name.

    Returns True only for an HTTP 200 answer; any exception raised while
    probing yields False.
    """
    try:
        table_url = (
            f"{OM_API}/v1/tables/name/sample_data.ecommerce_db.shopify.raw_order"
        )
        probe = requests.get(table_url, headers=AUTH_HEADERS, timeout=5)
        found = probe.status_code == 200
    except Exception:
        found = False
    return found
# Module-wide skip conditions, evaluated once when the module is imported.
_skip_without_server = pytest.mark.skipif(
    not _om_reachable(), reason="OM not running at localhost:8585"
)
_skip_without_sample_data = pytest.mark.skipif(
    not _sample_data_exists(), reason="Sample data tables not ingested"
)
pytestmark = [_skip_without_server, _skip_without_sample_data]
@pytest.fixture(scope="module")
def metadata():
    """Module-scoped OpenMetadata client authenticated with ``OM_JWT``.

    Asserts the client's health check passes before handing it to tests.
    """
    jwt_config = OpenMetadataJWTClientConfig(jwtToken=OM_JWT)
    server_config = OpenMetadataConnection(
        hostPort=OM_API,
        authProvider="openmetadata",
        securityConfig=jwt_config,
    )
    client = OpenMetadata(server_config)
    assert client.health_check()
    return client
@pytest.fixture(scope="module")
def ensure_ol_settings():
    """Ensure OpenLineage settings allow COMPLETE events.

    PUTs the settings once per module and asserts the server answered 200.
    """
    ol_settings = {
        "enabled": True,
        "autoCreateEntities": True,
        "eventTypeFilter": ["COMPLETE"],
        "defaultPipelineService": "openlineage",
    }
    payload = {
        "config_type": "openLineageSettings",
        "config_value": ol_settings,
    }
    resp = requests.put(
        f"{OM_API}/v1/system/settings",
        headers=AUTH_HEADERS,
        json=payload,
        timeout=10,
    )
    assert resp.status_code == 200, f"Failed to set OL settings: {resp.text}"
def _send_ol_event(
    job_namespace: str,
    job_name: str,
    inputs: list,
    outputs: list,
    run_id: "str | None" = None,
    event_type: str = "COMPLETE",
) -> dict:
    """POST one OpenLineage run event to ``OL_ENDPOINT`` and return the JSON reply.

    Args:
        job_namespace: Value for ``job.namespace`` in the event.
        job_name: Value for ``job.name`` in the event.
        inputs: Input dataset dicts, passed through unchanged.
        outputs: Output dataset dicts, passed through unchanged.
        run_id: Run identifier; a random UUID4 string is generated when omitted.
        event_type: OpenLineage ``eventType``; defaults to ``"COMPLETE"``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        AssertionError: If the endpoint does not answer with HTTP 200.
    """
    event = {
        "eventType": event_type,
        # Fixed timestamp: the tests here do not assert on event time.
        "eventTime": "2026-03-23T12:00:00Z",
        "schemaURL": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/definitions/RunEvent",
        "producer": "https://airflow.apache.org",
        "run": {"runId": run_id or str(uuid.uuid4())},
        "job": {"namespace": job_namespace, "name": job_name},
        "inputs": inputs,
        "outputs": outputs,
    }
    resp = requests.post(OL_ENDPOINT, headers=AUTH_HEADERS, json=event, timeout=10)
    assert (
        resp.status_code == 200
    ), f"OL endpoint returned {resp.status_code}: {resp.text}"
    return resp.json()
class TestOpenLineageEndpointAcceptsEvents:
    """Smoke tests for which payloads the OpenLineage endpoint accepts."""

    def test_accepts_complete_event(self, ensure_ol_settings):
        """A minimal COMPLETE event with no datasets is reported as a success."""
        reply = _send_ol_event(
            job_namespace="test",
            job_name="test_job",
            inputs=[],
            outputs=[],
        )
        assert reply["status"] == "success"

    def test_rejects_without_schema_url(self):
        """An event without ``schemaURL`` is answered with HTTP 400."""
        run_ref = {"runId": str(uuid.uuid4())}
        job_ref = {"namespace": "test", "name": "test"}
        # Same shape as a valid event, minus the "schemaURL" key.
        event = {
            "eventType": "COMPLETE",
            "eventTime": "2026-03-23T12:00:00Z",
            "producer": "test",
            "run": run_ref,
            "job": job_ref,
            "inputs": [],
            "outputs": [],
        }
        resp = requests.post(OL_ENDPOINT, headers=AUTH_HEADERS, json=event, timeout=10)
        assert resp.status_code == 400
class TestOpenLineageResolvesExistingTables:
    """Verify OL events with inputs/outputs matching existing sample_data tables
    create lineage edges in OM."""

    @staticmethod
    def _lineage_details(edge: dict) -> dict:
        """Return an edge's ``lineageDetails``, treating a missing or null value as ``{}``."""
        return edge.get("lineageDetails") or {}

    @staticmethod
    def _send_raw_to_fact_event() -> dict:
        """Send the COMPLETE event linking sample ``raw_order`` to ``fact_order``."""
        return _send_ol_event(
            job_namespace="airflow_e2e_lineage",
            job_name="sample_transform",
            inputs=[
                {"namespace": "sample_data", "name": "ecommerce_db.shopify.raw_order"}
            ],
            outputs=[
                {"namespace": "sample_data", "name": "ecommerce_db.shopify.fact_order"}
            ],
        )

    def test_creates_lineage_edge_for_known_tables(self, metadata, ensure_ol_settings):
        """Send an OL event referencing sample_data tables and verify lineage."""
        src_fqn = "sample_data.ecommerce_db.shopify.raw_order"
        tgt_fqn = "sample_data.ecommerce_db.shopify.fact_order"
        # Verify tables exist
        src = metadata.get_by_name(entity=Table, fqn=src_fqn)
        tgt = metadata.get_by_name(entity=Table, fqn=tgt_fqn)
        assert src is not None, f"Table {src_fqn} must exist"
        assert tgt is not None, f"Table {tgt_fqn} must exist"
        result = self._send_raw_to_fact_event()
        assert (
            result["lineageEdgesCreated"] > 0
        ), f"Expected lineage edges to be created, got: {json.dumps(result, indent=2)}"

    def test_lineage_edge_has_openlineage_source(self, metadata, ensure_ol_settings):
        """Verify the created lineage edge has source=OpenLineage."""
        src_fqn = "sample_data.ecommerce_db.shopify.raw_order"
        # Emit the event here as well so this test does not rely on another
        # test having run first to create the edge.
        self._send_raw_to_fact_event()
        lineage = metadata.get_lineage_by_name(
            entity=Table, fqn=src_fqn, up_depth=0, down_depth=3
        )
        assert lineage is not None, f"No lineage returned for {src_fqn}"
        downstream = lineage.get("downstreamEdges") or []
        sources = [self._lineage_details(e).get("source") for e in downstream]
        assert "OpenLineage" in sources, (
            f"Expected at least one OpenLineage-sourced edge from {src_fqn}, "
            f"got sources: {sources}"
        )

    def test_lineage_references_existing_pipeline(self, metadata, ensure_ol_settings):
        """When an AirflowApi pipeline already exists, OL events should resolve
        to it via the sample_airflow service (which has sample DAGs)."""
        # sample_airflow service has pipeline "sample_airflow.dim_product_etl"
        pipeline_fqn = "sample_airflow.dim_product_etl"
        pipeline = metadata.get_by_name(entity=Pipeline, fqn=pipeline_fqn)
        if not pipeline:
            pytest.skip(f"Pipeline {pipeline_fqn} not in sample data")
        # The OL event's job namespace/name won't auto-match to this pipeline.
        # Instead, add lineage manually via API with source=OpenLineage to prove
        # the lineage model supports it. This is what would happen when
        # BigQuery/Spark operators emit OL events that the mapper resolves.
        from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
        from metadata.generated.schema.type.entityLineage import (
            EntitiesEdge,
            LineageDetails,
        )
        from metadata.generated.schema.type.entityLineage import Source as LineageSource
        from metadata.generated.schema.type.entityReference import EntityReference

        src_fqn = "sample_data.ecommerce_db.shopify.raw_customer"
        tgt_fqn = "sample_data.ecommerce_db.shopify.dim_address"
        src = metadata.get_by_name(entity=Table, fqn=src_fqn)
        tgt = metadata.get_by_name(entity=Table, fqn=tgt_fqn)
        if not src or not tgt:
            pytest.skip(f"Tables {src_fqn} or {tgt_fqn} not in sample data")
        metadata.add_lineage(
            AddLineageRequest(
                edge=EntitiesEdge(
                    fromEntity=EntityReference(id=src.id.root, type="table"),
                    toEntity=EntityReference(id=tgt.id.root, type="table"),
                    lineageDetails=LineageDetails(
                        pipeline=EntityReference(id=pipeline.id.root, type="pipeline"),
                        source=LineageSource.OpenLineage,
                    ),
                )
            )
        )
        lineage = metadata.get_lineage_by_name(
            entity=Table, fqn=src_fqn, up_depth=0, down_depth=3
        )
        assert lineage is not None, f"No lineage returned for {src_fqn}"
        # Keep only OpenLineage-sourced edges that also carry a pipeline reference.
        ol_edges = [
            e
            for e in lineage.get("downstreamEdges") or []
            if self._lineage_details(e).get("source") == "OpenLineage"
            and self._lineage_details(e).get("pipeline") is not None
        ]
        assert len(ol_edges) > 0, "Expected OL edge with pipeline reference"
        pipeline_ref = ol_edges[0]["lineageDetails"]["pipeline"]
        assert pipeline_ref["type"] == "pipeline"
        # ``or ""`` keeps the membership check safe if the key is present but null.
        assert "dim_product_etl" in (pipeline_ref.get("fullyQualifiedName") or "")

    def test_no_edges_for_nonexistent_tables(self, ensure_ol_settings):
        """OL events with unknown table names should create 0 edges."""
        result = _send_ol_event(
            job_namespace="test",
            job_name="unknown_job",
            inputs=[
                {"namespace": "nonexistent_service", "name": "fake_schema.fake_table"}
            ],
            outputs=[
                {"namespace": "nonexistent_service", "name": "fake_schema.fake_output"}
            ],
        )
        assert result["lineageEdgesCreated"] == 0

    def test_no_edges_for_empty_inputs_outputs(self, ensure_ol_settings):
        """OL events with no inputs/outputs should create 0 edges."""
        result = _send_ol_event(
            job_namespace="test",
            job_name="empty_job",
            inputs=[],
            outputs=[],
        )
        assert result["lineageEdgesCreated"] == 0
class TestOpenLineageEventTypeFiltering:
    """Checks that the configured ``eventTypeFilter`` is honoured by the endpoint."""

    def test_start_events_skipped_when_filter_is_complete(self, ensure_ol_settings):
        """START events should be skipped when filter only allows COMPLETE."""
        event = {
            "eventType": "START",
            "eventTime": "2026-03-23T12:00:00Z",
            "schemaURL": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/definitions/RunEvent",
            "producer": "test",
            "run": {"runId": str(uuid.uuid4())},
            "job": {"namespace": "test", "name": "start_test"},
            "inputs": [
                {"namespace": "sample_data", "name": "ecommerce_db.shopify.raw_order"}
            ],
            "outputs": [
                {"namespace": "sample_data", "name": "ecommerce_db.shopify.fact_order"}
            ],
        }
        resp = requests.post(OL_ENDPOINT, headers=AUTH_HEADERS, json=event, timeout=10)
        # Fail with the server's own message on an error response instead of a
        # JSON-decode or KeyError further down.
        assert resp.ok, f"OL endpoint returned {resp.status_code}: {resp.text}"
        result = resp.json()
        assert (
            result["lineageEdgesCreated"] == 0
        ), "START events should not create edges"

View file

@ -0,0 +1,562 @@
# Copyright 2025 Collate
# Licensed under the Collate Community License, Version 1.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Tests for Airflow REST API authentication methods.
These tests verify every auth path in auth.py and the AirflowApiClient constructor:
- AccessToken : static bearer token, no refresh
- BasicAuth : Airflow 3.x JWT exchange (success) and Basic auth fallback
- GcpCredentials : all 4 GCP credential types + service account impersonation
- Token refresh : GCP callback is called on every invocation (google-auth
manages expiry internally; REST client calls callback when
its own expires_in check triggers)
"""
import base64
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock, patch
import pytest
from metadata.generated.schema.entity.utils.common.accessTokenConfig import AccessToken
from metadata.generated.schema.entity.utils.common.basicAuthConfig import BasicAuth
from metadata.generated.schema.entity.utils.common.gcpCredentialsConfig import (
GcpServiceAccount,
)
from metadata.ingestion.source.pipeline.airflow.api.auth import (
_BASIC_AUTH_TTL_SECONDS,
_JWT_REFRESH_INTERVAL_SECONDS,
build_access_token_callback,
build_basic_auth_callback,
build_gcp_token_callback,
try_exchange_jwt,
)
from metadata.ingestion.source.pipeline.airflow.api.client import AirflowApiClient
# ── Helpers ─────────────────────────────────────────────────────────────────
def _make_config(auth_variant):
"""
Build a minimal AirflowConnection config mock for AirflowApiClient.
auth_variant is a real typed instance (AccessToken, BasicAuth,
GcpCredentialsConfig) or a plain MagicMock for the unknown-type test.
"""
rest_config = MagicMock()
rest_config.authConfig = auth_variant
rest_config.apiVersion = MagicMock()
rest_config.apiVersion.value = "v1"
rest_config.verifySSL = True
config = MagicMock()
config.hostPort = "http://airflow.example.com:8080"
config.connection = rest_config
return config
# ── try_exchange_jwt ─────────────────────────────────────────────────────────
class TestTryExchangeJwt:
    """Unit tests for ``try_exchange_jwt`` with ``requests.post`` mocked out."""

    _POST = "metadata.ingestion.source.pipeline.airflow.api.auth.requests.post"

    @staticmethod
    def _json_response(payload):
        # Fake HTTP response whose raise_for_status() is a no-op and whose
        # json() yields ``payload``.
        response = MagicMock()
        response.json.return_value = payload
        response.raise_for_status = MagicMock()
        return response

    @patch(_POST)
    def test_returns_access_token_on_success(self, mock_post):
        """The ``access_token`` field of the reply is returned as-is."""
        mock_post.return_value = self._json_response({"access_token": "jwt_abc123"})

        token = try_exchange_jwt(
            "http://airflow.example.com:8080", "admin", "password", True
        )

        assert token == "jwt_abc123"
        mock_post.assert_called_once_with(
            "http://airflow.example.com:8080/auth/token",
            json={"username": "admin", "password": "password"},
            timeout=10,
            verify=True,
        )

    @patch(_POST)
    def test_returns_none_when_http_error(self, mock_post):
        """An HTTPError from raise_for_status() results in None."""
        from requests.exceptions import HTTPError

        failing = MagicMock()
        failing.raise_for_status.side_effect = HTTPError("401")
        mock_post.return_value = failing

        assert try_exchange_jwt("http://airflow.example.com:8080", "u", "p", True) is None

    @patch(_POST)
    def test_returns_none_on_connection_error(self, mock_post):
        """An exception raised by the POST itself results in None."""
        mock_post.side_effect = Exception("Connection refused")

        outcome = try_exchange_jwt("http://airflow.example.com:8080", "u", "p", False)

        assert outcome is None

    @patch(_POST)
    def test_returns_none_when_token_missing_from_response(self, mock_post):
        """A successful reply without ``access_token`` results in None."""
        mock_post.return_value = self._json_response({"detail": "no token here"})

        outcome = try_exchange_jwt("http://airflow.example.com:8080", "u", "p", True)

        assert outcome is None

    @patch(_POST)
    def test_passes_verify_ssl_false(self, mock_post):
        """``verify=False`` is forwarded to ``requests.post``."""
        mock_post.return_value = self._json_response({"access_token": "tok"})

        try_exchange_jwt("http://airflow.example.com:8080", "u", "p", False)

        forwarded_kwargs = mock_post.call_args.kwargs
        assert forwarded_kwargs["verify"] is False
# ── build_access_token_callback ──────────────────────────────────────────────
class TestBuildAccessTokenCallback:
    """Unit tests for the static-token callback factory."""

    def test_returns_static_token(self):
        """The callback hands back exactly the token it was built with."""
        callback = build_access_token_callback("my_static_token")
        token, _ = callback()
        assert token == "my_static_token"

    def test_expiry_is_zero(self):
        """The second element of the callback result is 0."""
        callback = build_access_token_callback("tok")
        expiry = callback()[1]
        assert expiry == 0

    def test_callback_is_idempotent(self):
        """Two consecutive calls return equal results."""
        callback = build_access_token_callback("tok")
        first_result = callback()
        second_result = callback()
        assert first_result == second_result

    def test_different_tokens_produce_different_callbacks(self):
        """Each callback keeps its own token."""
        callback_a = build_access_token_callback("token_a")
        callback_b = build_access_token_callback("token_b")
        token_a, _ = callback_a()
        token_b, _ = callback_b()
        assert token_a == "token_a"
        assert token_b == "token_b"
# ── build_basic_auth_callback ────────────────────────────────────────────────
class TestBuildBasicAuthCallback:
    """Unit tests for ``build_basic_auth_callback`` with the JWT exchange mocked."""

    _EXCHANGE = "metadata.ingestion.source.pipeline.airflow.api.auth.try_exchange_jwt"

    @patch(_EXCHANGE, return_value="jwt_token_xyz")
    def test_jwt_success_returns_bearer_mode(self, _mock_jwt):
        """A successful exchange yields a ``Bearer`` token and the JWT refresh interval."""
        callback, mode = build_basic_auth_callback(
            "http://airflow.example.com:8080", "admin", "pass", True
        )
        assert mode is None

        token, expiry = callback()
        assert token == "Bearer jwt_token_xyz"
        assert expiry == _JWT_REFRESH_INTERVAL_SECONDS

    @patch(_EXCHANGE, return_value=None)
    def test_jwt_failure_falls_back_to_basic(self, _mock_jwt):
        """A failed exchange yields a base64 ``Basic`` token and the basic-auth TTL."""
        callback, mode = build_basic_auth_callback(
            "http://airflow.example.com:8080", "admin", "secret", True
        )
        assert mode is None

        token, expiry = callback()
        encoded_pair = base64.b64encode(b"admin:secret").decode()
        assert token == f"Basic {encoded_pair}"
        assert expiry == _BASIC_AUTH_TTL_SECONDS

    @patch(_EXCHANGE, return_value=None)
    def test_basic_token_encodes_colon_in_password_correctly(self, _mock_jwt):
        """A colon inside the password survives the base64 round trip."""
        callback, _mode = build_basic_auth_callback(
            "http://h", "user", "pass:word", True
        )
        token, _ = callback()
        assert token.startswith("Basic ")

        encoded_part = token[len("Basic ") :]
        decoded = base64.b64decode(encoded_part).decode()
        assert decoded == "user:pass:word"

    @patch(_EXCHANGE, return_value=None)
    def test_passes_host_and_credentials_to_jwt_exchange(self, mock_jwt):
        """Host, credentials and verify flag are forwarded unchanged to the exchange."""
        callback, _ = build_basic_auth_callback(
            "http://my.airflow.com", "alice", "pw", False
        )
        callback()
        mock_jwt.assert_called_once_with("http://my.airflow.com", "alice", "pw", False)
# ── build_gcp_token_callback ─────────────────────────────────────────────────
class TestBuildGcpTokenCallback:
    """Unit tests for ``build_gcp_token_callback`` with google-auth mocked out."""

    _SET_CREDS = (
        "metadata.ingestion.source.pipeline.airflow.api.auth.set_google_credentials"
    )
    _IMPERSONATE = "metadata.ingestion.source.pipeline.airflow.api.auth.get_gcp_impersonate_credentials"

    def _make_gcp_credentials(self, impersonate=None):
        # Stand-in for the GCP credentials config; only the impersonation field is set.
        return MagicMock(gcpImpersonateServiceAccount=impersonate)

    @staticmethod
    def _invoke(callback):
        # Call the token callback with the google-auth transport Request patched.
        with patch("google.auth.transport.requests.Request"):
            return callback()

    @patch(_SET_CREDS)
    def test_set_google_credentials_called_on_build(self, mock_set):
        """Building the callback calls set_google_credentials once with the config."""
        gcp_creds = self._make_gcp_credentials()
        build_gcp_token_callback(gcp_creds)
        mock_set.assert_called_once_with(gcp_creds)

    @patch("google.auth.default")
    @patch(_SET_CREDS)
    def test_callback_returns_token_and_expiry(self, _mock_set, mock_default):
        """Token and expiry come from the default credentials, refreshed once."""
        expiry = datetime.now(timezone.utc) + timedelta(hours=1)
        mock_creds = MagicMock(token="gcp_access_token", expiry=expiry)
        mock_default.return_value = (mock_creds, "project")

        callback = build_gcp_token_callback(self._make_gcp_credentials())
        token, returned_expiry = self._invoke(callback)

        assert token == "gcp_access_token"
        assert returned_expiry == expiry
        mock_creds.refresh.assert_called_once()

    @patch("google.auth.default")
    @patch(_SET_CREDS)
    def test_fallback_expiry_when_credentials_have_no_expiry(
        self, _mock_set, mock_default
    ):
        """Without a credential expiry, the returned one lands 54 to 56 minutes ahead."""
        mock_creds = MagicMock(token="tok")
        mock_creds.expiry = None
        mock_default.return_value = (mock_creds, "project")

        callback = build_gcp_token_callback(self._make_gcp_credentials())

        lower_bound = datetime.now(timezone.utc) + timedelta(minutes=54)
        _, expiry = self._invoke(callback)
        upper_bound = datetime.now(timezone.utc) + timedelta(minutes=56)

        assert lower_bound < expiry < upper_bound

    @patch(_IMPERSONATE)
    @patch(_SET_CREDS)
    def test_impersonation_uses_impersonate_credentials(
        self, _mock_set, mock_impersonate
    ):
        """With impersonation configured, the impersonated credentials supply the token."""
        impersonate = MagicMock()
        impersonate.impersonateServiceAccount = "svc@project.iam.gserviceaccount.com"
        impersonate.lifetime = 3600

        mock_impersonated = MagicMock(
            token="impersonated_token",
            expiry=datetime.now(timezone.utc) + timedelta(hours=1),
        )
        mock_impersonate.return_value = mock_impersonated

        callback = build_gcp_token_callback(
            self._make_gcp_credentials(impersonate=impersonate)
        )
        token, _ = self._invoke(callback)

        assert token == "impersonated_token"
        mock_impersonate.assert_called_once_with(
            impersonate_service_account="svc@project.iam.gserviceaccount.com",
            scopes=["https://www.googleapis.com/auth/cloud-platform"],
            lifetime=3600,
        )
        mock_impersonated.refresh.assert_called_once()

    @patch(_IMPERSONATE)
    @patch("google.auth.default")
    @patch(_SET_CREDS)
    def test_no_impersonation_when_field_is_none(
        self, _mock_set, mock_default, mock_impersonate
    ):
        """With no impersonation field, only google.auth.default is used."""
        mock_creds = MagicMock(token="tok", expiry=None)
        mock_default.return_value = (mock_creds, "project")

        callback = build_gcp_token_callback(
            self._make_gcp_credentials(impersonate=None)
        )
        self._invoke(callback)

        mock_impersonate.assert_not_called()
        mock_default.assert_called_once()

    @patch("google.auth.default")
    @patch(_SET_CREDS)
    def test_callback_calls_refresh_on_every_invocation(self, _mock_set, mock_default):
        """Three callback invocations trigger three credential refreshes."""
        mock_creds = MagicMock(
            token="tok",
            expiry=datetime.now(timezone.utc) + timedelta(hours=1),
        )
        mock_default.return_value = (mock_creds, "project")

        callback = build_gcp_token_callback(self._make_gcp_credentials())
        with patch("google.auth.transport.requests.Request"):
            for _ in range(3):
                callback()

        assert mock_creds.refresh.call_count == 3

    @patch("google.auth.default")
    @patch(_SET_CREDS)
    def test_scopes_include_cloud_platform(self, _mock_set, mock_default):
        """google.auth.default is asked for the cloud-platform scope."""
        mock_creds = MagicMock(token="tok", expiry=None)
        mock_default.return_value = (mock_creds, "project")

        callback = build_gcp_token_callback(self._make_gcp_credentials())
        self._invoke(callback)

        mock_default.assert_called_once_with(
            scopes=["https://www.googleapis.com/auth/cloud-platform"]
        )

    @patch("google.auth.default")
    @patch(_SET_CREDS)
    def test_expiry_returned_from_credentials(self, _mock_set, mock_default):
        """An expiry set on the credentials is returned unchanged."""
        future = datetime(2030, 1, 1, tzinfo=timezone.utc)
        mock_default.return_value = (MagicMock(token="tok", expiry=future), "project")

        callback = build_gcp_token_callback(self._make_gcp_credentials())
        _, expiry = self._invoke(callback)

        assert expiry == future
# ── GCP credential type coverage ─────────────────────────────────────────────
class TestGcpCredentialTypeCoverage:
    """
    Verify that set_google_credentials is called (and the token callback works)
    for each of the 4 GCP credential types. The actual credential handling is in
    credentials.py; here we confirm build_gcp_token_callback wires through to it.

    NOTE(review): the credential objects are MagicMocks labelled with the
    variant name, not instances of the real config models, so every case
    exercises the same code path. Swap in real models if per-type behaviour
    must be covered.
    """

    @pytest.mark.parametrize(
        "gcp_config_type_name",
        [
            "GcpCredentialsValues",
            "GcpCredentialsPath",
            "GcpExternalAccount",
            "GcpADC",
        ],
    )
    @patch("google.auth.default")
    @patch("metadata.ingestion.source.pipeline.airflow.api.auth.set_google_credentials")
    def test_set_google_credentials_called_for_all_types(
        self, mock_set, mock_default, gcp_config_type_name
    ):
        mock_creds = MagicMock(token="tok", expiry=None)
        mock_default.return_value = (mock_creds, "project")
        # Name the mocks after the variant under test so the parameter is
        # actually used and a failure report identifies the case.
        gcp_credentials = MagicMock(name=f"GcpCredentials[{gcp_config_type_name}]")
        gcp_credentials.gcpConfig = MagicMock(name=gcp_config_type_name)
        gcp_credentials.gcpImpersonateServiceAccount = None
        cb = build_gcp_token_callback(gcp_credentials)
        mock_set.assert_called_once_with(gcp_credentials)
        with patch("google.auth.transport.requests.Request"):
            token, _ = cb()
        assert token == "tok", f"unexpected token for {gcp_config_type_name}"
# ── AirflowApiClient constructor (e2e) ────────────────────────────────────────
class TestAirflowApiClientAuthConfig:
"""
End-to-end tests for AirflowApiClient.__init__. TrackedREST is patched so
no network calls are made; we inspect the ClientConfig passed to it.
auth_variant instances are real Pydantic models isinstance() checks in
client.py dispatch correctly without any authType discriminator field.
"""
@patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
def test_access_token_sets_bearer_mode_and_static_token(self, mock_rest_cls):
variant = AccessToken(token="static_token_value")
config = _make_config(variant)
AirflowApiClient(config)
client_config = mock_rest_cls.call_args[0][0]
assert client_config.auth_header == "Authorization"
assert client_config.auth_token_mode == "Bearer"
token, expiry = client_config.auth_token()
assert token == "static_token_value"
assert expiry == 0
@patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
@patch(
"metadata.ingestion.source.pipeline.airflow.api.auth.try_exchange_jwt",
return_value="jwt_from_airflow3",
)
def test_basic_auth_with_jwt_exchange_sets_bearer(self, _mock_jwt, mock_rest_cls):
variant = BasicAuth(username="admin", password="secret")
config = _make_config(variant)
AirflowApiClient(config)
client_config = mock_rest_cls.call_args[0][0]
assert client_config.auth_header == "Authorization"
assert client_config.auth_token_mode is None
token, _ = client_config.auth_token()
assert token == "Bearer jwt_from_airflow3"
@patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
@patch(
"metadata.ingestion.source.pipeline.airflow.api.auth.try_exchange_jwt",
return_value=None,
)
def test_basic_auth_without_jwt_falls_back_to_basic_mode(
self, _mock_jwt, mock_rest_cls
):
variant = BasicAuth(username="admin", password="secret")
config = _make_config(variant)
AirflowApiClient(config)
client_config = mock_rest_cls.call_args[0][0]
assert client_config.auth_header == "Authorization"
assert client_config.auth_token_mode is None
token, _ = client_config.auth_token()
expected = base64.b64encode(b"admin:secret").decode()
assert token == f"Basic {expected}"
@patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
@patch("metadata.ingestion.source.pipeline.airflow.api.auth.set_google_credentials")
@patch("google.auth.default")
def test_gcp_credentials_sets_bearer_with_live_callback(
self, mock_default, _mock_set, mock_rest_cls
):
expiry = datetime.now(timezone.utc) + timedelta(hours=1)
mock_creds = MagicMock(token="gcp_tok", expiry=expiry)
mock_default.return_value = (mock_creds, "project")
gcp_credentials_mock = MagicMock()
gcp_credentials_mock.gcpImpersonateServiceAccount = None
variant = GcpServiceAccount.model_construct(credentials=gcp_credentials_mock)
config = _make_config(variant)
AirflowApiClient(config)
client_config = mock_rest_cls.call_args[0][0]
assert client_config.auth_header == "Authorization"
assert client_config.auth_token_mode == "Bearer"
with patch("google.auth.transport.requests.Request"):
token, returned_expiry = client_config.auth_token()
assert token == "gcp_tok"
assert returned_expiry == expiry
@patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
def test_unknown_auth_type_sets_no_auth_header(self, mock_rest_cls):
    """
    An auth config of an unrecognised type (a bare MagicMock) must leave both
    the header name and the token callback unset on the REST client config.
    """
    unsupported_auth = MagicMock()
    AirflowApiClient(_make_config(unsupported_auth))

    rest_cfg = mock_rest_cls.call_args[0][0]
    assert rest_cfg.auth_header is None
    assert rest_cfg.auth_token is None
@patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
def test_base_url_uses_host_port(self, mock_rest_cls):
    """The REST client's base URL must contain the host from ``hostPort``."""
    connection_cfg = _make_config(AccessToken(token="tok"))
    connection_cfg.hostPort = "https://my-composer.example.com:443"
    AirflowApiClient(connection_cfg)

    rest_cfg = mock_rest_cls.call_args[0][0]
    assert "my-composer.example.com" in rest_cfg.base_url
@patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
def test_verify_ssl_false_passed_to_client(self, mock_rest_cls):
    """``verifySSL = False`` on the connection must reach the REST config as ``verify``."""
    connection_cfg = _make_config(AccessToken(token="tok"))
    connection_cfg.connection.verifySSL = False
    AirflowApiClient(connection_cfg)

    rest_cfg = mock_rest_cls.call_args[0][0]
    assert rest_cfg.verify is False
@patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
def test_api_version_is_api(self, mock_rest_cls):
    """The REST config's ``api_version`` must be the literal ``"api"``."""
    AirflowApiClient(_make_config(AccessToken(token="tok")))

    rest_cfg = mock_rest_cls.call_args[0][0]
    assert rest_cfg.api_version == "api"
# ── GCP token refresh integration ────────────────────────────────────────────
class TestGcpTokenRefreshIntegration:
    """
    Verify that repeated callback calls each refresh credentials independently.
    This mirrors how REST._request() calls auth_token() each time expires_in passes.
    """

    @patch("google.auth.default")
    @patch("metadata.ingestion.source.pipeline.airflow.api.auth.set_google_credentials")
    def test_each_callback_call_refreshes_credentials(self, _mock_set, mock_default):
        """Three callback invocations must trigger three refreshes and yield three tokens."""
        issued_tokens = ["token_v1", "token_v2", "token_v3"]
        refresh_count = 0

        def _record_refresh(_req):
            # Stand-in for credentials.refresh(): only counts the calls.
            nonlocal refresh_count
            refresh_count += 1

        def _token_after_refresh(_self):
            # The n-th refresh exposes the n-th token, clamped to the last one.
            return issued_tokens[min(refresh_count - 1, len(issued_tokens) - 1)]

        fake_creds = MagicMock()
        fake_creds.expiry = datetime.now(timezone.utc) + timedelta(hours=1)
        fake_creds.refresh.side_effect = _record_refresh
        # A property has to be installed on the mock's type to be evaluated on access.
        type(fake_creds).token = property(_token_after_refresh)
        mock_default.return_value = (fake_creds, "project")

        credentials_cfg = MagicMock()
        credentials_cfg.gcpImpersonateServiceAccount = None
        token_callback = build_gcp_token_callback(credentials_cfg)

        with patch("google.auth.transport.requests.Request"):
            first, _ = token_callback()
            second, _ = token_callback()
            third, _ = token_callback()

        assert fake_creds.refresh.call_count == 3
        assert first == "token_v1"
        assert second == "token_v2"
        assert third == "token_v3"

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,713 @@
# Copyright 2025 Collate
# Licensed under the Collate Community License, Version 1.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Tests for AirflowApi pipeline connector
"""
from datetime import datetime
from unittest.mock import MagicMock, patch
import pytest
from requests.exceptions import ConnectionError as RequestsConnectionError
from requests.exceptions import HTTPError
from metadata.generated.schema.entity.data.pipeline import PipelineState, StatusType
from metadata.generated.schema.entity.utils.common.accessTokenConfig import AccessToken
from metadata.ingestion.source.pipeline.airflow.api.client import AirflowApiClient
from metadata.ingestion.source.pipeline.airflow.api.models import (
AirflowApiDagDetails,
AirflowApiDagRun,
AirflowApiTask,
AirflowApiTaskInstance,
)
from metadata.ingestion.source.pipeline.airflow.api.source import (
STATUS_MAP,
AirflowApiSource,
)
from metadata.utils.helpers import datetime_to_ts
# ── Shared Helpers ───────────────────────────────────────────────────────
def _make_client(mock_rest_cls, api_version="v1"):
    """
    Build an AirflowApiClient on top of a patched TrackedREST class.

    Returns a ``(client, rest_mock)`` pair, where ``rest_mock`` is the instance
    the patched TrackedREST class returns. The connection uses AccessToken
    auth, SSL verification enabled and the requested ``api_version``.
    """
    rest_instance = MagicMock()
    mock_rest_cls.return_value = rest_instance

    # Keyword arguments to MagicMock are set as attributes on the mock.
    rest_connection = MagicMock(
        authConfig=AccessToken(token="test_token"),
        apiVersion=MagicMock(value=api_version),
        verifySSL=True,
    )
    service_config = MagicMock(
        hostPort="http://localhost:8080",
        connection=rest_connection,
    )

    return AirflowApiClient(service_config), mock_rest_cls.return_value
def _make_source_and_dag(task_names=None):
    """
    Create a mocked AirflowApiSource and a minimal DAG for status/pipeline tests.

    Args:
        task_names: value stored on the mocked context as ``task_names``.
            Only ``None`` selects the default ``{"task_1"}``; an explicitly
            passed empty collection is kept as-is.

    Returns:
        ``(source, dag)`` where ``source`` is a MagicMock configured with a
        v1 connection and ``dag`` is an AirflowApiDagDetails with two tasks
        (``task_1`` -> ``task_2``).
    """
    source = MagicMock()
    source.service_connection = MagicMock()
    source.service_connection.numberOfStatus = 5
    source.service_connection.hostPort = "http://airflow.example.com:8080"

    context = MagicMock()
    context.pipeline_service = "test_service"
    context.pipeline = "test_dag"
    # Compare against None so a caller can deliberately pass an empty set.
    context.task_names = {"task_1"} if task_names is None else task_names
    source.context.get.return_value = context

    source.connection = MagicMock()
    source.connection.api_version = "v1"
    source.metadata = MagicMock()
    source.source_config = MagicMock()
    source.source_config.includeTags = True

    # Fixed URL builders installed on the mock in place of auto-generated attributes.
    source._get_dag_source_url = (
        lambda dag_id: f"http://airflow.example.com:8080/dags/{dag_id}/grid"
    )
    source._get_task_source_url = lambda dag_id, task_id: (
        f"http://airflow.example.com:8080/taskinstance/list/"
        f"?_flt_3_dag_id={dag_id}&_flt_3_task_id={task_id}"
    )
    # Delegate to the real task builder, bound to the mocked source.
    source._build_tasks = lambda details: AirflowApiSource._build_tasks(source, details)
    source.register_record = MagicMock()
    # paused -> Inactive, not paused -> Active, unknown (None) -> None.
    source.get_pipeline_state = lambda details: (
        (PipelineState.Inactive if details.is_paused else PipelineState.Active)
        if details.is_paused is not None
        else None
    )

    dag = AirflowApiDagDetails(
        dag_id="test_dag",
        description="A test pipeline",
        is_paused=False,
        tags=["team:data"],
        schedule_interval="@daily",
        tasks=[
            AirflowApiTask(
                task_id="task_1",
                downstream_task_ids=["task_2"],
                class_ref={"class_name": "PythonOperator"},
                doc_md="Task 1 docs",
            ),
            AirflowApiTask(task_id="task_2"),
        ],
    )
    return source, dag
# ── Status Mapping ───────────────────────────────────────────────────────
class TestStatusMapping:
    """Pin the Airflow state to StatusType translations held in STATUS_MAP."""

    def test_success_maps_to_successful(self):
        mapped = STATUS_MAP["success"]
        assert mapped == StatusType.Successful.value

    def test_failed_maps_to_failed(self):
        mapped = STATUS_MAP["failed"]
        assert mapped == StatusType.Failed.value

    def test_queued_maps_to_pending(self):
        mapped = STATUS_MAP["queued"]
        assert mapped == StatusType.Pending.value

    def test_skipped_maps_to_skipped(self):
        mapped = STATUS_MAP["skipped"]
        assert mapped == StatusType.Skipped.value

    def test_running_maps_to_pending(self):
        mapped = STATUS_MAP["running"]
        assert mapped == StatusType.Pending.value

    def test_upstream_failed_maps_to_failed(self):
        mapped = STATUS_MAP["upstream_failed"]
        assert mapped == StatusType.Failed.value

    def test_unknown_state_defaults(self):
        # Looking up an unmapped state with a Pending default yields Pending.
        fallback = StatusType.Pending.value
        assert STATUS_MAP.get("nonexistent", fallback) == StatusType.Pending.value
# ── Models ───────────────────────────────────────────────────────────────
class TestModels:
    """Construction and default-value checks for the Airflow API models."""

    def test_dag_details_minimal(self):
        """Only ``dag_id`` supplied: tasks default to an empty list, tags to None."""
        details = AirflowApiDagDetails(dag_id="test_dag")
        assert details.dag_id == "test_dag"
        assert details.tasks == []
        assert details.tags is None

    def test_dag_details_with_tasks(self):
        """Nested task models keep their downstream ids and class reference."""
        upstream = AirflowApiTask(
            task_id="task_1",
            downstream_task_ids=["task_2"],
            class_ref={"class_name": "BashOperator"},
        )
        downstream = AirflowApiTask(task_id="task_2")
        details = AirflowApiDagDetails(
            dag_id="test_dag",
            description="A test dag",
            is_paused=False,
            tasks=[upstream, downstream],
        )
        assert len(details.tasks) == 2
        first_task = details.tasks[0]
        assert first_task.downstream_task_ids == ["task_2"]
        assert first_task.class_ref["class_name"] == "BashOperator"

    def test_dag_run(self):
        dag_run = AirflowApiDagRun(dag_run_id="manual__2024-01-01", state="success")
        assert dag_run.dag_run_id == "manual__2024-01-01"
        assert dag_run.state == "success"

    def test_task_instance(self):
        instance = AirflowApiTaskInstance(task_id="task_1", state="success")
        assert instance.task_id == "task_1"
        assert instance.state == "success"
# ── Client: API Version Detection ────────────────────────────────────────
class TestClientApiVersionDetection:
    """Resolution of ``api_version`` for the ``auto`` setting and an explicit value."""

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_auto_detect_v2(self, mock_rest_cls):
        """Every GET succeeds, so auto-detection resolves to v2."""
        airflow_client, rest = _make_client(mock_rest_cls, api_version="auto")
        rest.get.return_value = {"version": "3.0.0"}
        detected = airflow_client.api_version
        assert detected == "v2"

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_auto_detect_v1_fallback(self, mock_rest_cls):
        """A failing v2 path makes auto-detection fall back to v1."""
        airflow_client, rest = _make_client(mock_rest_cls, api_version="auto")

        def fail_on_v2(path):
            if "/v2/" not in path:
                return {"version": "2.9.0"}
            raise Exception("Not found")

        rest.get.side_effect = fail_on_v2
        detected = airflow_client.api_version
        assert detected == "v1"

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_explicit_version(self, mock_rest_cls):
        """A configured version is reported unchanged."""
        airflow_client, _ = _make_client(mock_rest_cls, api_version="v1")
        assert airflow_client.api_version == "v1"
# ── Client: Build DAG Details ────────────────────────────────────────────
class TestClientBuildDagDetails:
    """``build_dag_details`` combines the DAG payload with the mocked tasks response."""

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_build_dag_details_normalizes_tags(self, mock_rest_cls):
        """Tag dicts of the form ``{"name": ...}`` are flattened to plain strings."""
        airflow_client, rest = _make_client(mock_rest_cls)
        rest.get.return_value = {"tasks": []}

        raw_dag = {
            "dag_id": "test_dag",
            "tags": [{"name": "team:data"}, {"name": "env:prod"}],
            "owners": ["admin"],
        }
        details = airflow_client.build_dag_details(raw_dag)

        assert details.tags == ["team:data", "env:prod"]
        assert details.owners == ["admin"]

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_build_dag_details_with_tasks(self, mock_rest_cls):
        """Tasks returned by the REST mock end up on the resulting details object."""
        airflow_client, rest = _make_client(mock_rest_cls)
        extract_task = {
            "task_id": "extract",
            "downstream_task_ids": ["transform"],
            "class_ref": {
                "class_name": "PythonOperator",
                "module_path": "airflow.operators.python",
            },
        }
        transform_task = {
            "task_id": "transform",
            "downstream_task_ids": [],
            "class_ref": {
                "class_name": "BashOperator",
                "module_path": "airflow.operators.bash",
            },
        }
        rest.get.return_value = {"tasks": [extract_task, transform_task]}

        details = airflow_client.build_dag_details(
            {"dag_id": "etl_pipeline", "tags": [], "owners": []}
        )

        assert len(details.tasks) == 2
        first_task = details.tasks[0]
        assert first_task.task_id == "extract"
        assert first_task.downstream_task_ids == ["transform"]
        assert first_task.class_ref["class_name"] == "PythonOperator"
# ── Client: Date Field ───────────────────────────────────────────────────
class TestClientDateField:
    """The run-date field name exposed as ``_date_field`` depends on the API version."""

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_v1_uses_execution_date(self, mock_rest_cls):
        v1_client, _ = _make_client(mock_rest_cls, api_version="v1")
        date_field = v1_client._date_field
        assert date_field == "execution_date"

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_v2_uses_logical_date(self, mock_rest_cls):
        v2_client, _ = _make_client(mock_rest_cls, api_version="v2")
        date_field = v2_client._date_field
        assert date_field == "logical_date"
# ── Source URL Generation ────────────────────────────────────────────────
class TestSourceUrlGeneration:
    """DAG and task source URLs produced for the v1 and v2 API versions."""

    def _make_source(self, api_version: str):
        """Return a mocked source with a fixed hostPort and the given API version."""
        stub = MagicMock()
        stub.service_connection = MagicMock()
        stub.service_connection.hostPort = "http://airflow.example.com:8080"
        stub.connection = MagicMock()
        stub.connection.api_version = api_version
        return stub

    def test_v2_dag_url(self):
        dag_url = AirflowApiSource._get_dag_source_url(
            self._make_source("v2"), "my_dag"
        )
        assert dag_url == "http://airflow.example.com:8080/dags/my_dag"

    def test_v1_dag_url(self):
        dag_url = AirflowApiSource._get_dag_source_url(
            self._make_source("v1"), "my_dag"
        )
        assert dag_url == "http://airflow.example.com:8080/dags/my_dag/grid"

    def test_v2_task_url(self):
        task_url = AirflowApiSource._get_task_source_url(
            self._make_source("v2"), "my_dag", "my_task"
        )
        assert task_url == "http://airflow.example.com:8080/dags/my_dag/tasks/my_task"

    def test_v1_task_url(self):
        task_url = AirflowApiSource._get_task_source_url(
            self._make_source("v1"), "my_dag", "my_task"
        )
        # The v1 link is only checked for its path and its two filter parameters.
        assert "taskinstance/list" in task_url
        assert "_flt_3_dag_id=my_dag" in task_url
        assert "_flt_3_task_id=my_task" in task_url
# ── Pagination: DAGs ─────────────────────────────────────────────────────
class TestPaginateGetAllDags:
    """``get_all_dags`` must keep requesting pages until all entries are collected."""

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_single_page(self, mock_rest_cls):
        """Two DAGs reported as the full total need exactly one request."""
        airflow_client, rest = _make_client(mock_rest_cls)
        rest.get.return_value = {
            "dags": [{"dag_id": "a"}, {"dag_id": "b"}],
            "total_entries": 2,
        }

        dags = airflow_client.get_all_dags()

        assert len(dags) == 2
        assert dags[0]["dag_id"] == "a"
        assert rest.get.call_count == 1

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_multiple_pages(self, mock_rest_cls):
        """250 DAGs served as pages of 100, 100 and 50 need three requests."""
        airflow_client, rest = _make_client(mock_rest_cls)

        def _page(start, stop):
            return {
                "dags": [{"dag_id": f"dag_{i}"} for i in range(start, stop)],
                "total_entries": 250,
            }

        rest.get.side_effect = [_page(0, 100), _page(100, 200), _page(200, 250)]

        dags = airflow_client.get_all_dags()

        assert len(dags) == 250
        assert dags[0]["dag_id"] == "dag_0"
        assert dags[-1]["dag_id"] == "dag_249"
        assert rest.get.call_count == 3

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_empty_response(self, mock_rest_cls):
        """An empty first page yields an empty list."""
        airflow_client, rest = _make_client(mock_rest_cls)
        rest.get.return_value = {"dags": [], "total_entries": 0}

        dags = airflow_client.get_all_dags()

        assert dags == []
# ── Pagination: Task Instances ───────────────────────────────────────────
class TestPaginateTaskInstances:
    """Paging and error handling of ``get_task_instances_for_run``."""

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_single_page_task_instances(self, mock_rest_cls):
        """Both task instances of a single page are parsed in order."""
        airflow_client, rest = _make_client(mock_rest_cls)
        rest.get.return_value = {
            "task_instances": [
                {"task_id": "t1", "state": "success"},
                {"task_id": "t2", "state": "failed"},
            ],
            "total_entries": 2,
        }

        instances = airflow_client.get_task_instances_for_run("dag1", "run1")

        assert len(instances) == 2
        first, second = instances[0], instances[1]
        assert first.task_id == "t1"
        assert first.state == "success"
        assert second.task_id == "t2"
        assert second.state == "failed"

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_multi_page_task_instances(self, mock_rest_cls):
        """150 instances served as pages of 100 and 50 need two requests."""
        airflow_client, rest = _make_client(mock_rest_cls)

        def _page(start, stop):
            return {
                "task_instances": [
                    {"task_id": f"t_{i}", "state": "success"}
                    for i in range(start, stop)
                ],
                "total_entries": 150,
            }

        rest.get.side_effect = [_page(0, 100), _page(100, 150)]

        instances = airflow_client.get_task_instances_for_run("big_dag", "run1")

        assert len(instances) == 150
        assert instances[0].task_id == "t_0"
        assert instances[-1].task_id == "t_149"
        assert rest.get.call_count == 2

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_task_instances_api_error_returns_empty(self, mock_rest_cls):
        """A failing GET results in an empty list rather than an exception."""
        airflow_client, rest = _make_client(mock_rest_cls)
        rest.get.side_effect = Exception("Connection refused")

        instances = airflow_client.get_task_instances_for_run("dag1", "run1")

        assert instances == []
# ── Auth & Connectivity Error Propagation ────────────────────────────────
class TestAuthErrorPropagation:
    """
    During version detection, auth and connectivity failures must propagate,
    while a 404 on one version only moves detection on to the next.
    """

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_401_is_raised_during_version_detection(self, mock_rest_cls):
        airflow_client, rest = _make_client(mock_rest_cls, api_version="auto")
        unauthorized = MagicMock()
        unauthorized.status_code = 401
        rest.get.side_effect = HTTPError(response=unauthorized)

        with pytest.raises(HTTPError):
            airflow_client._detect_api_version()

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_403_is_raised_during_version_detection(self, mock_rest_cls):
        airflow_client, rest = _make_client(mock_rest_cls, api_version="auto")
        forbidden = MagicMock()
        forbidden.status_code = 403
        rest.get.side_effect = HTTPError(response=forbidden)

        with pytest.raises(HTTPError):
            airflow_client._detect_api_version()

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_404_falls_through_to_next_version(self, mock_rest_cls):
        airflow_client, rest = _make_client(mock_rest_cls, api_version="auto")
        not_found = MagicMock()
        not_found.status_code = 404

        def v2_missing(path):
            # Only the v2 path is absent; any other path answers normally.
            if "/v2/" not in path:
                return {"version": "2.9.0"}
            raise HTTPError(response=not_found)

        rest.get.side_effect = v2_missing

        assert airflow_client._detect_api_version() == "v1"

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_connection_error_is_raised(self, mock_rest_cls):
        airflow_client, rest = _make_client(mock_rest_cls, api_version="auto")
        rest.get.side_effect = RequestsConnectionError("Connection refused")

        with pytest.raises(RequestsConnectionError):
            airflow_client._detect_api_version()

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_timeout_error_is_raised(self, mock_rest_cls):
        airflow_client, rest = _make_client(mock_rest_cls, api_version="auto")
        rest.get.side_effect = TimeoutError("timed out")

        with pytest.raises(TimeoutError):
            airflow_client._detect_api_version()
# ── Tag Edge Cases ───────────────────────────────────────────────────────
class TestBuildDagDetailsTagEdgeCases:
    """Tag normalisation in ``build_dag_details`` for malformed or mixed tag payloads."""

    @staticmethod
    def _normalized_tags(mock_rest_cls, raw_tags):
        """Run ``build_dag_details`` for ``test_dag`` with ``raw_tags`` and return its tags."""
        airflow_client, rest = _make_client(mock_rest_cls)
        rest.get.return_value = {"tasks": []}
        payload = {
            "dag_id": "test_dag",
            "tags": raw_tags,
        }
        return airflow_client.build_dag_details(payload).tags

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_empty_tag_names_are_filtered(self, mock_rest_cls):
        """Tag dicts whose name is empty or None are dropped."""
        tags = self._normalized_tags(
            mock_rest_cls, [{"name": ""}, {"name": "valid_tag"}, {"name": None}]
        )
        assert tags == ["valid_tag"]

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_non_string_non_dict_tags_are_skipped(self, mock_rest_cls):
        """Entries that are neither strings nor dicts are ignored."""
        tags = self._normalized_tags(mock_rest_cls, [123, None, {"name": "good"}, True])
        assert tags == ["good"]

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_string_tags_are_kept(self, mock_rest_cls):
        """Plain string tags pass through alongside dict tags, keeping their order."""
        tags = self._normalized_tags(
            mock_rest_cls, ["simple_string_tag", {"name": "dict_tag"}]
        )
        assert tags == ["simple_string_tag", "dict_tag"]
# ── Pipeline Status: Timestamp Fallback ──────────────────────────────────
class TestPipelineStatusTimestampFallback:
    """
    The status timestamp is taken from execution_date, then start_date, then
    end_date; a run carrying none of them produces no status.
    """

    @staticmethod
    def _collect_statuses(dag_run):
        """Yield pipeline statuses for a source whose only DAG run is ``dag_run``."""
        source, dag = _make_source_and_dag()
        source.connection.get_dag_runs.return_value = [dag_run]
        source.connection.get_task_instances_for_run.return_value = []
        return list(AirflowApiSource.yield_pipeline_status(source, dag))

    def test_uses_execution_date_when_available(self):
        executed_at = datetime(2025, 1, 15, 12, 0)
        started_at = datetime(2025, 1, 15, 12, 5)
        emitted = self._collect_statuses(
            AirflowApiDagRun(
                dag_run_id="run_1",
                state="success",
                execution_date=executed_at,
                start_date=started_at,
            )
        )

        assert len(emitted) == 1
        pipeline_status = emitted[0].right.pipeline_status
        assert pipeline_status.timestamp.root == datetime_to_ts(executed_at)

    def test_falls_back_to_start_date(self):
        started_at = datetime(2025, 1, 15, 12, 5)
        emitted = self._collect_statuses(
            AirflowApiDagRun(
                dag_run_id="run_1",
                state="success",
                execution_date=None,
                start_date=started_at,
            )
        )

        assert len(emitted) == 1
        pipeline_status = emitted[0].right.pipeline_status
        assert pipeline_status.timestamp.root == datetime_to_ts(started_at)

    def test_falls_back_to_end_date(self):
        ended_at = datetime(2025, 1, 15, 12, 10)
        emitted = self._collect_statuses(
            AirflowApiDagRun(
                dag_run_id="run_1",
                state="success",
                execution_date=None,
                start_date=None,
                end_date=ended_at,
            )
        )

        assert len(emitted) == 1
        pipeline_status = emitted[0].right.pipeline_status
        assert pipeline_status.timestamp.root == datetime_to_ts(ended_at)

    def test_skips_run_with_no_timestamp(self):
        emitted = self._collect_statuses(
            AirflowApiDagRun(
                dag_run_id="run_no_ts",
                state="success",
                execution_date=None,
                start_date=None,
                end_date=None,
            )
        )

        assert len(emitted) == 0
# ── Pipeline State ───────────────────────────────────────────────────────
class TestGetPipelineState:
    """``get_pipeline_state`` derives the pipeline state from the DAG's ``is_paused`` flag."""

    def test_paused_returns_inactive(self):
        source, _ = _make_source_and_dag()
        paused_dag = AirflowApiDagDetails(dag_id="test", is_paused=True)
        state = AirflowApiSource.get_pipeline_state(source, paused_dag)
        assert state == PipelineState.Inactive

    def test_not_paused_returns_active(self):
        source, _ = _make_source_and_dag()
        running_dag = AirflowApiDagDetails(dag_id="test", is_paused=False)
        state = AirflowApiSource.get_pipeline_state(source, running_dag)
        assert state == PipelineState.Active

    def test_none_paused_returns_none(self):
        source, _ = _make_source_and_dag()
        unknown_dag = AirflowApiDagDetails(dag_id="test", is_paused=None)
        state = AirflowApiSource.get_pipeline_state(source, unknown_dag)
        assert state is None
# ── Build Tasks ──────────────────────────────────────────────────────────
class TestBuildTasks:
    """Conversion of API task models into pipeline tasks by ``_build_tasks``."""

    def test_builds_tasks_with_all_fields(self):
        """Name, downstream tasks, operator type and docs are carried over."""
        source, dag = _make_source_and_dag()

        built = AirflowApiSource._build_tasks(source, dag)

        assert len(built) == 2
        first_task = built[0]
        assert first_task.name == "task_1"
        assert first_task.downstreamTasks == ["task_2"]
        assert first_task.taskType == "PythonOperator"
        assert first_task.description is not None
        assert "Task 1 docs" in first_task.description.root

    def test_builds_tasks_with_none_class_ref(self):
        """A task without ``class_ref`` gets no task type."""
        source, _ = _make_source_and_dag()
        untyped_task = AirflowApiTask(task_id="t1", class_ref=None)
        dag = AirflowApiDagDetails(dag_id="test", tasks=[untyped_task])

        built = AirflowApiSource._build_tasks(source, dag)

        assert len(built) == 1
        assert built[0].taskType is None

    def test_builds_tasks_empty(self):
        """A DAG without tasks produces an empty list."""
        source, _ = _make_source_and_dag()
        empty_dag = AirflowApiDagDetails(dag_id="test", tasks=[])

        built = AirflowApiSource._build_tasks(source, empty_dag)

        assert built == []
# ── Yield Pipeline ───────────────────────────────────────────────────────
class TestYieldPipeline:
    """``yield_pipeline`` emits a create request on success and an error record on failure."""

    @patch(
        "metadata.ingestion.source.pipeline.airflow.api.source.get_tag_labels",
        return_value=[],
    )
    def test_yields_create_pipeline_request(self, _mock_tags):
        """The emitted request mirrors the DAG's name, description, schedule and tasks."""
        source, dag = _make_source_and_dag()

        emitted = list(AirflowApiSource.yield_pipeline(source, dag))

        assert len(emitted) == 1
        create_request = emitted[0].right
        assert create_request.name.root == "test_dag"
        assert create_request.description.root == "A test pipeline"
        assert create_request.scheduleInterval == "@daily"
        assert len(create_request.tasks) == 2
        assert create_request.tasks[0].name == "task_1"

    @patch(
        "metadata.ingestion.source.pipeline.airflow.api.source.get_tag_labels",
        return_value=[],
    )
    def test_yields_error_on_exception(self, _mock_tags):
        """A failure while building the request is reported on the ``left`` side."""
        source, dag = _make_source_and_dag()
        # Break the service name to trigger a validation error
        mocked_context = source.context.get.return_value
        mocked_context.pipeline_service = None

        emitted = list(AirflowApiSource.yield_pipeline(source, dag))

        assert len(emitted) == 1
        failure = emitted[0].left
        assert failure is not None
        assert "test_dag" in failure.name
# ── Client: DAG Runs Parsing ─────────────────────────────────────────────
class TestClientGetDagRuns:
    """Parsing and error handling of ``get_dag_runs``."""

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_parses_dag_runs_with_logical_date(self, mock_rest_cls):
        """A run that only carries ``logical_date`` still ends up with an execution date."""
        airflow_client, rest = _make_client(mock_rest_cls)
        run_payload = {
            "dag_run_id": "run_1",
            "state": "success",
            "logical_date": "2025-01-15T12:00:00+00:00",
            "start_date": "2025-01-15T12:01:00+00:00",
            "end_date": "2025-01-15T12:05:00+00:00",
        }
        rest.get.return_value = {"dag_runs": [run_payload]}

        dag_runs = airflow_client.get_dag_runs("my_dag", limit=5)

        assert len(dag_runs) == 1
        parsed = dag_runs[0]
        assert parsed.dag_run_id == "run_1"
        assert parsed.state == "success"
        assert parsed.execution_date is not None

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_returns_empty_on_api_error(self, mock_rest_cls):
        """A failing GET results in an empty list rather than an exception."""
        airflow_client, rest = _make_client(mock_rest_cls)
        rest.get.side_effect = Exception("API down")

        dag_runs = airflow_client.get_dag_runs("my_dag")

        assert dag_runs == []

View file

@ -0,0 +1,249 @@
/*
* Copyright 2021 Collate
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.openmetadata.it.tests;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.time.Instant;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.MethodOrderer;
import org.junit.jupiter.api.Order;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestMethodOrder;
import org.junit.jupiter.api.extension.ExtendWith;
import org.openmetadata.it.util.SdkClients;
import org.openmetadata.it.util.TestNamespace;
import org.openmetadata.it.util.TestNamespaceExtension;
import org.openmetadata.schema.entity.data.Database;
import org.openmetadata.schema.entity.data.DatabaseSchema;
import org.openmetadata.schema.entity.data.Table;
import org.openmetadata.schema.entity.services.DatabaseService;
import org.openmetadata.schema.type.Column;
import org.openmetadata.schema.type.ColumnDataType;
import org.openmetadata.sdk.fluent.DatabaseSchemas;
import org.openmetadata.sdk.fluent.DatabaseServices;
import org.openmetadata.sdk.fluent.Databases;
import org.openmetadata.sdk.fluent.LineageAPI;
import org.openmetadata.sdk.fluent.OpenLineage;
import org.openmetadata.sdk.fluent.Tables;
import org.openmetadata.sdk.fluent.wrappers.FluentTable;
/**
* Integration tests for OpenLineage lineage resolution.
*
* <p>Verifies that OL COMPLETE events with input/output datasets are resolved to existing OM table
* entities and lineage edges are created with source=OpenLineage.
*
* <p>Creates its own test entities (service, database, schema, tables) to avoid depending on sample
* data being loaded externally.
*/
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
@ExtendWith(TestNamespaceExtension.class)
public class OpenLineageLineageResolutionIT {
private static final ObjectMapper MAPPER = new ObjectMapper();
private static final List<Column> DEFAULT_COLUMNS =
List.of(
new Column().withName("id").withDataType(ColumnDataType.BIGINT),
new Column().withName("name").withDataType(ColumnDataType.VARCHAR).withDataLength(255));
private static String srcFqn;
private static String tgtFqn;
private static String serviceName;
private static String schemaFqn;
@BeforeAll
static void setup() {
OpenLineage.setDefaultClient(SdkClients.adminClient());
Tables.setDefaultClient(SdkClients.adminClient());
LineageAPI.setDefaultClient(SdkClients.adminClient());
DatabaseServices.setDefaultClient(SdkClients.adminClient());
Databases.setDefaultClient(SdkClients.adminClient());
DatabaseSchemas.setDefaultClient(SdkClients.adminClient());
String uniqueId = UUID.randomUUID().toString().substring(0, 8);
serviceName = "ol_test_svc_" + uniqueId;
DatabaseService service =
DatabaseServices.builder()
.name(serviceName)
.connection(
DatabaseServices.postgresConnection()
.hostPort("localhost:5432")
.username("test")
.build())
.description("Test service for OpenLineage resolution tests")
.create();
Database db =
Databases.create().name("ecommerce_db").in(service.getFullyQualifiedName()).execute();
DatabaseSchema schema =
DatabaseSchemas.create().name("shopify").in(db.getFullyQualifiedName()).execute();
schemaFqn = schema.getFullyQualifiedName();
Table rawOrder =
Tables.create()
.name("raw_order")
.inSchema(schemaFqn)
.withColumns(DEFAULT_COLUMNS)
.execute();
srcFqn = rawOrder.getFullyQualifiedName();
Table factOrder =
Tables.create()
.name("fact_order")
.inSchema(schemaFqn)
.withColumns(DEFAULT_COLUMNS)
.execute();
tgtFqn = factOrder.getFullyQualifiedName();
Tables.create().name("raw_customer").inSchema(schemaFqn).withColumns(DEFAULT_COLUMNS).execute();
Tables.create().name("dim_address").inSchema(schemaFqn).withColumns(DEFAULT_COLUMNS).execute();
}
@Test
@Order(1)
void testSampleDataTablesExist() {
FluentTable src = Tables.findByName(srcFqn).fetch();
assertNotNull(src, "Source table " + srcFqn + " must exist");
FluentTable tgt = Tables.findByName(tgtFqn).fetch();
assertNotNull(tgt, "Target table " + tgtFqn + " must exist");
}
@Test
@Order(2)
void testCompleteEventCreatesLineageEdge(TestNamespace ns) throws Exception {
String response =
OpenLineage.event()
.withEventType("COMPLETE")
.withEventTime(Instant.now().toString())
.withJob(ns.prefix("ol_resolution_job"), ns.prefix("namespace"))
.withRun(UUID.randomUUID().toString())
.addInput("ecommerce_db.shopify.raw_order", serviceName)
.addOutput("ecommerce_db.shopify.fact_order", serviceName)
.send();
assertNotNull(response);
JsonNode json = MAPPER.readTree(response);
assertEquals("success", json.get("status").asText());
assertTrue(
json.get("lineageEdgesCreated").asInt() >= 1,
"Expected at least 1 lineage edge created, got: " + response);
}
@Test
@Order(3)
@SuppressWarnings("unchecked")
void testLineageEdgeHasOpenLineageSource() throws Exception {
LineageAPI.LineageGraph lineageGraph =
LineageAPI.forName$("table", srcFqn).upstream(0).downstream(3).fetch();
assertNotNull(lineageGraph);
Map<String, Object> lineage = MAPPER.readValue(lineageGraph.getRaw(), Map.class);
var downstreamEdges = (java.util.List<?>) lineage.get("downstreamEdges");
assertNotNull(downstreamEdges, "Expected downstream edges from " + srcFqn);
boolean hasOlEdge =
downstreamEdges.stream()
.map(e -> (Map<?, ?>) e)
.map(e -> (Map<?, ?>) e.get("lineageDetails"))
.filter(java.util.Objects::nonNull)
.anyMatch(details -> "OpenLineage".equals(details.get("source")));
assertTrue(hasOlEdge, "Expected at least one edge with source=OpenLineage");
}
@Test
@Order(4)
void testStartEventDoesNotCreateEdges(TestNamespace ns) throws Exception {
String response =
OpenLineage.event()
.withEventType("START")
.withEventTime(Instant.now().toString())
.withJob(ns.prefix("start_only_job"), ns.prefix("namespace"))
.withRun(UUID.randomUUID().toString())
.addInput("ecommerce_db.shopify.raw_order", serviceName)
.addOutput("ecommerce_db.shopify.fact_order", serviceName)
.send();
JsonNode json = MAPPER.readTree(response);
assertEquals(
0, json.get("lineageEdgesCreated").asInt(), "START events should not create lineage edges");
}
@Test
@Order(5)
void testUnresolvableDatasetsCreateNoEdges(TestNamespace ns) throws Exception {
String response =
OpenLineage.event()
.withEventType("COMPLETE")
.withEventTime(Instant.now().toString())
.withJob(ns.prefix("unknown_job"), ns.prefix("namespace"))
.withRun(UUID.randomUUID().toString())
.addInput("nonexistent_schema.nonexistent_table", "nonexistent_service")
.addOutput("nonexistent_schema.nonexistent_output", "nonexistent_service")
.send();
JsonNode json = MAPPER.readTree(response);
assertEquals(
0, json.get("lineageEdgesCreated").asInt(), "Unresolvable datasets should create 0 edges");
}
@Test
@Order(6)
void testMultiInputOutputCreatesAllEdges(TestNamespace ns) throws Exception {
  // Two inputs feeding one output: the response must report at least two created edges.
  String payload =
      OpenLineage.event()
          .withEventType("COMPLETE")
          .withEventTime(Instant.now().toString())
          .withJob(ns.prefix("multi_io_job"), ns.prefix("namespace"))
          .withRun(UUID.randomUUID().toString())
          .addInput("ecommerce_db.shopify.raw_order", serviceName)
          .addInput("ecommerce_db.shopify.raw_customer", serviceName)
          .addOutput("ecommerce_db.shopify.dim_address", serviceName)
          .send();

  int edgesCreated = MAPPER.readTree(payload).get("lineageEdgesCreated").asInt();
  assertTrue(
      edgesCreated >= 2, "2 inputs → 1 output should create at least 2 edges, got: " + payload);
}
@Test
@Order(7)
void testEmptyInputsOutputsCreateNoEdges(TestNamespace ns) throws Exception {
  // The event carries a job and a run but no datasets; the response must report 0 edges.
  String payload =
      OpenLineage.event()
          .withEventType("COMPLETE")
          .withEventTime(Instant.now().toString())
          .withJob(ns.prefix("empty_io_job"), ns.prefix("namespace"))
          .withRun(UUID.randomUUID().toString())
          .send();

  int edgesCreated = MAPPER.readTree(payload).get("lineageEdgesCreated").asInt();
  assertEquals(0, edgesCreated, "Empty inputs/outputs should create 0 edges");
}
}

View file

@ -69,7 +69,11 @@ public final class LineageAPI {
// ==================== Lineage Builders ====================
public static LineageQuery for$(String entityType, String entityId) {
return new LineageQuery(getClient(), entityType, entityId);
return new LineageQuery(getClient(), entityType, entityId, false);
}
/**
 * Starts a lineage query that identifies the entity by name rather than by id.
 *
 * @param entityType entity type passed through to the lineage query (for example "table")
 * @param fqn identifier of the entity; the query is constructed with its name-lookup flag set
 * @return a new {@link LineageQuery} bound to the default client
 */
public static LineageQuery forName$(String entityType, String fqn) {
return new LineageQuery(getClient(), entityType, fqn, true);
}
public static LineageConnector connect() {
@ -97,15 +101,17 @@ public final class LineageAPI {
public static class LineageQuery {
private final OpenMetadataClient client;
private final String entityType;
private final String entityId;
private final String identifier;
private final boolean isFqn;
private int upstreamDepth = 1;
private int downstreamDepth = 1;
private boolean includeDeleted = false;
LineageQuery(OpenMetadataClient client, String entityType, String entityId) {
LineageQuery(OpenMetadataClient client, String entityType, String identifier, boolean isFqn) {
this.client = client;
this.entityType = entityType;
this.entityId = entityId;
this.identifier = identifier;
this.isFqn = isFqn;
}
public LineageQuery upstream(int depth) {
@ -130,14 +136,26 @@ public final class LineageAPI {
}
public LineageGraph fetch() {
String result =
client
.lineage()
.getEntityLineage(
entityType,
entityId,
String.valueOf(upstreamDepth),
String.valueOf(downstreamDepth));
String result;
if (isFqn) {
result =
client
.lineage()
.getLineageByName(
entityType,
identifier,
String.valueOf(upstreamDepth),
String.valueOf(downstreamDepth));
} else {
result =
client
.lineage()
.getEntityLineage(
entityType,
identifier,
String.valueOf(upstreamDepth),
String.valueOf(downstreamDepth));
}
return new LineageGraph(result, client);
}
}

View file

@ -19,6 +19,7 @@ import static org.openmetadata.schema.type.Include.NON_DELETED;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import lombok.extern.slf4j.Slf4j;
import org.openmetadata.schema.api.lineage.openlineage.DatasetFacets;
@ -32,6 +33,7 @@ import org.openmetadata.schema.api.lineage.openlineage.SchemaFacet;
import org.openmetadata.schema.api.lineage.openlineage.SchemaField;
import org.openmetadata.schema.api.lineage.openlineage.SymlinkIdentifier;
import org.openmetadata.schema.api.lineage.openlineage.SymlinksFacet;
import org.openmetadata.schema.entity.data.Container;
import org.openmetadata.schema.entity.data.Pipeline;
import org.openmetadata.schema.entity.data.Table;
import org.openmetadata.schema.type.Column;
@ -45,8 +47,12 @@ import org.openmetadata.service.jdbi3.EntityRepository;
@Slf4j
public class OpenLineageEntityResolver {
private static final Set<String> STORAGE_URI_SCHEMES =
Set.of("gs://", "s3://", "s3a://", "abfss://", "abfs://", "wasbs://", "adl://");
private final Map<String, EntityReference> tableCache = new ConcurrentHashMap<>();
private final Map<String, EntityReference> pipelineCache = new ConcurrentHashMap<>();
private final Map<String, EntityReference> containerCache = new ConcurrentHashMap<>();
private final boolean autoCreateEntities;
private final String defaultPipelineService;
private final Map<String, String> namespaceToServiceMapping;
@ -132,6 +138,51 @@ public class OpenLineageEntityResolver {
return createTableFromOutput(dataset, updatedBy);
}
/**
 * Tells whether an OpenLineage namespace starts with one of the known object-storage URI
 * schemes listed in {@code STORAGE_URI_SCHEMES}. The comparison ignores letter case.
 *
 * @param namespace dataset namespace; may be null or empty
 * @return true when the lower-cased namespace starts with a known scheme, false otherwise
 */
public boolean isStorageDataset(String namespace) {
  if (nullOrEmpty(namespace)) {
    return false;
  }
  final String normalized = namespace.toLowerCase();
  return STORAGE_URI_SCHEMES.stream().anyMatch(normalized::startsWith);
}
/**
 * Resolves a storage dataset to a container reference by its full path.
 *
 * <p>The full path is the namespace joined to the name with a single "/". The lookup first tries
 * the full path and then its parent path (e.g. "gs://bucket/path/file_*.csv" ->
 * "gs://bucket/path"). A hit from either lookup is cached under the full path.
 *
 * @param namespace dataset namespace, e.g. a bucket URI
 * @param name dataset name relative to the namespace
 * @return the matching container reference, or null when either argument is null/empty or no
 *     container matches
 */
public EntityReference resolveContainer(String namespace, String name) {
  if (nullOrEmpty(namespace) || nullOrEmpty(name)) {
    return null;
  }

  String separator = namespace.endsWith("/") ? "" : "/";
  String fullPath = namespace + separator + name;
  String cacheKey = "container:" + fullPath;

  EntityReference hit = containerCache.get(cacheKey);
  if (hit != null) {
    return hit;
  }

  EntityReference resolved = searchContainerByFullPath(fullPath);
  if (resolved == null) {
    // Exact path missed: retry once with the last path segment removed.
    String parentPath = extractParentPath(fullPath);
    if (parentPath != null && !parentPath.equals(fullPath)) {
      resolved = searchContainerByFullPath(parentPath);
    }
  }

  if (resolved != null) {
    containerCache.put(cacheKey, resolved);
  }
  return resolved;
}
public EntityReference resolveOrCreatePipeline(String namespace, String name, String updatedBy) {
if (nullOrEmpty(name)) {
return null;
@ -157,6 +208,22 @@ public class OpenLineageEntityResolver {
LOG.debug("Pipeline not found: {}", pipelineFqn);
}
// Fallback: try namespace as service name, e.g. fasfas.stackoverflow_etl_lineage
if (!nullOrEmpty(namespace)) {
String fallbackFqn = namespace + "." + name;
try {
EntityReference ref =
Entity.getEntityReferenceByName(Entity.PIPELINE, fallbackFqn, NON_DELETED);
if (ref != null) {
LOG.info("Resolved pipeline via namespace fallback: {}", fallbackFqn);
pipelineCache.put(cacheKey, ref);
return ref;
}
} catch (EntityNotFoundException e) {
LOG.debug("Pipeline not found by namespace fallback: {}", fallbackFqn);
}
}
if (!autoCreateEntities) {
LOG.debug("Auto-create disabled, skipping pipeline creation for: {}", pipelineName);
return null;
@ -334,6 +401,41 @@ public class OpenLineageEntityResolver {
return null;
}
/**
 * Looks up a container whose JSON "fullPath" field equals the given path.
 *
 * <p>When several containers match, the first one returned by the repository is used. Any
 * exception raised during the lookup is logged at debug level and treated as "not found".
 *
 * @param fullPath storage path to match exactly
 * @return reference to the first matching container, or null when none matches or the lookup
 *     fails
 */
private EntityReference searchContainerByFullPath(String fullPath) {
  try {
    @SuppressWarnings("unchecked")
    EntityRepository<Container> repository =
        (EntityRepository<Container>) Entity.getEntityRepository(Entity.CONTAINER);
    List<Container> matches =
        repository.listAll(
            repository.getFields(""), new ListFilterByJsonField("fullPath", fullPath));
    if (matches.isEmpty()) {
      return null;
    }
    Container first = matches.get(0);
    LOG.debug(
        "Resolved container by fullPath: {} -> {}", fullPath, first.getFullyQualifiedName());
    return first.getEntityReference();
  } catch (Exception e) {
    LOG.debug("Error searching for container by fullPath {}: {}", fullPath, e.getMessage());
    return null;
  }
}
/**
 * Returns the given path with its last "/"-separated segment removed.
 *
 * @param path path to shorten; may be null
 * @return everything before the last "/", or null when the path is null, contains no "/", or
 *     its only "/" is the first character
 */
private String extractParentPath(String path) {
  int cut = (path == null) ? -1 : path.lastIndexOf('/');
  return cut > 0 ? path.substring(0, cut) : null;
}
private EntityReference createTableFromInput(OpenLineageInputDataset dataset, String updatedBy) {
return createTableInternal(
dataset.getNamespace(), dataset.getName(), dataset.getFacets(), updatedBy);
@ -377,6 +479,7 @@ public class OpenLineageEntityResolver {
List<EntityReference> owners = extractOwners(facets);
Table newTable = new Table();
newTable.setId(java.util.UUID.randomUUID());
newTable.setName(table);
newTable.setFullyQualifiedName(schemaFqn + "." + table);
newTable.setDatabaseSchema(
@ -561,6 +664,7 @@ public class OpenLineageEntityResolver {
Entity.PIPELINE_SERVICE, defaultPipelineService, NON_DELETED);
Pipeline newPipeline = new Pipeline();
newPipeline.setId(java.util.UUID.randomUUID());
newPipeline.setName(pipelineName);
newPipeline.setFullyQualifiedName(buildPipelineFqn(pipelineName));
newPipeline.setService(serviceRef);
@ -589,6 +693,7 @@ public class OpenLineageEntityResolver {
/** Empties the table, pipeline and container reference caches held by this resolver. */
public void clearCache() {
tableCache.clear();
pipelineCache.clear();
containerCache.clear();
}
private static class ListFilterByFqnSuffix extends org.openmetadata.service.jdbi3.ListFilter {
@ -598,24 +703,10 @@ public class OpenLineageEntityResolver {
}
@Override
public String getCondition() {
return getFqnCondition(null, "fqnSuffix");
}
@Override
public String getCondition(String alias) {
return getFqnCondition(alias, "fqnSuffix");
}
private String getFqnCondition(String alias, String paramName) {
String column = alias == null ? "json" : alias + ".json";
if (Boolean.TRUE.equals(
org.openmetadata.service.resources.databases.DatasourceConfig.getInstance().isMySQL())) {
return String.format(
"JSON_UNQUOTE(JSON_EXTRACT(%s, '$.fullyQualifiedName')) LIKE :%s", column, paramName);
} else {
return String.format("%s->>'fullyQualifiedName' LIKE :%s", column, paramName);
}
public String getCondition(String tableName) {
String baseCondition = super.getCondition(tableName);
String fqnClause = buildFqnLikeClause(tableName, "fqnSuffix");
return baseCondition + " AND " + fqnClause;
}
}
@ -626,24 +717,47 @@ public class OpenLineageEntityResolver {
}
@Override
public String getCondition() {
return getFqnCondition(null);
public String getCondition(String tableName) {
String baseCondition = super.getCondition(tableName);
String fqnClause = buildFqnLikeClause(tableName, "fqnPattern");
return baseCondition + " AND " + fqnClause;
}
}
private static class ListFilterByJsonField extends org.openmetadata.service.jdbi3.ListFilter {
private final String fieldName;
public ListFilterByJsonField(String fieldName, String value) {
super(Include.NON_DELETED);
this.fieldName = fieldName;
addQueryParam("jsonFieldValue", value);
}
@Override
public String getCondition(String alias) {
return getFqnCondition(alias);
}
private String getFqnCondition(String alias) {
String column = alias == null ? "json" : alias + ".json";
public String getCondition(String tableName) {
String baseCondition = super.getCondition(tableName);
String column = tableName == null ? "json" : tableName + ".json";
String fieldClause;
if (Boolean.TRUE.equals(
org.openmetadata.service.resources.databases.DatasourceConfig.getInstance().isMySQL())) {
return String.format(
"JSON_UNQUOTE(JSON_EXTRACT(%s, '$.fullyQualifiedName')) LIKE :fqnPattern", column);
fieldClause =
String.format(
"JSON_UNQUOTE(JSON_EXTRACT(%s, '$.%s')) = :jsonFieldValue", column, fieldName);
} else {
return String.format("%s->>'fullyQualifiedName' LIKE :fqnPattern", column);
fieldClause = String.format("%s->>'%s' = :jsonFieldValue", column, fieldName);
}
return baseCondition + " AND " + fieldClause;
}
}
/**
 * Builds a SQL fragment matching the JSON "fullyQualifiedName" field with LIKE against a named
 * bind parameter, using MySQL or non-MySQL JSON syntax depending on the configured datasource.
 *
 * @param tableName table alias that prefixes the json column; null selects the bare "json" column
 * @param paramName name of the bind parameter referenced as ":paramName"
 * @return the LIKE clause as a string
 */
private static String buildFqnLikeClause(String tableName, String paramName) {
  final String jsonColumn = (tableName != null) ? tableName + ".json" : "json";
  final boolean mysql =
      Boolean.TRUE.equals(
          org.openmetadata.service.resources.databases.DatasourceConfig.getInstance().isMySQL());
  final String template =
      mysql
          ? "JSON_UNQUOTE(JSON_EXTRACT(%s, '$.fullyQualifiedName')) LIKE :%s"
          : "%s->>'fullyQualifiedName' LIKE :%s";
  return String.format(template, jsonColumn, paramName);
}
}

View file

@ -57,7 +57,9 @@ public class OpenLineageMapper {
public OpenLineageMapper(OpenLineageEntityResolver entityResolver, OpenLineageSettings settings) {
this.entityResolver = entityResolver;
if (settings != null && settings.getEventTypeFilter() != null) {
if (settings != null
&& settings.getEventTypeFilter() != null
&& !settings.getEventTypeFilter().isEmpty()) {
this.allowedEventTypes =
settings.getEventTypeFilter().stream()
.map(OpenLineageEventType::value)
@ -97,6 +99,9 @@ public class OpenLineageMapper {
for (OpenLineageOutputDataset output : outputs) {
EntityReference outputRef = entityResolver.resolveOrCreateTable(output, updatedBy);
if (outputRef == null && entityResolver.isStorageDataset(output.getNamespace())) {
outputRef = entityResolver.resolveContainer(output.getNamespace(), output.getName());
}
if (outputRef == null) {
LOG.warn(
"Could not resolve output dataset: {}.{}", output.getNamespace(), output.getName());
@ -108,6 +113,9 @@ public class OpenLineageMapper {
for (OpenLineageInputDataset input : inputs) {
EntityReference inputRef = entityResolver.resolveOrCreateTable(input, updatedBy);
if (inputRef == null && entityResolver.isStorageDataset(input.getNamespace())) {
inputRef = entityResolver.resolveContainer(input.getNamespace(), input.getName());
}
if (inputRef == null) {
LOG.warn("Could not resolve input dataset: {}.{}", input.getNamespace(), input.getName());
continue;
@ -153,6 +161,9 @@ public class OpenLineageMapper {
for (OpenLineageInputDataset input : inputs) {
String olName = buildOpenLineageDatasetName(input.getNamespace(), input.getName());
EntityReference ref = entityResolver.resolveTable(input);
if (ref == null && entityResolver.isStorageDataset(input.getNamespace())) {
ref = entityResolver.resolveContainer(input.getNamespace(), input.getName());
}
if (ref != null) {
map.put(olName, ref.getFullyQualifiedName());
}
@ -198,12 +209,15 @@ public class OpenLineageMapper {
List<ColumnLineage> columnLineages = new ArrayList<>();
// Check outputFacets first (OpenLineage spec location), fall back to dataset facets
ColumnLineageFacet columnLineageFacet = null;
OutputDatasetFacets outputFacets = output.getOutputFacets();
if (outputFacets == null) {
return columnLineages;
if (outputFacets != null) {
columnLineageFacet = outputFacets.getColumnLineage();
}
if (columnLineageFacet == null && output.getFacets() != null) {
columnLineageFacet = output.getFacets().getColumnLineage();
}
ColumnLineageFacet columnLineageFacet = outputFacets.getColumnLineage();
if (columnLineageFacet == null || columnLineageFacet.getFields() == null) {
return columnLineages;
}

View file

@ -19,6 +19,7 @@ import org.openmetadata.schema.services.connections.database.MysqlConnection;
import org.openmetadata.schema.services.connections.database.PostgresConnection;
import org.openmetadata.schema.services.connections.database.SQLiteConnection;
import org.openmetadata.schema.services.connections.pipeline.AirflowConnection;
import org.openmetadata.schema.services.connections.pipeline.AirflowRestApiConnection;
import org.openmetadata.schema.services.connections.pipeline.BackendConnection;
import org.openmetadata.schema.utils.JsonUtils;
@ -31,6 +32,7 @@ public class AirflowConnectionClassConverter extends ClassConverter {
MysqlConnection.class,
PostgresConnection.class,
MssqlConnection.class,
AirflowRestApiConnection.class,
SQLiteConnection.class);
public AirflowConnectionClassConverter() {

View file

@ -0,0 +1,66 @@
/*
* Copyright 2021 Collate
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.openmetadata.service.secrets.converter;
import java.util.List;
import java.util.Map;
import org.openmetadata.schema.entity.utils.common.AccessTokenConfig;
import org.openmetadata.schema.entity.utils.common.BasicAuthConfig;
import org.openmetadata.schema.entity.utils.common.GcpCredentialsConfig;
import org.openmetadata.schema.entity.utils.common.MWAAAuthConfig;
import org.openmetadata.schema.security.credentials.AWSCredentials;
import org.openmetadata.schema.security.credentials.GCPCredentials;
import org.openmetadata.schema.services.connections.pipeline.AirflowRestApiConnection;
import org.openmetadata.schema.utils.JsonUtils;
/** Converter class to get an `AirflowRestApiConnection` object. */
public class AirflowRestApiConnectionClassConverter extends ClassConverter {

  public AirflowRestApiConnectionClassConverter() {
    super(AirflowRestApiConnection.class);
  }

  /**
   * Converts the raw object into an {@link AirflowRestApiConnection} and, when its authConfig is
   * still an untyped map, replaces it with the typed config selected by the first key found:
   * "username", "token", "credentials", then "mwaaConfig".
   *
   * @param object raw connection payload
   * @return the converted connection; authConfig is left untouched when it is not a map or none
   *     of the known keys is present
   */
  @Override
  public Object convert(Object object) {
    AirflowRestApiConnection connection =
        (AirflowRestApiConnection) JsonUtils.convertValue(object, this.clazz);

    if (connection.getAuthConfig() instanceof Map<?, ?> rawAuth) {
      // The key order below decides which auth type wins when several keys are present.
      if (rawAuth.containsKey("username")) {
        tryToConvertOrFail(rawAuth, List.of(BasicAuthConfig.class))
            .ifPresent(connection::setAuthConfig);
      } else if (rawAuth.containsKey("token")) {
        tryToConvertOrFail(rawAuth, List.of(AccessTokenConfig.class))
            .ifPresent(connection::setAuthConfig);
      } else if (rawAuth.containsKey("credentials")) {
        tryToConvertOrFail(rawAuth, List.of(GcpCredentialsConfig.class))
            .ifPresent(connection::setAuthConfig);
        convertNestedGcpCredentials(connection);
      } else if (rawAuth.containsKey("mwaaConfig")) {
        tryToConvertOrFail(rawAuth, List.of(MWAAAuthConfig.class))
            .ifPresent(connection::setAuthConfig);
        convertNestedAwsConfig(connection);
      }
    }
    return connection;
  }

  /** Converts the credentials nested in a GCP auth config, when that config was applied. */
  private void convertNestedGcpCredentials(AirflowRestApiConnection connection) {
    if (connection.getAuthConfig() instanceof GcpCredentialsConfig gcpAuth) {
      tryToConvertOrFail(gcpAuth.getCredentials(), List.of(GCPCredentials.class))
          .ifPresent(converted -> gcpAuth.setCredentials((GCPCredentials) converted));
    }
  }

  /** Converts the AWS config nested in an MWAA auth config, when both levels are present. */
  private void convertNestedAwsConfig(AirflowRestApiConnection connection) {
    if (!(connection.getAuthConfig() instanceof MWAAAuthConfig mwaaAuth)) {
      return;
    }
    if (mwaaAuth.getMwaaConfig() == null || mwaaAuth.getMwaaConfig().getAwsConfig() == null) {
      return;
    }
    tryToConvertOrFail(mwaaAuth.getMwaaConfig().getAwsConfig(), List.of(AWSCredentials.class))
        .ifPresent(converted -> mwaaAuth.getMwaaConfig().setAwsConfig((AWSCredentials) converted));
  }
}

View file

@ -55,6 +55,7 @@ import org.openmetadata.schema.services.connections.drive.GoogleDriveConnection;
import org.openmetadata.schema.services.connections.mlmodel.VertexAIConnection;
import org.openmetadata.schema.services.connections.pipeline.AirbyteConnection;
import org.openmetadata.schema.services.connections.pipeline.AirflowConnection;
import org.openmetadata.schema.services.connections.pipeline.AirflowRestApiConnection;
import org.openmetadata.schema.services.connections.pipeline.MatillionConnection;
import org.openmetadata.schema.services.connections.pipeline.MulesoftConnection;
import org.openmetadata.schema.services.connections.pipeline.NifiConnection;
@ -77,6 +78,7 @@ public final class ClassConverterFactory {
Map.ofEntries(
Map.entry(AirbyteConnection.class, new AirbyteConnectionClassConverter()),
Map.entry(AirflowConnection.class, new AirflowConnectionClassConverter()),
Map.entry(AirflowRestApiConnection.class, new AirflowRestApiConnectionClassConverter()),
Map.entry(BigQueryConnection.class, new BigQueryConnectionClassConverter()),
Map.entry(BigTableConnection.class, new BigTableConnectionClassConverter()),
Map.entry(DatalakeConnection.class, new DatalakeConnectionClassConverter()),

View file

@ -597,6 +597,125 @@ class OpenLineageMapperTest {
assertTrue(description.contains("my-job"));
}
@Test
void mapRunEvent_storageOutputDataset_resolvesContainer() {
  // Table input, gs:// output. Table resolution is stubbed to fail for the output, so the edge
  // target must come from the container lookup.
  OpenLineageRunEvent event = createBaseEvent(EventType.COMPLETE);
  OpenLineageInputDataset tableInput = createInputDataset("ns", "schema.input_table");
  OpenLineageOutputDataset storageOutput =
      createOutputDataset("gs://my-bucket", "data/output.csv");
  event.setInputs(List.of(tableInput));
  event.setOutputs(List.of(storageOutput));

  EntityReference tableRef = createEntityReference("i1", "svc.db.schema.input_table");
  String idSuffix = String.format("%012d", "c1".hashCode() & 0xFFFFFFFFL);
  EntityReference containerRef =
      new EntityReference()
          .withId(UUID.fromString("00000000-0000-0000-0000-" + idSuffix))
          .withType("container")
          .withFullyQualifiedName("storage.my-bucket.data_output");

  when(entityResolver.resolveTable(tableInput)).thenReturn(tableRef);
  when(entityResolver.resolveOrCreateTable(eq(storageOutput), eq(UPDATED_BY))).thenReturn(null);
  when(entityResolver.isStorageDataset("gs://my-bucket")).thenReturn(true);
  when(entityResolver.resolveContainer("gs://my-bucket", "data/output.csv"))
      .thenReturn(containerRef);
  when(entityResolver.resolveOrCreateTable(eq(tableInput), eq(UPDATED_BY))).thenReturn(tableRef);
  when(entityResolver.resolveOrCreatePipeline(anyString(), anyString(), eq(UPDATED_BY)))
      .thenReturn(null);

  List<AddLineage> edges = mapper.mapRunEvent(event, UPDATED_BY);

  assertEquals(1, edges.size());
  assertEquals(containerRef, edges.get(0).getEdge().getToEntity());
}
@Test
void mapRunEvent_storageInputDataset_resolvesContainer() {
  // s3:// input, table output. Table resolution is stubbed to fail for the input, so the edge
  // source must come from the container lookup.
  OpenLineageRunEvent event = createBaseEvent(EventType.COMPLETE);
  OpenLineageInputDataset storageInput =
      createInputDataset("s3://my-bucket", "data/input.parquet");
  OpenLineageOutputDataset tableOutput = createOutputDataset("ns", "schema.output_table");
  event.setInputs(List.of(storageInput));
  event.setOutputs(List.of(tableOutput));

  EntityReference tableRef = createEntityReference("o1", "svc.db.schema.output_table");
  String idSuffix = String.format("%012d", "c2".hashCode() & 0xFFFFFFFFL);
  EntityReference containerRef =
      new EntityReference()
          .withId(UUID.fromString("00000000-0000-0000-0000-" + idSuffix))
          .withType("container")
          .withFullyQualifiedName("storage.my-bucket.data_input");

  when(entityResolver.resolveTable(storageInput)).thenReturn(null);
  when(entityResolver.isStorageDataset("s3://my-bucket")).thenReturn(true);
  when(entityResolver.resolveContainer("s3://my-bucket", "data/input.parquet"))
      .thenReturn(containerRef);
  when(entityResolver.resolveOrCreateTable(eq(tableOutput), eq(UPDATED_BY))).thenReturn(tableRef);
  when(entityResolver.resolveOrCreateTable(eq(storageInput), eq(UPDATED_BY))).thenReturn(null);
  when(entityResolver.resolveOrCreatePipeline(anyString(), anyString(), eq(UPDATED_BY)))
      .thenReturn(null);

  List<AddLineage> edges = mapper.mapRunEvent(event, UPDATED_BY);

  assertEquals(1, edges.size());
  assertEquals(containerRef, edges.get(0).getEdge().getFromEntity());
}
@Test
void mapRunEvent_columnLineageInDatasetFacets_extractsColumnLineage() {
  OpenLineageRunEvent event = createBaseEvent(EventType.COMPLETE);

  String inputNamespace = "input-ns";
  String inputName = "schema.input_table";
  OpenLineageInputDataset input = createInputDataset(inputNamespace, inputName);

  String outputNamespace = "output-ns";
  String outputName = "schema.output_table";
  OpenLineageOutputDataset output = createOutputDataset(outputNamespace, outputName);

  // Column lineage: src_col of the input feeds dst_col of the output.
  InputField sourceColumn =
      new InputField().withNamespace(inputNamespace).withName(inputName).withField("src_col");
  ColumnLineageField targetColumnLineage =
      new ColumnLineageField()
          .withInputFields(List.of(sourceColumn))
          .withTransformationDescription("IDENTITY");
  Fields lineageFields = new Fields();
  lineageFields.setAdditionalProperty("dst_col", targetColumnLineage);
  ColumnLineageFacet facet = new ColumnLineageFacet().withFields(lineageFields);

  // Attach the facet to the dataset-level facets and leave outputFacets unset, so the test
  // exercises the case where column lineage is only available on the dataset facets.
  org.openmetadata.schema.api.lineage.openlineage.DatasetFacets datasetFacets =
      new org.openmetadata.schema.api.lineage.openlineage.DatasetFacets()
          .withColumnLineage(facet);
  output.setFacets(datasetFacets);

  event.setInputs(List.of(input));
  event.setOutputs(List.of(output));

  EntityReference inputRef = createEntityReference("i1", "service.db.schema.input_table");
  EntityReference outputRef = createEntityReference("o1", "service.db.schema.output_table");
  when(entityResolver.resolveTable(input)).thenReturn(inputRef);
  when(entityResolver.resolveOrCreateTable(eq(output), eq(UPDATED_BY))).thenReturn(outputRef);
  when(entityResolver.resolveOrCreateTable(eq(input), eq(UPDATED_BY))).thenReturn(inputRef);
  when(entityResolver.resolveOrCreatePipeline(anyString(), anyString(), eq(UPDATED_BY)))
      .thenReturn(null);

  List<AddLineage> edges = mapper.mapRunEvent(event, UPDATED_BY);

  assertEquals(1, edges.size());
  List<ColumnLineage> mappedColumns =
      edges.get(0).getEdge().getLineageDetails().getColumnsLineage();
  assertNotNull(mappedColumns);
  assertEquals(1, mappedColumns.size());
  assertEquals("service.db.schema.output_table.dst_col", mappedColumns.get(0).getToColumn());
}
@Test
void mapRunEvent_noColumnLineageFacet_noColumnLineageInResult() {
OpenLineageRunEvent event = createBaseEvent(EventType.COMPLETE);

View file

@ -0,0 +1,129 @@
/*
* Copyright 2021 Collate
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.openmetadata.service.secrets.converter;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Test;
import org.openmetadata.schema.entity.utils.common.AccessTokenConfig;
import org.openmetadata.schema.entity.utils.common.BasicAuthConfig;
import org.openmetadata.schema.entity.utils.common.GcpCredentialsConfig;
import org.openmetadata.schema.entity.utils.common.MWAAAuthConfig;
import org.openmetadata.schema.services.connections.pipeline.AirflowRestApiConnection;
class AirflowRestApiConnectionClassConverterTest {

  private final AirflowRestApiConnectionClassConverter converter =
      new AirflowRestApiConnectionClassConverter();

  /**
   * Wraps the given auth config in a connection payload, runs the converter and checks that the
   * result is an {@link AirflowRestApiConnection}. A HashMap is used because the auth config may
   * be null.
   */
  private AirflowRestApiConnection convertWithAuth(Map<String, Object> authConfig) {
    Map<String, Object> payload = new HashMap<>();
    payload.put("authConfig", authConfig);
    return assertInstanceOf(AirflowRestApiConnection.class, converter.convert(payload));
  }

  @Test
  void convert_basicAuth_convertsAuthConfig() {
    AirflowRestApiConnection conn =
        convertWithAuth(Map.of("username", "admin", "password", "secret"));

    BasicAuthConfig auth = assertInstanceOf(BasicAuthConfig.class, conn.getAuthConfig());
    assertEquals("admin", auth.getUsername());
    assertEquals("secret", auth.getPassword());
  }

  @Test
  void convert_accessToken_convertsAuthConfig() {
    AirflowRestApiConnection conn = convertWithAuth(Map.of("token", "my-access-token"));

    AccessTokenConfig auth = assertInstanceOf(AccessTokenConfig.class, conn.getAuthConfig());
    assertEquals("my-access-token", auth.getToken());
  }

  @Test
  void convert_gcpCredentials_convertsAuthConfig() {
    Map<String, Object> gcpValues = Map.of("type", "service_account", "projectId", "my-project");
    Map<String, Object> gcpCredentials = Map.of("gcpConfig", gcpValues);

    AirflowRestApiConnection conn = convertWithAuth(Map.of("credentials", gcpCredentials));

    assertInstanceOf(GcpCredentialsConfig.class, conn.getAuthConfig());
  }

  @Test
  void convert_mwaaAuth_convertsAuthConfig() {
    Map<String, Object> awsConfig =
        Map.of(
            "awsRegion", "us-east-1",
            "awsAccessKeyId", "AKIAIOSFODNN7EXAMPLE",
            "awsSecretAccessKey", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY");
    Map<String, Object> mwaaConfig =
        Map.of("mwaaEnvironmentName", "my-environment", "awsConfig", awsConfig);

    AirflowRestApiConnection conn = convertWithAuth(Map.of("mwaaConfig", mwaaConfig));

    MWAAAuthConfig auth = assertInstanceOf(MWAAAuthConfig.class, conn.getAuthConfig());
    assertNotNull(auth.getMwaaConfig());
    assertEquals("my-environment", auth.getMwaaConfig().getMwaaEnvironmentName());
  }

  @Test
  void convert_nullAuthConfig_returnsConnectionWithoutConversion() {
    // A null authConfig is not a Map, so the converter returns the connection unchanged.
    convertWithAuth(null);
  }
}

View file

@ -295,6 +295,9 @@
},
"ownership": {
"$ref": "#/definitions/ownershipFacet"
},
"columnLineage": {
"$ref": "#/definitions/columnLineageFacet"
}
},
"additionalProperties": true

View file

@ -33,9 +33,12 @@
"default": "10"
},
"connection": {
"title": "Metadata Database Connection",
"description": "Underlying database connection. See https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for supported backends.",
"title": "Airflow Connection",
"description": "Choose between database connection or REST API connection to fetch metadata from Airflow.",
"oneOf": [
{
"$ref": "../../../utils/airflowRestApiConnection.json"
},
{
"$ref": "backendConnection.json"
},

View file

@ -0,0 +1,57 @@
{
"$id": "https://open-metadata.org/schema/entity/utils/airflowRestApiConnection.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "AirflowRestApiConnection",
"description": "Airflow REST API Connection Config for connecting via REST API.",
"type": "object",
"javaType": "org.openmetadata.schema.services.connections.pipeline.AirflowRestApiConnection",
"definitions": {
"ApiVersion": {
"description": "Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect the version automatically.",
"type": "string",
"enum": ["v1", "v2", "auto"],
"default": "auto"
}
},
"properties": {
"type": {
"title": "Service Type",
"description": "Service Type",
"type": "string",
"enum": ["RestAPI"],
"default": "RestAPI"
},
"authConfig": {
"title": "Authentication Configuration",
"description": "Choose an authentication method: Basic Auth (username/password), Access Token, GCP Service Account (for Cloud Composer), or AWS Credentials (for MWAA).",
"oneOf": [
{
"$ref": "./common/basicAuthConfig.json"
},
{
"$ref": "./common/accessTokenConfig.json"
},
{
"$ref": "./common/gcpCredentialsConfig.json"
},
{
"$ref": "./common/mwaaAuthConfig.json"
}
]
},
"apiVersion": {
"title": "API Version",
"description": "Airflow REST API version.",
"$ref": "#/definitions/ApiVersion",
"default": "auto"
},
"verifySSL": {
"title": "Verify SSL",
"description": "Whether to verify SSL certificates when connecting to the Airflow API.",
"type": "boolean",
"default": true
}
},
"required": ["authConfig"],
"additionalProperties": false
}

View file

@ -0,0 +1,18 @@
{
"$id": "https://open-metadata.org/schema/entity/utils/common/accessTokenConfig.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Access Token",
"description": "Static access token for Airflow API authentication.",
"type": "object",
"javaType": "org.openmetadata.schema.entity.utils.common.AccessTokenConfig",
"properties": {
"token": {
"title": "Token",
"description": "Static access token for Airflow API authentication.",
"type": "string",
"format": "password"
}
},
"required": ["token"],
"additionalProperties": false
}

View file

@ -0,0 +1,23 @@
{
"$id": "https://open-metadata.org/schema/entity/utils/common/basicAuthConfig.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Basic Auth",
"description": "Username and password for Airflow API authentication.",
"type": "object",
"javaType": "org.openmetadata.schema.entity.utils.common.BasicAuthConfig",
"properties": {
"username": {
"title": "Username",
"description": "Username for basic authentication to the Airflow API.",
"type": "string"
},
"password": {
"title": "Password",
"description": "Password for basic authentication to the Airflow API.",
"type": "string",
"format": "password"
}
},
"required": ["username", "password"],
"additionalProperties": false
}

View file

@ -0,0 +1,17 @@
{
"$id": "https://open-metadata.org/schema/entity/utils/common/gcpCredentialsConfig.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "GCP Service Account",
"description": "GCP credentials for Google Cloud Composer. Supports service account values, credentials path, workload identity (external account), and ADC. Tokens are auto-refreshed at runtime.",
"type": "object",
"javaType": "org.openmetadata.schema.entity.utils.common.GcpCredentialsConfig",
"properties": {
"credentials": {
"title": "GCP Credentials",
"description": "GCP credentials configuration.",
"$ref": "../../../security/credentials/gcpCredentials.json"
}
},
"required": ["credentials"],
"additionalProperties": false
}

View file

@ -0,0 +1,37 @@
{
"$id": "https://open-metadata.org/schema/entity/utils/common/mwaaAuthConfig.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "MWAA Authentication",
"description": "AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.",
"type": "object",
"javaType": "org.openmetadata.schema.entity.utils.common.MWAAAuthConfig",
"properties": {
"mwaaConfig": {
"title": "MWAA Configuration",
"description": "MWAA credentials and environment configuration.",
"type": "object",
"javaType": "org.openmetadata.schema.entity.utils.common.MWAAConfig",
"properties": {
"mwaaEnvironmentName": {
"title": "MWAA Environment Name",
"description": "The name of your MWAA environment.",
"type": "string"
},
"awsConfig": {
"title": "AWS Configuration",
"description": "AWS credentials for generating MWAA CLI token.",
"$ref": "../../../security/credentials/awsCredentials.json"
}
},
"required": [
"mwaaEnvironmentName",
"awsConfig"
],
"additionalProperties": false
}
},
"required": [
"mwaaConfig"
],
"additionalProperties": false
}

View file

@ -37,6 +37,165 @@ Note that the **Backend Connection** is only used to extract metadata from a DAG
$$
## Airflow REST API Connection
The REST API connection calls the Airflow web server over HTTP/HTTPS and does not require direct access to Airflow's metadata database. This makes it the right choice for managed deployments (Astronomer, GCP Cloud Composer, MWAA) and for any self-hosted Airflow where direct DB access is not available or desired.
$$note
The REST API connection fetches DAG topology, task structure, schedules, and run statuses. **Lineage is not captured through this connection.** To get table-level and column-level lineage in OpenMetadata, you must separately install the <a href="https://docs.open-metadata.org/connectors/pipeline/airflow/lineage-backend" target="_blank">OpenMetadata Lineage Backend</a> in Airflow (strategy 2) or use the <a href="https://docs.open-metadata.org/connectors/pipeline/airflow/lineage-operator" target="_blank">Lineage Operator</a> in your DAGs (strategy 3). Once those emit OpenLineage events, lineage edges will appear automatically in OpenMetadata.
$$
### Host URL Format by Deployment
| Deployment | Example Host and Port URL |
|---|---|
| Self-hosted / Docker (ingestion runs on the host) | `http://localhost:8080` |
| Self-hosted / Docker (ingestion runs inside Docker) | `http://host.docker.internal:8080` |
| Google Cloud Composer | `https://ko82752sdo9f7zjf811c682mw1e5uuc9-dot-us-east1.composer.googleusercontent.com` |
| Astronomer | `https://cmn4c1zax823t00qf36gnlquw.ay.astronomer.run/v13jlquw` |
| Amazon MWAA | `https://a1234awd1-5324-6f89-9523-1sq41234adqa.c2.airflow.eu-north-1.on.aws` |
For **Cloud Composer**, find the web server URL in GCP Console → **Composer → Environments → Open Airflow UI**. Copy the base URL (omit any trailing path).
For **Astronomer**, find your deployment URL in the Astronomer UI → **Deployments → Open Airflow**. Do **not** include a trailing slash.
### When to Use REST API vs. a Database Connection
Use the **REST API connection** when:
- You are on Astronomer (DB access is unavailable).
- You are on Cloud Composer or MWAA (DB access is unavailable or impractical).
- You are running Airflow 3.x.
- You do not have direct network access to the underlying MySQL / Postgres / SQLite metadata DB.
Use a **Database connection** (MySQL / Postgres / SQLite sections below) when:
- You self-host Airflow and have direct access to the metadata DB.
- You want to read raw task-instance data directly from the DB rather than via the API.
- You are using the Backend Connection strategy (Airflow plugin / Lineage Backend approach).
$$section
### Authentication Configuration $(id="authConfig")
Select the authentication method for the Airflow REST API. Pick one of the four options from the dropdown — the corresponding fields will appear:
- **Basic Auth**: Enter a username and password. For Airflow 3.x, a short-lived JWT is automatically exchanged at startup; for Airflow 2.x, HTTP Basic auth is used directly.
- **Access Token**: Paste a static bearer token you have generated in Airflow.
- **GCP Service Account**: Recommended for **Google Cloud Composer**. GCP OAuth2 tokens are fetched and auto-refreshed at runtime via `google-auth` — tokens never expire mid-run.
- **MWAA Configuration**: AWS credentials used to authenticate with Amazon Managed Workflows for Apache Airflow (MWAA).
$$
### Authentication Quick Reference
| Deployment | Recommended Auth |
|---|---|
| Self-hosted Airflow 2.x or 3.x | Basic Auth |
| Astronomer | Access Token (Deployment API token) |
| Google Cloud Composer | GCP Service Account |
| Amazon MWAA | MWAA Configuration (AWS credentials) |
| Any deployment with a pre-generated bearer token | Access Token |
$$section
### Username $(id="username")
Username for Basic Auth. The user must have permission to call the Airflow REST API.
For Airflow 3.x this triggers an automatic JWT exchange (`POST /auth/token`). For Airflow 2.x, HTTP Basic auth is used directly.
$$
$$section
### Password $(id="password")
Password for Basic Auth.
$$
$$section
### Token $(id="token")
Static bearer token for Access Token authentication. Paste the token value here — it will be sent as `Authorization: Bearer <token>` on every request.
Use this when you have generated a long-lived API token in your Airflow deployment.
$$
### Generating an Astronomer Deployment Token
For **Astronomer** deployments, use Access Token auth with a Deployment API token:
1. Open the Astronomer UI and navigate to **Deployments**.
2. Select your deployment and go to **API Keys** or **Tokens** (the exact label depends on your Astronomer version).
3. Click **Add API Key** / **Generate Token**, give it a descriptive name (e.g. `openmetadata-ingestion`), and copy the value.
4. Paste it in the **Token** field above.
For self-hosted Airflow, you can generate an API token via the Airflow UI under **Admin → Users** or via the Airflow CLI.
$$section
### MWAA Configuration $(id="mwaaConfig")
AWS credentials used to authenticate with Amazon Managed Workflows for Apache Airflow (MWAA).
The authentication requires the MWAA Environment Name and an AWS configuration.
#### Configuration Fields
**MWAA Environment Name**: The name of the Amazon MWAA environment to connect to.
**AWS Region**: The AWS region where the MWAA environment is deployed.
**AWS Access Key ID**: The access key used to authenticate with AWS.
**AWS Secret Access Key**: The secret key associated with the AWS access key.
**AWS Session Token (Optional)**: Required when using temporary AWS credentials.
**Assume Role ARN (Optional)**: ARN of IAM role to assume for cross-account access.
**Assume Role Session Name (Optional)**: Session name for assumed role.
**Endpoint URL (Optional)**: Custom endpoint URL for AWS-compatible services (MinIO, LocalStack).
$$
$$section
### GCP Credentials $(id="credentials")
GCP credentials used to obtain short-lived OAuth2 tokens for authenticating with Google Cloud Composer. Tokens are automatically refreshed when they expire, so ingestion runs are never interrupted by token expiry.
Supports all four GCP authentication types:
- **GCP Credentials Values**: Paste the service account JSON fields directly (project ID, client email, private key, etc.).
- **GCP Credentials Path**: Provide a file path to a service account JSON key file on the ingestion host.
- **GCP External Account (Workload Identity Federation)**: For GKE or other workload identity setups.
- **GCP ADC (Application Default Credentials)**: Uses the credentials already available in the environment (e.g. via `gcloud auth application-default login` or the GCE metadata server).
You can also optionally configure **service account impersonation** via `gcpImpersonateServiceAccount`.
$$
### Finding Your Cloud Composer Airflow URL
In GCP Console, go to **Composer → Environments**, select your environment, and click **Open Airflow UI**. Copy the base URL (e.g. `https://<hash>-dot-<region>.composer.googleusercontent.com`) — this is what you enter in the **Host and Port** field above.
### Choosing a GCP Credential Type
| Credential Type | When to Use |
|---|---|
| **GCP Credentials Values** | Ingestion runs outside GCP (on-prem, local machine). Paste the service account JSON fields directly. |
| **GCP Credentials Path** | Ingestion runs on a host where the service account JSON key file already exists at a known local path. |
| **GCP ADC (Application Default Credentials)** | Ingestion runs on a GCE VM or GKE pod with an attached service account. Uses the GCE metadata server or `gcloud auth application-default login`. |
| **GCP External Account (Workload Identity Federation)** | Ingestion runs on GKE with Workload Identity, or on a non-GCP system using federated identity (e.g. AWS → GCP). |
$$section
### API Version $(id="apiVersion")
Airflow REST API version to use:
- **auto** (default): OpenMetadata tries `v2` first (Airflow 3.x), then falls back to `v1` (Airflow 2.x).
- **v1**: Force Airflow 2.x API.
- **v2**: Force Airflow 3.x API.
$$
$$section
### Verify SSL $(id="verifySSL")
Whether to verify SSL certificates when connecting to the Airflow REST API. Set to `false` only in development environments with self-signed certificates.
$$
## MySQL Connection

View file

@ -530,7 +530,7 @@ export interface ConfigObject {
*
* GCP Credentials for Google Drive API
*/
credentials?: CredentialsClass;
credentials?: PurpleGCPCredentials;
/**
* Regex to only include/exclude databases that matches the pattern.
*
@ -1197,9 +1197,8 @@ export interface ConfigObject {
*
* Choose between API or database connection fetch metadata from superset.
*
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Matillion Auth Configuration
*
@ -2507,6 +2506,8 @@ export enum AuthProvider {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*
* Authentication type to connect to Apache Ranger.
@ -2689,6 +2690,8 @@ export interface AuthenticationType {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface AWSCredentials {
@ -3137,6 +3140,8 @@ export interface IcebergFileSystem {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*
* Azure Cloud Credentials
@ -3410,12 +3415,16 @@ export interface ConfigSourceConnection {
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*
* AWS credentials required to access the S3 file.
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface Credentials {
@ -3625,9 +3634,10 @@ export interface GCPImpersonateServiceAccountValues {
*
* Mysql Database Connection Config
*
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Airflow REST API Connection Config for connecting via REST API.
*
* Lineage Backend Connection Config
*
@ -3738,7 +3748,10 @@ export interface ConfigConnection {
* SSL Configuration details.
*/
sslConfig?: ConnectionSSLConfig;
verifySSL?: VerifySSL;
/**
* Whether to verify SSL certificates when connecting to the Airflow API.
*/
verifySSL?: boolean | VerifySSL;
/**
* Choose Auth Config Type.
*/
@ -3797,6 +3810,15 @@ export interface ConfigConnection {
* Use slow logs to extract lineage.
*/
useSlowLogs?: boolean;
/**
* Airflow REST API version.
*/
apiVersion?: APIVersion;
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*/
authConfig?: AuthenticationConfiguration;
/**
* Regex exclude pipelines.
*/
@ -3808,6 +3830,92 @@ export interface ConfigConnection {
supportsViewLineageExtraction?: boolean;
}
/**
* Airflow REST API version.
*
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
* the version automatically.
*/
export enum APIVersion {
Auto = "auto",
V1 = "v1",
V2 = "v2",
}
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*
* Username and password for Airflow API authentication.
*
* Static access token for Airflow API authentication.
*
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
* runtime.
*
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
*/
export interface AuthenticationConfiguration {
/**
* Password for basic authentication to the Airflow API.
*/
password?: string;
/**
* Username for basic authentication to the Airflow API.
*/
username?: string;
/**
* Static access token for Airflow API authentication.
*/
token?: string;
/**
* GCP credentials configuration.
*/
credentials?: GcpConfigClass;
/**
* MWAA credentials and environment configuration.
*/
mwaaConfig?: MWAAConfiguration;
}
/**
* GCP Credentials
*
* GCP credentials configs.
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*/
export interface GcpConfigClass {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
*/
gcpConfig: GCPCredentialsConfiguration;
/**
* we enable the authenticated service account to impersonate another service account
*/
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
}
/**
* MWAA credentials and environment configuration.
*/
export interface MWAAConfiguration {
/**
* AWS credentials for generating MWAA CLI token.
*/
awsConfig: AWSCredentials;
/**
* The name of your MWAA environment.
*/
mwaaEnvironmentName: string;
}
/**
* Choose Auth Config Type.
*
@ -3873,6 +3981,8 @@ export interface DataStorageConfig {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface AwsCredentials {
@ -4001,6 +4111,7 @@ export enum ConnectionType {
MatillionETL = "MatillionETL",
Mysql = "Mysql",
Postgres = "Postgres",
RESTAPI = "RestAPI",
S3 = "S3",
SQLite = "SQLite",
}
@ -4025,6 +4136,8 @@ export enum VerifySSL {
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*
* Azure Cloud Credentials
@ -4033,7 +4146,7 @@ export enum VerifySSL {
*
* Azure Credentials
*/
export interface CredentialsClass {
export interface PurpleGCPCredentials {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
@ -4202,27 +4315,6 @@ export enum FHIRVersion {
Stu3 = "STU3",
}
/**
* GCP Credentials
*
* GCP credentials configs.
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP Credentials for Google Drive API
*/
export interface GcpConfigClass {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
*/
gcpConfig: GCPCredentialsConfiguration;
/**
* we enable the authenticated service account to impersonate another service account
*/
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
}
/**
* Do not set any credentials. Note that credentials are required to extract .lkml views and
* their lineage.

View file

@ -103,6 +103,7 @@ export interface OpenLineageInputDataset {
* Dataset facets containing metadata like schema.
*/
export interface DatasetFacets {
columnLineage?: ColumnLineageFacet;
datasource?: DatasourceFacet;
documentation?: DocumentationFacet;
ownership?: OwnershipFacet;
@ -111,6 +112,65 @@ export interface DatasetFacets {
[property: string]: any;
}
/**
* Column lineage facet describing how output columns are derived from input columns.
*
* Base facet that all facets extend from.
*/
export interface ColumnLineageFacet {
/**
* URI identifying the producer of this metadata.
*/
_producer?: string;
/**
* URI pointing to the schema definition for this facet.
*/
_schemaURL?: string;
/**
* Map of output field names to their lineage information.
*/
fields: { [key: string]: ColumnLineageField };
[property: string]: any;
}
/**
* Column lineage information for a single output field.
*/
export interface ColumnLineageField {
/**
* List of input fields that contribute to this output field.
*/
inputFields: InputField[];
/**
* Human-readable description of the transformation.
*/
transformationDescription?: string;
/**
* Type of transformation (e.g., DIRECT, AGGREGATION).
*/
transformationType?: string;
[property: string]: any;
}
/**
* A reference to an input column in column lineage.
*/
export interface InputField {
/**
* The name of the input field/column.
*/
field: string;
/**
* The name of the input dataset.
*/
name: string;
/**
* The namespace of the input dataset.
*/
namespace: string;
[property: string]: any;
}
/**
* Datasource facet providing connection details for the dataset.
*
@ -357,65 +417,6 @@ export interface OutputDatasetFacets {
[property: string]: any;
}
/**
* Column lineage facet describing how output columns are derived from input columns.
*
* Base facet that all facets extend from.
*/
export interface ColumnLineageFacet {
/**
* URI identifying the producer of this metadata.
*/
_producer?: string;
/**
* URI pointing to the schema definition for this facet.
*/
_schemaURL?: string;
/**
* Map of output field names to their lineage information.
*/
fields: { [key: string]: ColumnLineageField };
[property: string]: any;
}
/**
* Column lineage information for a single output field.
*/
export interface ColumnLineageField {
/**
* List of input fields that contribute to this output field.
*/
inputFields: InputField[];
/**
* Human-readable description of the transformation.
*/
transformationDescription?: string;
/**
* Type of transformation (e.g., DIRECT, AGGREGATION).
*/
transformationType?: string;
[property: string]: any;
}
/**
* A reference to an input column in column lineage.
*/
export interface InputField {
/**
* The name of the input field/column.
*/
field: string;
/**
* The name of the input dataset.
*/
name: string;
/**
* The namespace of the input dataset.
*/
namespace: string;
[property: string]: any;
}
/**
* The run this event is about.
*

View file

@ -93,6 +93,7 @@ export interface OpenLineageInputDataset {
* Dataset facets containing metadata like schema.
*/
export interface DatasetFacets {
columnLineage?: ColumnLineageFacet;
datasource?: DatasourceFacet;
documentation?: DocumentationFacet;
ownership?: OwnershipFacet;
@ -101,6 +102,65 @@ export interface DatasetFacets {
[property: string]: any;
}
/**
* Column lineage facet describing how output columns are derived from input columns.
*
* Base facet that all facets extend from.
*/
export interface ColumnLineageFacet {
/**
* URI identifying the producer of this metadata.
*/
_producer?: string;
/**
* URI pointing to the schema definition for this facet.
*/
_schemaURL?: string;
/**
* Map of output field names to their lineage information.
*/
fields: { [key: string]: ColumnLineageField };
[property: string]: any;
}
/**
* Column lineage information for a single output field.
*/
export interface ColumnLineageField {
/**
* List of input fields that contribute to this output field.
*/
inputFields: InputField[];
/**
* Human-readable description of the transformation.
*/
transformationDescription?: string;
/**
* Type of transformation (e.g., DIRECT, AGGREGATION).
*/
transformationType?: string;
[property: string]: any;
}
/**
* A reference to an input column in column lineage.
*/
export interface InputField {
/**
* The name of the input field/column.
*/
field: string;
/**
* The name of the input dataset.
*/
name: string;
/**
* The namespace of the input dataset.
*/
namespace: string;
[property: string]: any;
}
/**
* Datasource facet providing connection details for the dataset.
*
@ -347,65 +407,6 @@ export interface OutputDatasetFacets {
[property: string]: any;
}
/**
* Column lineage facet describing how output columns are derived from input columns.
*
* Base facet that all facets extend from.
*/
export interface ColumnLineageFacet {
/**
* URI identifying the producer of this metadata.
*/
_producer?: string;
/**
* URI pointing to the schema definition for this facet.
*/
_schemaURL?: string;
/**
* Map of output field names to their lineage information.
*/
fields: { [key: string]: ColumnLineageField };
[property: string]: any;
}
/**
* Column lineage information for a single output field.
*/
export interface ColumnLineageField {
/**
* List of input fields that contribute to this output field.
*/
inputFields: InputField[];
/**
* Human-readable description of the transformation.
*/
transformationDescription?: string;
/**
* Type of transformation (e.g., DIRECT, AGGREGATION).
*/
transformationType?: string;
[property: string]: any;
}
/**
* A reference to an input column in column lineage.
*/
export interface InputField {
/**
* The name of the input field/column.
*/
field: string;
/**
* The name of the input dataset.
*/
name: string;
/**
* The namespace of the input dataset.
*/
namespace: string;
[property: string]: any;
}
/**
* The run this event is about.
*

View file

@ -117,13 +117,12 @@ export interface PipelineConnection {
*/
export interface ConfigObject {
/**
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Matillion Auth Configuration
*/
connection?: MetadataDatabaseConnection;
connection?: AirflowConnection;
/**
* Pipeline Service Management/UI URI.
*
@ -454,6 +453,8 @@ export interface FluffyAuthentication {
}
/**
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configs.
*
* AWS credentials configuration.
@ -689,9 +690,10 @@ export interface AzureCredentials {
}
/**
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Airflow REST API Connection Config for connecting via REST API.
*
* Lineage Backend Connection Config
*
@ -705,15 +707,28 @@ export interface AzureCredentials {
*
* Matillion ETL Auth Config.
*/
export interface MetadataDatabaseConnection {
export interface AirflowConnection {
/**
* Regex exclude pipelines.
* Airflow REST API version.
*/
pipelineFilterPattern?: FilterPattern;
apiVersion?: APIVersion;
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*/
authConfig?: AuthenticationConfiguration;
/**
* Service Type
*/
type?: Type;
/**
* Whether to verify SSL certificates when connecting to the Airflow API.
*/
verifySSL?: boolean;
/**
* Regex exclude pipelines.
*/
pipelineFilterPattern?: FilterPattern;
/**
* Choose Auth Config Type.
*/
@ -827,6 +842,188 @@ export interface MetadataDatabaseConnection {
supportsViewLineageExtraction?: boolean;
}
/**
* Airflow REST API version.
*
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
* the version automatically.
*/
export enum APIVersion {
Auto = "auto",
V1 = "v1",
V2 = "v2",
}
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*
* Username and password for Airflow API authentication.
*
* Static access token for Airflow API authentication.
*
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
* runtime.
*
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
*/
export interface AuthenticationConfiguration {
/**
* Password for basic authentication to the Airflow API.
*/
password?: string;
/**
* Username for basic authentication to the Airflow API.
*/
username?: string;
/**
* Static access token for Airflow API authentication.
*/
token?: string;
/**
* GCP credentials configuration.
*/
credentials?: GCPCredentials;
/**
* MWAA credentials and environment configuration.
*/
mwaaConfig?: MWAAConfiguration;
}
/**
* GCP credentials configuration.
*
* GCP credentials configs.
*/
export interface GCPCredentials {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
*/
gcpConfig: GCPCredentialsConfiguration;
/**
* we enable the authenticated service account to impersonate another service account
*/
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
}
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
*
* Pass the raw credential values provided by GCP
*
* Pass the path of file containing the GCP credentials info
*
* Use the application default credentials
*/
export interface GCPCredentialsConfiguration {
/**
* Google Cloud auth provider certificate.
*/
authProviderX509CertUrl?: string;
/**
* Google Cloud auth uri.
*/
authUri?: string;
/**
* Google Cloud email.
*/
clientEmail?: string;
/**
* Google Cloud Client ID.
*/
clientId?: string;
/**
* Google Cloud client certificate uri.
*/
clientX509CertUrl?: string;
/**
* Google Cloud private key.
*/
privateKey?: string;
/**
* Google Cloud private key id.
*/
privateKeyId?: string;
/**
* Project ID
*
* GCP Project ID to parse metadata from
*/
projectId?: string[] | string;
/**
* Google Cloud token uri.
*/
tokenUri?: string;
/**
* Google Cloud Platform account type.
*
* Google Cloud Platform ADC ( Application Default Credentials )
*/
type?: string;
/**
* Path of the file containing the GCP credentials info
*/
path?: string;
/**
* Google Security Token Service audience which contains the resource name for the workload
* identity pool and the provider identifier in that pool.
*/
audience?: string;
/**
* This object defines the mechanism used to retrieve the external credential from the local
* environment so that it can be exchanged for a GCP access token via the STS endpoint
*/
credentialSource?: { [key: string]: string };
/**
* Google Cloud Platform account type.
*/
externalType?: string;
/**
* Google Security Token Service subject token type based on the OAuth 2.0 token exchange
* spec.
*/
subjectTokenType?: string;
/**
* Google Security Token Service token exchange endpoint.
*/
tokenURL?: string;
[property: string]: any;
}
/**
* we enable the authenticated service account to impersonate another service account
*
* Pass the values to impersonate a service account of Google Cloud
*/
export interface GCPImpersonateServiceAccountValues {
/**
* The impersonated service account email
*/
impersonateServiceAccount?: string;
/**
* Number of seconds the delegated credential should be valid
*/
lifetime?: number;
[property: string]: any;
}
/**
* MWAA credentials and environment configuration.
*/
export interface MWAAConfiguration {
/**
* AWS credentials for generating MWAA CLI token.
*/
awsConfig: AWSCredentials;
/**
* The name of your MWAA environment.
*/
mwaaEnvironmentName: string;
}
/**
* Choose Auth Config Type.
*
@ -908,6 +1105,8 @@ export interface DataStorageConfig {
}
/**
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configs.
*
* AWS credentials configuration.
@ -993,6 +1192,7 @@ export enum Type {
MatillionETL = "MatillionETL",
Mysql = "Mysql",
Postgres = "Postgres",
RESTAPI = "RestAPI",
SQLite = "SQLite",
}

View file

@ -2235,6 +2235,8 @@ export interface DBTPrefixConfig {
*
* AWS credentials required to access the S3 file.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*
* Azure Cloud Credentials
@ -2249,6 +2251,8 @@ export interface DBTPrefixConfig {
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*/
export interface Credentials {
@ -3549,9 +3553,8 @@ export interface ConfigObject {
*
* Choose between mysql and postgres connection for alation database
*
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Matillion Auth Configuration
*/
@ -3772,7 +3775,7 @@ export interface ConfigObject {
*
* GCP Credentials for Google Drive API
*/
credentials?: CredentialsClass;
credentials?: PurpleGCPCredentials;
/**
* Regex to only include/exclude databases that matches the pattern.
*
@ -5001,6 +5004,8 @@ export enum AuthProvider {
*
* AWS credentials required to access the S3 file.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*
* Authentication type to connect to Apache Ranger.
@ -5183,6 +5188,8 @@ export interface AuthenticationTypeForTableau {
*
* AWS credentials required to access the S3 file.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface AWSCredentials {
@ -5631,6 +5638,8 @@ export interface IcebergFileSystem {
*
* AWS credentials required to access the S3 file.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*
* Azure Cloud Credentials
@ -5916,9 +5925,10 @@ export interface ConfigSourceConnection {
*
* Choose between mysql and postgres connection for alation database
*
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Airflow REST API Connection Config for connecting via REST API.
*
* Lineage Backend Connection Config
*
@ -5965,8 +5975,11 @@ export interface ConfigConnection {
* Username to connect to the Matillion. This user should have privileges to read all the
* metadata in Matillion.
*/
username?: string;
verifySSL?: VerifySSL;
username?: string;
/**
* Whether to verify SSL certificates when connecting to the Airflow API.
*/
verifySSL?: boolean | VerifySSL;
/**
* Choose Auth Config Type.
*/
@ -6086,6 +6099,15 @@ export interface ConfigConnection {
* <USERNAME> <PASSWORD>`
*/
userKey?: string;
/**
* Airflow REST API version.
*/
apiVersion?: APIVersion;
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*/
authConfig?: AuthenticationConfiguration;
/**
* Regex exclude pipelines.
*/
@ -6097,6 +6119,92 @@ export interface ConfigConnection {
supportsViewLineageExtraction?: boolean;
}
/**
* Airflow REST API version.
*
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
* the version automatically.
*/
export enum APIVersion {
Auto = "auto",
V1 = "v1",
V2 = "v2",
}
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*
* Username and password for Airflow API authentication.
*
* Static access token for Airflow API authentication.
*
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
* runtime.
*
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
*/
export interface AuthenticationConfiguration {
/**
* Password for basic authentication to the Airflow API.
*/
password?: string;
/**
* Username for basic authentication to the Airflow API.
*/
username?: string;
/**
* Static access token for Airflow API authentication.
*/
token?: string;
/**
* GCP credentials configuration.
*/
credentials?: GcpConfigClass;
/**
* MWAA credentials and environment configuration.
*/
mwaaConfig?: MWAAConfiguration;
}
/**
* GCP credentials configs.
*
* GCP Credentials
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*/
export interface GcpConfigClass {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
*/
gcpConfig: GCPCredentialsConfiguration;
/**
* we enable the authenticated service account to impersonate another service account
*/
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
}
/**
* MWAA credentials and environment configuration.
*/
export interface MWAAConfiguration {
/**
* AWS credentials for generating MWAA CLI token.
*/
awsConfig: AWSCredentials;
/**
* The name of your MWAA environment.
*/
mwaaEnvironmentName: string;
}
/**
* Choose Auth Config Type.
*
@ -6162,6 +6270,8 @@ export interface DataStorageConfig {
*
* AWS credentials required to access the S3 file.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface AwsCredentials {
@ -6290,6 +6400,7 @@ export enum ConnectionType {
MatillionETL = "MatillionETL",
Mysql = "Mysql",
Postgres = "Postgres",
RESTAPI = "RestAPI",
S3 = "S3",
SQLite = "SQLite",
}
@ -6314,6 +6425,8 @@ export enum VerifySSL {
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*
* Azure Cloud Credentials
@ -6322,7 +6435,7 @@ export enum VerifySSL {
*
* Azure Credentials
*/
export interface CredentialsClass {
export interface PurpleGCPCredentials {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
@ -6491,27 +6604,6 @@ export enum FHIRVersion {
Stu3 = "STU3",
}
/**
* GCP credentials configs.
*
* GCP Credentials
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP Credentials for Google Drive API
*/
export interface GcpConfigClass {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
*/
gcpConfig: GCPCredentialsConfiguration;
/**
* we enable the authenticated service account to impersonate another service account
*/
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
}
/**
* Do not set any credentials. Note that credentials are required to extract .lkml views and
* their lineage.

View file

@ -412,7 +412,7 @@ export interface ConfigObject {
*
* GCP Credentials for Google Drive API
*/
credentials?: CredentialsClass;
credentials?: PurpleGCPCredentials;
/**
* Regex to only include/exclude databases that matches the pattern.
*
@ -1079,9 +1079,8 @@ export interface ConfigObject {
*
* Choose between API or database connection fetch metadata from superset.
*
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Matillion Auth Configuration
*
@ -2389,6 +2388,8 @@ export enum AuthProvider {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*
* Authentication type to connect to Apache Ranger.
@ -2571,6 +2572,8 @@ export interface AuthenticationType {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface AWSCredentials {
@ -3019,6 +3022,8 @@ export interface IcebergFileSystem {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*
* Azure Cloud Credentials
@ -3292,12 +3297,16 @@ export interface ConfigSourceConnection {
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*
* AWS credentials required to access the S3 file.
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface Credentials {
@ -3507,9 +3516,10 @@ export interface GCPImpersonateServiceAccountValues {
*
* Mysql Database Connection Config
*
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Airflow REST API Connection Config for connecting via REST API.
*
* Lineage Backend Connection Config
*
@ -3620,7 +3630,10 @@ export interface ConfigConnection {
* SSL Configuration details.
*/
sslConfig?: ConnectionSSLConfig;
verifySSL?: VerifySSL;
/**
* Whether to verify SSL certificates when connecting to the Airflow API.
*/
verifySSL?: boolean | VerifySSL;
/**
* Choose Auth Config Type.
*/
@ -3679,6 +3692,15 @@ export interface ConfigConnection {
* Use slow logs to extract lineage.
*/
useSlowLogs?: boolean;
/**
* Airflow REST API version.
*/
apiVersion?: APIVersion;
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*/
authConfig?: AuthenticationConfiguration;
/**
* Regex exclude pipelines.
*/
@ -3690,6 +3712,92 @@ export interface ConfigConnection {
supportsViewLineageExtraction?: boolean;
}
/**
* Airflow REST API version.
*
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
* the version automatically.
*/
export enum APIVersion {
Auto = "auto",
V1 = "v1",
V2 = "v2",
}
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*
* Username and password for Airflow API authentication.
*
* Static access token for Airflow API authentication.
*
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
* runtime.
*
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
*/
export interface AuthenticationConfiguration {
/**
* Password for basic authentication to the Airflow API.
*/
password?: string;
/**
* Username for basic authentication to the Airflow API.
*/
username?: string;
/**
* Static access token for Airflow API authentication.
*/
token?: string;
/**
* GCP credentials configuration.
*/
credentials?: GcpConfigClass;
/**
* MWAA credentials and environment configuration.
*/
mwaaConfig?: MWAAConfiguration;
}
/**
* GCP Credentials
*
* GCP credentials configs.
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*/
export interface GcpConfigClass {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
*/
gcpConfig: GCPCredentialsConfiguration;
/**
* we enable the authenticated service account to impersonate another service account
*/
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
}
/**
* MWAA credentials and environment configuration.
*/
export interface MWAAConfiguration {
/**
* AWS credentials for generating MWAA CLI token.
*/
awsConfig: AWSCredentials;
/**
* The name of your MWAA environment.
*/
mwaaEnvironmentName: string;
}
/**
* Choose Auth Config Type.
*
@ -3755,6 +3863,8 @@ export interface DataStorageConfig {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface AwsCredentials {
@ -3883,6 +3993,7 @@ export enum ConnectionType {
MatillionETL = "MatillionETL",
Mysql = "Mysql",
Postgres = "Postgres",
RESTAPI = "RestAPI",
S3 = "S3",
SQLite = "SQLite",
}
@ -3907,6 +4018,8 @@ export enum VerifySSL {
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*
* Azure Cloud Credentials
@ -3915,7 +4028,7 @@ export enum VerifySSL {
*
* Azure Credentials
*/
export interface CredentialsClass {
export interface PurpleGCPCredentials {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
@ -4084,27 +4197,6 @@ export enum FHIRVersion {
Stu3 = "STU3",
}
/**
* GCP Credentials
*
* GCP credentials configs.
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP Credentials for Google Drive API
*/
export interface GcpConfigClass {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
*/
gcpConfig: GCPCredentialsConfiguration;
/**
* we enable the authenticated service account to impersonate another service account
*/
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
}
/**
* Do not set any credentials. Note that credentials are required to extract .lkml views and
* their lineage.

View file

@ -1065,7 +1065,7 @@ export interface ConfigObject {
*
* GCP Credentials for Google Drive API
*/
credentials?: CredentialsClass;
credentials?: PurpleGCPCredentials;
/**
* Regex to only include/exclude databases that matches the pattern.
*
@ -1732,9 +1732,8 @@ export interface ConfigObject {
*
* Choose between API or database connection fetch metadata from superset.
*
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Matillion Auth Configuration
*
@ -2940,6 +2939,8 @@ export enum AuthMechanismEnum {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*
* Authentication type to connect to Apache Ranger.
@ -3122,6 +3123,8 @@ export interface AuthenticationType {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface AWSCredentials {
@ -3531,6 +3534,8 @@ export interface IcebergFileSystem {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*
* Azure Cloud Credentials
@ -3804,12 +3809,16 @@ export interface ConfigSourceConnection {
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*
* AWS credentials required to access the S3 file.
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface Credentials {
@ -4019,9 +4028,10 @@ export interface GCPImpersonateServiceAccountValues {
*
* Mysql Database Connection Config
*
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Airflow REST API Connection Config for connecting via REST API.
*
* Lineage Backend Connection Config
*
@ -4132,7 +4142,10 @@ export interface ConfigConnection {
* SSL Configuration details.
*/
sslConfig?: ConnectionSSLConfig;
verifySSL?: VerifySSL;
/**
* Whether to verify SSL certificates when connecting to the Airflow API.
*/
verifySSL?: boolean | VerifySSL;
/**
* Choose Auth Config Type.
*/
@ -4191,6 +4204,15 @@ export interface ConfigConnection {
* Use slow logs to extract lineage.
*/
useSlowLogs?: boolean;
/**
* Airflow REST API version.
*/
apiVersion?: APIVersion;
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*/
authConfig?: AuthenticationConfiguration;
/**
* Regex exclude pipelines.
*/
@ -4202,6 +4224,92 @@ export interface ConfigConnection {
supportsViewLineageExtraction?: boolean;
}
/**
* Airflow REST API version.
*
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
* the version automatically.
*/
export enum APIVersion {
Auto = "auto",
V1 = "v1",
V2 = "v2",
}
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*
* Username and password for Airflow API authentication.
*
* Static access token for Airflow API authentication.
*
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
* runtime.
*
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
*/
export interface AuthenticationConfiguration {
/**
* Password for basic authentication to the Airflow API.
*/
password?: string;
/**
* Username for basic authentication to the Airflow API.
*/
username?: string;
/**
* Static access token for Airflow API authentication.
*/
token?: string;
/**
* GCP credentials configuration.
*/
credentials?: GcpConfigClass;
/**
* MWAA credentials and environment configuration.
*/
mwaaConfig?: MWAAConfiguration;
}
/**
* GCP Credentials
*
* GCP credentials configs.
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*/
export interface GcpConfigClass {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
*/
gcpConfig: GCPCredentialsConfiguration;
/**
* we enable the authenticated service account to impersonate another service account
*/
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
}
/**
* MWAA credentials and environment configuration.
*/
export interface MWAAConfiguration {
/**
* AWS credentials for generating MWAA CLI token.
*/
awsConfig: AWSCredentials;
/**
* The name of your MWAA environment.
*/
mwaaEnvironmentName: string;
}
/**
* Choose Auth Config Type.
*
@ -4267,6 +4375,8 @@ export interface DataStorageConfig {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface AwsCredentials {
@ -4395,6 +4505,7 @@ export enum ConnectionType {
MatillionETL = "MatillionETL",
Mysql = "Mysql",
Postgres = "Postgres",
RESTAPI = "RestAPI",
S3 = "S3",
SQLite = "SQLite",
}
@ -4406,6 +4517,8 @@ export enum ConnectionType {
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*
* Azure Cloud Credentials
@ -4414,7 +4527,7 @@ export enum ConnectionType {
*
* Azure Credentials
*/
export interface CredentialsClass {
export interface PurpleGCPCredentials {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
@ -4583,27 +4696,6 @@ export enum FHIRVersion {
Stu3 = "STU3",
}
/**
* GCP Credentials
*
* GCP credentials configs.
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP Credentials for Google Drive API
*/
export interface GcpConfigClass {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
*/
gcpConfig: GCPCredentialsConfiguration;
/**
* we enable the authenticated service account to impersonate another service account
*/
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
}
/**
* Do not set any credentials. Note that credentials are required to extract .lkml views and
* their lineage.

View file

@ -15,11 +15,9 @@
*/
export interface AirflowConnection {
/**
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from Airflow.
*/
connection: MetadataDatabaseConnection;
connection: AirflowConnectionClass;
/**
* Pipeline Service Management/UI URI.
*/
@ -40,9 +38,10 @@ export interface AirflowConnection {
}
/**
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Airflow REST API Connection Config for connecting via REST API.
*
* Lineage Backend Connection Config
*
@ -52,15 +51,28 @@ export interface AirflowConnection {
*
* SQLite Database Connection Config
*/
export interface MetadataDatabaseConnection {
export interface AirflowConnectionClass {
/**
* Regex exclude pipelines.
* Airflow REST API version.
*/
pipelineFilterPattern?: FilterPattern;
apiVersion?: APIVersion;
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*/
authConfig?: AuthenticationConfiguration;
/**
* Service Type
*/
type?: Type;
/**
* Whether to verify SSL certificates when connecting to the Airflow API.
*/
verifySSL?: boolean;
/**
* Regex exclude pipelines.
*/
pipelineFilterPattern?: FilterPattern;
/**
* Choose Auth Config Type.
*/
@ -168,24 +180,190 @@ export interface MetadataDatabaseConnection {
}
/**
* Choose Auth Config Type.
* Airflow REST API version.
*
* Common Database Connection Config
*
* IAM Auth Database Connection Config
*
* Azure Database Connection Config
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
* the version automatically.
*/
export interface AuthConfigurationType {
/**
* Password to connect to source.
*/
password?: string;
awsConfig?: AWSCredentials;
azureConfig?: AzureCredentials;
export enum APIVersion {
Auto = "auto",
V1 = "v1",
V2 = "v2",
}
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*
* Username and password for Airflow API authentication.
*
* Static access token for Airflow API authentication.
*
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
* runtime.
*
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
*/
export interface AuthenticationConfiguration {
/**
* Password for basic authentication to the Airflow API.
*/
password?: string;
/**
* Username for basic authentication to the Airflow API.
*/
username?: string;
/**
* Static access token for Airflow API authentication.
*/
token?: string;
/**
* GCP credentials configuration.
*/
credentials?: GCPCredentials;
/**
* MWAA credentials and environment configuration.
*/
mwaaConfig?: MWAAConfiguration;
}
/**
* GCP credentials configuration.
*
* GCP credentials configs.
*/
export interface GCPCredentials {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
*/
gcpConfig: GCPCredentialsConfiguration;
/**
* we enable the authenticated service account to impersonate another service account
*/
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
}
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
*
* Pass the raw credential values provided by GCP
*
* Pass the path of file containing the GCP credentials info
*
* Use the application default credentials
*/
export interface GCPCredentialsConfiguration {
/**
* Google Cloud auth provider certificate.
*/
authProviderX509CertUrl?: string;
/**
* Google Cloud auth uri.
*/
authUri?: string;
/**
* Google Cloud email.
*/
clientEmail?: string;
/**
* Google Cloud Client ID.
*/
clientId?: string;
/**
* Google Cloud client certificate uri.
*/
clientX509CertUrl?: string;
/**
* Google Cloud private key.
*/
privateKey?: string;
/**
* Google Cloud private key id.
*/
privateKeyId?: string;
/**
* Project ID
*
* GCP Project ID to parse metadata from
*/
projectId?: string[] | string;
/**
* Google Cloud token uri.
*/
tokenUri?: string;
/**
* Google Cloud Platform account type.
*
* Google Cloud Platform ADC ( Application Default Credentials )
*/
type?: string;
/**
* Path of the file containing the GCP credentials info
*/
path?: string;
/**
* Google Security Token Service audience which contains the resource name for the workload
* identity pool and the provider identifier in that pool.
*/
audience?: string;
/**
* This object defines the mechanism used to retrieve the external credential from the local
* environment so that it can be exchanged for a GCP access token via the STS endpoint
*/
credentialSource?: { [key: string]: string };
/**
* Google Cloud Platform account type.
*/
externalType?: string;
/**
* Google Security Token Service subject token type based on the OAuth 2.0 token exchange
* spec.
*/
subjectTokenType?: string;
/**
* Google Security Token Service token exchange endpoint.
*/
tokenURL?: string;
[property: string]: any;
}
/**
* we enable the authenticated service account to impersonate another service account
*
* Pass the values to impersonate a service account of Google Cloud
*/
export interface GCPImpersonateServiceAccountValues {
/**
* The impersonated service account email
*/
impersonateServiceAccount?: string;
/**
* Number of seconds the delegated credential should be valid
*/
lifetime?: number;
[property: string]: any;
}
/**
* MWAA credentials and environment configuration.
*/
export interface MWAAConfiguration {
/**
* AWS credentials for generating MWAA CLI token.
*/
awsConfig: AWSCredentials;
/**
* The name of your MWAA environment.
*/
mwaaEnvironmentName: string;
}
/**
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configs.
*/
export interface AWSCredentials {
@ -237,6 +415,24 @@ export interface AWSCredentials {
profileName?: string;
}
/**
* Choose Auth Config Type.
*
* Common Database Connection Config
*
* IAM Auth Database Connection Config
*
* Azure Database Connection Config
*/
export interface AuthConfigurationType {
/**
* Password to connect to source.
*/
password?: string;
awsConfig?: AWSCredentials;
azureConfig?: AzureCredentials;
}
/**
* Azure Cloud Credentials
*/
@ -324,6 +520,8 @@ export interface DataStorageConfig {
}
/**
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configs.
*/
export interface AwsCredentials {
@ -428,6 +626,7 @@ export enum Type {
Backend = "Backend",
Mysql = "Mysql",
Postgres = "Postgres",
RESTAPI = "RestAPI",
SQLite = "SQLite",
}

View file

@ -837,9 +837,8 @@ export interface ConfigObject {
*
* Choose between mysql and postgres connection for alation database
*
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Matillion Auth Configuration
*/
@ -1060,7 +1059,7 @@ export interface ConfigObject {
*
* GCP Credentials for Google Drive API
*/
credentials?: CredentialsClass;
credentials?: PurpleGCPCredentials;
/**
* Regex to only include/exclude databases that matches the pattern.
*
@ -2372,6 +2371,8 @@ export enum AuthProvider {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*
* Authentication type to connect to Apache Ranger.
@ -2554,6 +2555,8 @@ export interface AuthenticationTypeForTableau {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface AWSCredentials {
@ -3002,6 +3005,8 @@ export interface IcebergFileSystem {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*
* Azure Cloud Credentials
@ -3275,12 +3280,16 @@ export interface ConfigSourceConnection {
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*
* AWS credentials required to access the S3 file.
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface Credentials {
@ -3492,9 +3501,10 @@ export interface GCPImpersonateServiceAccountValues {
*
* Choose between mysql and postgres connection for alation database
*
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Airflow REST API Connection Config for connecting via REST API.
*
* Lineage Backend Connection Config
*
@ -3541,8 +3551,11 @@ export interface ConfigConnection {
* Username to connect to the Matillion. This user should have privileges to read all the
* metadata in Matillion.
*/
username?: string;
verifySSL?: VerifySSL;
username?: string;
/**
* Whether to verify SSL certificates when connecting to the Airflow API.
*/
verifySSL?: boolean | VerifySSL;
/**
* Choose Auth Config Type.
*/
@ -3662,6 +3675,15 @@ export interface ConfigConnection {
* <USERNAME> <PASSWORD>`
*/
userKey?: string;
/**
* Airflow REST API version.
*/
apiVersion?: APIVersion;
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*/
authConfig?: AuthenticationConfiguration;
/**
* Regex exclude pipelines.
*/
@ -3673,6 +3695,92 @@ export interface ConfigConnection {
supportsViewLineageExtraction?: boolean;
}
/**
* Airflow REST API version.
*
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
* the version automatically.
*/
export enum APIVersion {
Auto = "auto",
V1 = "v1",
V2 = "v2",
}
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*
* Username and password for Airflow API authentication.
*
* Static access token for Airflow API authentication.
*
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
* runtime.
*
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
*/
export interface AuthenticationConfiguration {
/**
* Password for basic authentication to the Airflow API.
*/
password?: string;
/**
* Username for basic authentication to the Airflow API.
*/
username?: string;
/**
* Static access token for Airflow API authentication.
*/
token?: string;
/**
* GCP credentials configuration.
*/
credentials?: GcpConfigClass;
/**
* MWAA credentials and environment configuration.
*/
mwaaConfig?: MWAAConfiguration;
}
/**
* GCP credentials configs.
*
* GCP Credentials
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*/
export interface GcpConfigClass {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
*/
gcpConfig: GCPCredentialsConfiguration;
/**
* we enable the authenticated service account to impersonate another service account
*/
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
}
/**
* MWAA credentials and environment configuration.
*/
export interface MWAAConfiguration {
/**
* AWS credentials for generating MWAA CLI token.
*/
awsConfig: AWSCredentials;
/**
* The name of your MWAA environment.
*/
mwaaEnvironmentName: string;
}
/**
* Choose Auth Config Type.
*
@ -3738,6 +3846,8 @@ export interface DataStorageConfig {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface AwsCredentials {
@ -3866,6 +3976,7 @@ export enum ConnectionType {
MatillionETL = "MatillionETL",
Mysql = "Mysql",
Postgres = "Postgres",
RESTAPI = "RestAPI",
S3 = "S3",
SQLite = "SQLite",
}
@ -3890,6 +4001,8 @@ export enum VerifySSL {
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*
* Azure Cloud Credentials
@ -3898,7 +4011,7 @@ export enum VerifySSL {
*
* Azure Credentials
*/
export interface CredentialsClass {
export interface PurpleGCPCredentials {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
@ -4067,27 +4180,6 @@ export enum FHIRVersion {
Stu3 = "STU3",
}
/**
* GCP credentials configs.
*
* GCP Credentials
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP Credentials for Google Drive API
*/
export interface GcpConfigClass {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
*/
gcpConfig: GCPCredentialsConfiguration;
/**
* we enable the authenticated service account to impersonate another service account
*/
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
}
/**
* Do not set any credentials. Note that credentials are required to extract .lkml views and
* their lineage.

View file

@ -2818,6 +2818,8 @@ export interface DBTPrefixConfig {
*
* AWS credentials required to access the S3 file.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*
* Azure Cloud Credentials
@ -2832,6 +2834,8 @@ export interface DBTPrefixConfig {
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*/
export interface Credentials {
@ -4132,9 +4136,8 @@ export interface ConfigObject {
*
* Choose between mysql and postgres connection for alation database
*
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Matillion Auth Configuration
*/
@ -4355,7 +4358,7 @@ export interface ConfigObject {
*
* GCP Credentials for Google Drive API
*/
credentials?: CredentialsClass;
credentials?: PurpleGCPCredentials;
/**
* Regex to only include/exclude databases that matches the pattern.
*
@ -5565,6 +5568,8 @@ export enum AuthMechanismEnum {
*
* AWS credentials required to access the S3 file.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*
* Authentication type to connect to Apache Ranger.
@ -5747,6 +5752,8 @@ export interface AuthenticationTypeForTableau {
*
* AWS credentials required to access the S3 file.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface AWSCredentials {
@ -6156,6 +6163,8 @@ export interface IcebergFileSystem {
*
* AWS credentials required to access the S3 file.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*
* Azure Cloud Credentials
@ -6441,9 +6450,10 @@ export interface ConfigSourceConnection {
*
* Choose between mysql and postgres connection for alation database
*
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Airflow REST API Connection Config for connecting via REST API.
*
* Lineage Backend Connection Config
*
@ -6490,8 +6500,11 @@ export interface ConfigConnection {
* Username to connect to the Matillion. This user should have privileges to read all the
* metadata in Matillion.
*/
username?: string;
verifySSL?: VerifySSL;
username?: string;
/**
* Whether to verify SSL certificates when connecting to the Airflow API.
*/
verifySSL?: boolean | VerifySSL;
/**
* Choose Auth Config Type.
*/
@ -6611,6 +6624,15 @@ export interface ConfigConnection {
* <USERNAME> <PASSWORD>`
*/
userKey?: string;
/**
* Airflow REST API version.
*/
apiVersion?: APIVersion;
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*/
authConfig?: AuthenticationConfiguration;
/**
* Regex exclude pipelines.
*/
@ -6622,6 +6644,92 @@ export interface ConfigConnection {
supportsViewLineageExtraction?: boolean;
}
/**
* Airflow REST API version.
*
* Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect the version
* automatically.
*/
export enum APIVersion {
/**
* Detect the REST API version automatically.
*/
Auto = "auto",
/**
* REST API v1, for Airflow 2.x.
*/
V1 = "v1",
/**
* REST API v2, for Airflow 3.x.
*/
V2 = "v2",
}
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*
* Username and password for Airflow API authentication.
*
* Static access token for Airflow API authentication.
*
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
* runtime.
*
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
*
* NOTE(review): this type carries the fields of all four methods at once, so every field
* is optional here; presumably only the fields of the chosen method should be populated
* (username with password, or token, or credentials, or mwaaConfig) - confirm against the
* source JSON schema.
*/
export interface AuthenticationConfiguration {
/**
* Basic Auth: password for basic authentication to the Airflow API.
*/
password?: string;
/**
* Basic Auth: username for basic authentication to the Airflow API.
*/
username?: string;
/**
* Access Token: static access token for Airflow API authentication.
*/
token?: string;
/**
* GCP Service Account: GCP credentials configuration.
*/
credentials?: GcpConfigClass;
/**
* AWS Credentials (MWAA): MWAA credentials and environment configuration.
*/
mwaaConfig?: MWAAConfiguration;
}
/**
* GCP credentials configs: the credentials source (gcpConfig, required) plus optional
* service account impersonation settings.
*
* GCP Credentials
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*/
export interface GcpConfigClass {
/**
* How to authenticate to GCP, e.g. via GCP Credentials Values or GCP Credentials Path.
* Required.
*/
gcpConfig: GCPCredentialsConfiguration;
/**
* Optional settings that let the authenticated service account impersonate another
* service account.
*/
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
}
/**
* MWAA credentials and environment configuration. Both fields are required.
*/
export interface MWAAConfiguration {
/**
* AWS credentials used for generating the MWAA CLI token.
*/
awsConfig: AWSCredentials;
/**
* The name of your MWAA environment.
*/
mwaaEnvironmentName: string;
}
/**
* Choose Auth Config Type.
*
@ -6687,6 +6795,8 @@ export interface DataStorageConfig {
*
* AWS credentials required to access the S3 file.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface AwsCredentials {
@ -6815,6 +6925,7 @@ export enum ConnectionType {
MatillionETL = "MatillionETL",
Mysql = "Mysql",
Postgres = "Postgres",
RESTAPI = "RestAPI",
S3 = "S3",
SQLite = "SQLite",
}
@ -6826,6 +6937,8 @@ export enum ConnectionType {
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*
* Azure Cloud Credentials
@ -6834,7 +6947,7 @@ export enum ConnectionType {
*
* Azure Credentials
*/
export interface CredentialsClass {
export interface PurpleGCPCredentials {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
@ -7003,27 +7116,6 @@ export enum FHIRVersion {
Stu3 = "STU3",
}
/**
* GCP credentials configs: the credentials source (gcpConfig, required) plus optional
* service account impersonation settings.
*
* GCP Credentials
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP Credentials for Google Drive API
*/
export interface GcpConfigClass {
/**
* How to authenticate to GCP, e.g. via GCP Credentials Values or GCP Credentials Path.
* Required.
*/
gcpConfig: GCPCredentialsConfiguration;
/**
* Optional settings that let the authenticated service account impersonate another
* service account.
*/
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
}
/**
* Do not set any credentials. Note that credentials are required to extract .lkml views and
* their lineage.

View file

@ -235,13 +235,12 @@ export interface PipelineConnection {
*/
export interface ConfigObject {
/**
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Matillion Auth Configuration
*/
connection?: MetadataDatabaseConnection;
connection?: AirflowConnection;
/**
* Pipeline Service Management/UI URI.
*
@ -572,6 +571,8 @@ export interface FluffyAuthentication {
}
/**
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configs.
*
* AWS credentials configuration.
@ -807,9 +808,10 @@ export interface AzureCredentials {
}
/**
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Airflow REST API Connection Config for connecting via REST API.
*
* Lineage Backend Connection Config
*
@ -823,15 +825,28 @@ export interface AzureCredentials {
*
* Matillion ETL Auth Config.
*/
export interface MetadataDatabaseConnection {
export interface AirflowConnection {
/**
* Regex exclude pipelines.
* Airflow REST API version.
*/
pipelineFilterPattern?: FilterPattern;
apiVersion?: APIVersion;
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*/
authConfig?: AuthenticationConfiguration;
/**
* Service Type
*/
type?: Type;
/**
* Whether to verify SSL certificates when connecting to the Airflow API.
*/
verifySSL?: boolean;
/**
* Regex exclude pipelines.
*/
pipelineFilterPattern?: FilterPattern;
/**
* Choose Auth Config Type.
*/
@ -945,6 +960,188 @@ export interface MetadataDatabaseConnection {
supportsViewLineageExtraction?: boolean;
}
/**
* Airflow REST API version.
*
* Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect the version
* automatically.
*/
export enum APIVersion {
/**
* Detect the REST API version automatically.
*/
Auto = "auto",
/**
* REST API v1, for Airflow 2.x.
*/
V1 = "v1",
/**
* REST API v2, for Airflow 3.x.
*/
V2 = "v2",
}
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*
* Username and password for Airflow API authentication.
*
* Static access token for Airflow API authentication.
*
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
* runtime.
*
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
*
* NOTE(review): this type carries the fields of all four methods at once, so every field
* is optional here; presumably only the fields of the chosen method should be populated
* (username with password, or token, or credentials, or mwaaConfig) - confirm against the
* source JSON schema.
*/
export interface AuthenticationConfiguration {
/**
* Basic Auth: password for basic authentication to the Airflow API.
*/
password?: string;
/**
* Basic Auth: username for basic authentication to the Airflow API.
*/
username?: string;
/**
* Access Token: static access token for Airflow API authentication.
*/
token?: string;
/**
* GCP Service Account: GCP credentials configuration.
*/
credentials?: GCPCredentials;
/**
* AWS Credentials (MWAA): MWAA credentials and environment configuration.
*/
mwaaConfig?: MWAAConfiguration;
}
/**
* GCP credentials configuration: the credentials source (gcpConfig, required) plus
* optional service account impersonation settings.
*
* GCP credentials configs.
*/
export interface GCPCredentials {
/**
* How to authenticate to GCP, e.g. via GCP Credentials Values or GCP Credentials Path.
* Required.
*/
gcpConfig: GCPCredentialsConfiguration;
/**
* Optional settings that let the authenticated service account impersonate another
* service account.
*/
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
}
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
*
* Pass the raw credential values provided by GCP
*
* Pass the path of file containing the GCP credentials info
*
* Use the application default credentials
*
* Every declared field is optional, and the index signature at the end accepts any
* additional property.
*/
export interface GCPCredentialsConfiguration {
/**
* Google Cloud auth provider certificate.
*/
authProviderX509CertUrl?: string;
/**
* Google Cloud auth uri.
*/
authUri?: string;
/**
* Google Cloud email.
*/
clientEmail?: string;
/**
* Google Cloud Client ID.
*/
clientId?: string;
/**
* Google Cloud client certificate uri.
*/
clientX509CertUrl?: string;
/**
* Google Cloud private key.
*/
privateKey?: string;
/**
* Google Cloud private key id.
*/
privateKeyId?: string;
/**
* Project ID
*
* GCP Project ID to parse metadata from. Either a single project ID or a list of project
* IDs.
*/
projectId?: string[] | string;
/**
* Google Cloud token uri.
*/
tokenUri?: string;
/**
* Google Cloud Platform account type.
*
* Google Cloud Platform ADC ( Application Default Credentials )
*/
type?: string;
/**
* Path of the file containing the GCP credentials info
*/
path?: string;
/**
* Google Security Token Service audience which contains the resource name for the workload
* identity pool and the provider identifier in that pool.
*/
audience?: string;
/**
* This object defines the mechanism used to retrieve the external credential from the local
* environment so that it can be exchanged for a GCP access token via the STS endpoint.
* Typed as a map of string keys to string values.
*/
credentialSource?: { [key: string]: string };
/**
* Google Cloud Platform account type.
*/
externalType?: string;
/**
* Google Security Token Service subject token type based on the OAuth 2.0 token exchange
* spec.
*/
subjectTokenType?: string;
/**
* Google Security Token Service token exchange endpoint.
*/
tokenURL?: string;
/**
* Any additional property is accepted, with no type checking on its value.
*/
[property: string]: any;
}
/**
* Lets the authenticated service account impersonate another service account.
*
* Pass the values to impersonate a service account of Google Cloud
*
* Both declared fields are optional; the index signature accepts any additional property.
*/
export interface GCPImpersonateServiceAccountValues {
/**
* The impersonated service account email
*/
impersonateServiceAccount?: string;
/**
* Number of seconds the delegated credential should be valid
*/
lifetime?: number;
/**
* Any additional property is accepted, with no type checking on its value.
*/
[property: string]: any;
}
/**
* MWAA credentials and environment configuration. Both fields are required.
*/
export interface MWAAConfiguration {
/**
* AWS credentials used for generating the MWAA CLI token.
*/
awsConfig: AWSCredentials;
/**
* The name of your MWAA environment.
*/
mwaaEnvironmentName: string;
}
/**
* Choose Auth Config Type.
*
@ -1026,6 +1223,8 @@ export interface DataStorageConfig {
}
/**
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configs.
*
* AWS credentials configuration.
@ -1111,6 +1310,7 @@ export enum Type {
MatillionETL = "MatillionETL",
Mysql = "Mysql",
Postgres = "Postgres",
RESTAPI = "RestAPI",
SQLite = "SQLite",
}

View file

@ -0,0 +1,277 @@
/*
* Copyright 2026 Collate.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Airflow REST API Connection Config for connecting via REST API.
*
* Only authConfig is required; every other field is optional.
*/
export interface AirflowRESTAPIConnection {
/**
* Airflow REST API version.
*/
apiVersion?: APIVersion;
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA). Required.
*/
authConfig: AuthenticationConfiguration;
/**
* Service Type
*/
type?: ServiceType;
/**
* Whether to verify SSL certificates when connecting to the Airflow API.
*/
verifySSL?: boolean;
}
/**
* Airflow REST API version.
*
* Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect the version
* automatically.
*/
export enum APIVersion {
/**
* Detect the REST API version automatically.
*/
Auto = "auto",
/**
* REST API v1, for Airflow 2.x.
*/
V1 = "v1",
/**
* REST API v2, for Airflow 3.x.
*/
V2 = "v2",
}
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*
* Username and password for Airflow API authentication.
*
* Static access token for Airflow API authentication.
*
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
* runtime.
*
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
*
* NOTE(review): this type carries the fields of all four methods at once, so every field
* is optional here; presumably only the fields of the chosen method should be populated
* (username with password, or token, or credentials, or mwaaConfig) - confirm against the
* source JSON schema.
*/
export interface AuthenticationConfiguration {
/**
* Basic Auth: password for basic authentication to the Airflow API.
*/
password?: string;
/**
* Basic Auth: username for basic authentication to the Airflow API.
*/
username?: string;
/**
* Access Token: static access token for Airflow API authentication.
*/
token?: string;
/**
* GCP Service Account: GCP credentials configuration.
*/
credentials?: GCPCredentials;
/**
* AWS Credentials (MWAA): MWAA credentials and environment configuration.
*/
mwaaConfig?: MWAAConfiguration;
}
/**
* GCP credentials configuration: the credentials source (gcpConfig, required) plus
* optional service account impersonation settings.
*
* GCP credentials configs.
*/
export interface GCPCredentials {
/**
* How to authenticate to GCP, e.g. via GCP Credentials Values or GCP Credentials Path.
* Required.
*/
gcpConfig: GCPCredentialsConfiguration;
/**
* Optional settings that let the authenticated service account impersonate another
* service account.
*/
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
}
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
*
* Pass the raw credential values provided by GCP
*
* Pass the path of file containing the GCP credentials info
*
* Use the application default credentials
*
* Every declared field is optional, and the index signature at the end accepts any
* additional property.
*/
export interface GCPCredentialsConfiguration {
/**
* Google Cloud auth provider certificate.
*/
authProviderX509CertUrl?: string;
/**
* Google Cloud auth uri.
*/
authUri?: string;
/**
* Google Cloud email.
*/
clientEmail?: string;
/**
* Google Cloud Client ID.
*/
clientId?: string;
/**
* Google Cloud client certificate uri.
*/
clientX509CertUrl?: string;
/**
* Google Cloud private key.
*/
privateKey?: string;
/**
* Google Cloud private key id.
*/
privateKeyId?: string;
/**
* Project ID
*
* GCP Project ID to parse metadata from. Either a single project ID or a list of project
* IDs.
*/
projectId?: string[] | string;
/**
* Google Cloud token uri.
*/
tokenUri?: string;
/**
* Google Cloud Platform account type.
*
* Google Cloud Platform ADC ( Application Default Credentials )
*/
type?: string;
/**
* Path of the file containing the GCP credentials info
*/
path?: string;
/**
* Google Security Token Service audience which contains the resource name for the workload
* identity pool and the provider identifier in that pool.
*/
audience?: string;
/**
* This object defines the mechanism used to retrieve the external credential from the local
* environment so that it can be exchanged for a GCP access token via the STS endpoint.
* Typed as a map of string keys to string values.
*/
credentialSource?: { [key: string]: string };
/**
* Google Cloud Platform account type.
*/
externalType?: string;
/**
* Google Security Token Service subject token type based on the OAuth 2.0 token exchange
* spec.
*/
subjectTokenType?: string;
/**
* Google Security Token Service token exchange endpoint.
*/
tokenURL?: string;
/**
* Any additional property is accepted, with no type checking on its value.
*/
[property: string]: any;
}
/**
* Lets the authenticated service account impersonate another service account.
*
* Pass the values to impersonate a service account of Google Cloud
*
* Both declared fields are optional; the index signature accepts any additional property.
*/
export interface GCPImpersonateServiceAccountValues {
/**
* The impersonated service account email
*/
impersonateServiceAccount?: string;
/**
* Number of seconds the delegated credential should be valid
*/
lifetime?: number;
/**
* Any additional property is accepted, with no type checking on its value.
*/
[property: string]: any;
}
/**
* MWAA credentials and environment configuration. Both fields are required.
*/
export interface MWAAConfiguration {
/**
* AWS credentials used for generating the MWAA CLI token.
*/
awsConfig: AWSCredentials;
/**
* The name of your MWAA environment.
*/
mwaaEnvironmentName: string;
}
/**
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configs.
*
* Only awsRegion is required; every other field is optional.
*/
export interface AWSCredentials {
/**
* The Amazon Resource Name (ARN) of the role to assume. Required Field in case of Assume
* Role
*/
assumeRoleArn?: string;
/**
* An identifier for the assumed role session. Use the role session name to uniquely
* identify a session when the same role is assumed by different principals or for different
* reasons. Required Field in case of Assume Role
*/
assumeRoleSessionName?: string;
/**
* Source identity for the assumed role session. Optional Field in case of Assume Role
*
* NOTE(review): the generated text here repeated the assumeRoleArn description ("The
* Amazon Resource Name (ARN) of the role to assume"); presumably this maps to the AWS STS
* AssumeRole SourceIdentity parameter - confirm and fix in the source JSON schema.
*/
assumeRoleSourceIdentity?: string;
/**
* AWS Access key ID.
*/
awsAccessKeyId?: string;
/**
* AWS Region. Required.
*/
awsRegion: string;
/**
* AWS Secret Access Key.
*/
awsSecretAccessKey?: string;
/**
* AWS Session Token.
*/
awsSessionToken?: string;
/**
* Enable AWS IAM authentication. When enabled, uses the default credential provider chain
* (environment variables, instance profile, etc.). Defaults to false for backward
* compatibility.
*/
enabled?: boolean;
/**
* Endpoint URL for AWS.
*/
endPointURL?: string;
/**
* The name of a profile to use with the boto session.
*/
profileName?: string;
}
/**
* Service Type of the Airflow REST API connection.
*/
export enum ServiceType {
/**
* The only member; its value is the string "RestAPI".
*/
RESTAPI = "RestAPI",
}

View file

@ -0,0 +1,21 @@
/*
* Copyright 2026 Collate.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Access Token authentication: a static access token for Airflow API authentication.
*/
export interface AccessTokenConfig {
/**
* Static access token for Airflow API authentication. Required.
*/
token: string;
}

View file

@ -0,0 +1,25 @@
/*
* Copyright 2026 Collate.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Basic authentication: username and password for Airflow API authentication. Both fields
* are required.
*/
export interface BasicAuthConfig {
/**
* Password for basic authentication to the Airflow API.
*/
password: string;
/**
* Username for basic authentication to the Airflow API.
*/
username: string;
}

View file

@ -0,0 +1,141 @@
/*
* Copyright 2026 Collate.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at runtime.
*/
export interface GcpCredentialsConfig {
/**
* GCP credentials configuration. Required.
*/
credentials: GCPCredentials;
}
/**
* GCP credentials configuration: the credentials source (gcpConfig, required) plus
* optional service account impersonation settings.
*
* GCP credentials configs.
*/
export interface GCPCredentials {
/**
* How to authenticate to GCP, e.g. via GCP Credentials Values or GCP Credentials Path.
* Required.
*/
gcpConfig: GCPCredentialsConfiguration;
/**
* Optional settings that let the authenticated service account impersonate another
* service account.
*/
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
}
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
*
* Pass the raw credential values provided by GCP
*
* Pass the path of file containing the GCP credentials info
*
* Use the application default credentials
*
* Every declared field is optional, and the index signature at the end accepts any
* additional property.
*/
export interface GCPCredentialsConfiguration {
/**
* Google Cloud auth provider certificate.
*/
authProviderX509CertUrl?: string;
/**
* Google Cloud auth uri.
*/
authUri?: string;
/**
* Google Cloud email.
*/
clientEmail?: string;
/**
* Google Cloud Client ID.
*/
clientId?: string;
/**
* Google Cloud client certificate uri.
*/
clientX509CertUrl?: string;
/**
* Google Cloud private key.
*/
privateKey?: string;
/**
* Google Cloud private key id.
*/
privateKeyId?: string;
/**
* Project ID
*
* GCP Project ID to parse metadata from. Either a single project ID or a list of project
* IDs.
*/
projectId?: string[] | string;
/**
* Google Cloud token uri.
*/
tokenUri?: string;
/**
* Google Cloud Platform account type.
*
* Google Cloud Platform ADC ( Application Default Credentials )
*/
type?: string;
/**
* Path of the file containing the GCP credentials info
*/
path?: string;
/**
* Google Security Token Service audience which contains the resource name for the workload
* identity pool and the provider identifier in that pool.
*/
audience?: string;
/**
* This object defines the mechanism used to retrieve the external credential from the local
* environment so that it can be exchanged for a GCP access token via the STS endpoint.
* Typed as a map of string keys to string values.
*/
credentialSource?: { [key: string]: string };
/**
* Google Cloud Platform account type.
*/
externalType?: string;
/**
* Google Security Token Service subject token type based on the OAuth 2.0 token exchange
* spec.
*/
subjectTokenType?: string;
/**
* Google Security Token Service token exchange endpoint.
*/
tokenURL?: string;
/**
* Any additional property is accepted, with no type checking on its value.
*/
[property: string]: any;
}
/**
* Lets the authenticated service account impersonate another service account.
*
* Pass the values to impersonate a service account of Google Cloud
*
* Both declared fields are optional; the index signature accepts any additional property.
*/
export interface GCPImpersonateServiceAccountValues {
/**
* The impersonated service account email
*/
impersonateServiceAccount?: string;
/**
* Number of seconds the delegated credential should be valid
*/
lifetime?: number;
/**
* Any additional property is accepted, with no type checking on its value.
*/
[property: string]: any;
}

View file

@ -0,0 +1,89 @@
/*
* Copyright 2026 Collate.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
*/
export interface MwaaAuthConfig {
/**
* MWAA credentials and environment configuration. Required.
*/
mwaaConfig: MWAAConfiguration;
}
/**
* MWAA credentials and environment configuration. Both fields are required.
*/
export interface MWAAConfiguration {
/**
* AWS credentials used for generating the MWAA CLI token.
*/
awsConfig: AWSCredentials;
/**
* The name of your MWAA environment.
*/
mwaaEnvironmentName: string;
}
/**
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configs.
*
* Only awsRegion is required; every other field is optional.
*/
export interface AWSCredentials {
/**
* The Amazon Resource Name (ARN) of the role to assume. Required Field in case of Assume
* Role
*/
assumeRoleArn?: string;
/**
* An identifier for the assumed role session. Use the role session name to uniquely
* identify a session when the same role is assumed by different principals or for different
* reasons. Required Field in case of Assume Role
*/
assumeRoleSessionName?: string;
/**
* Source identity for the assumed role session. Optional Field in case of Assume Role
*
* NOTE(review): the generated text here repeated the assumeRoleArn description ("The
* Amazon Resource Name (ARN) of the role to assume"); presumably this maps to the AWS STS
* AssumeRole SourceIdentity parameter - confirm and fix in the source JSON schema.
*/
assumeRoleSourceIdentity?: string;
/**
* AWS Access key ID.
*/
awsAccessKeyId?: string;
/**
* AWS Region. Required.
*/
awsRegion: string;
/**
* AWS Secret Access Key.
*/
awsSecretAccessKey?: string;
/**
* AWS Session Token.
*/
awsSessionToken?: string;
/**
* Enable AWS IAM authentication. When enabled, uses the default credential provider chain
* (environment variables, instance profile, etc.). Defaults to false for backward
* compatibility.
*/
enabled?: boolean;
/**
* Endpoint URL for AWS.
*/
endPointURL?: string;
/**
* The name of a profile to use with the boto session.
*/
profileName?: string;
}

View file

@ -881,9 +881,8 @@ export interface ConfigObject {
*
* Choose between mysql and postgres connection for alation database
*
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Matillion Auth Configuration
*/
@ -1104,7 +1103,7 @@ export interface ConfigObject {
*
* GCP Credentials for Google Drive API
*/
credentials?: CredentialsClass;
credentials?: PurpleGCPCredentials;
/**
* Regex to only include/exclude databases that matches the pattern.
*
@ -2416,6 +2415,8 @@ export enum AuthProvider {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*
* Authentication type to connect to Apache Ranger.
@ -2598,6 +2599,8 @@ export interface AuthenticationTypeForTableau {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface AWSCredentials {
@ -3046,6 +3049,8 @@ export interface IcebergFileSystem {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*
* Azure Cloud Credentials
@ -3319,12 +3324,16 @@ export interface ConfigSourceConnection {
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*
* AWS credentials required to access the S3 file.
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface Credentials {
@ -3536,9 +3545,10 @@ export interface GCPImpersonateServiceAccountValues {
*
* Choose between mysql and postgres connection for alation database
*
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Airflow REST API Connection Config for connecting via REST API.
*
* Lineage Backend Connection Config
*
@ -3585,8 +3595,11 @@ export interface ConfigConnection {
* Username to connect to the Matillion. This user should have privileges to read all the
* metadata in Matillion.
*/
username?: string;
verifySSL?: VerifySSL;
username?: string;
/**
* Whether to verify SSL certificates when connecting to the Airflow API.
*/
verifySSL?: boolean | VerifySSL;
/**
* Choose Auth Config Type.
*/
@ -3706,6 +3719,15 @@ export interface ConfigConnection {
* <USERNAME> <PASSWORD>`
*/
userKey?: string;
/**
* Airflow REST API version.
*/
apiVersion?: APIVersion;
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*/
authConfig?: AuthenticationConfiguration;
/**
* Regex exclude pipelines.
*/
@ -3717,6 +3739,92 @@ export interface ConfigConnection {
supportsViewLineageExtraction?: boolean;
}
/**
* Airflow REST API version.
*
* Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect the version
* automatically.
*/
export enum APIVersion {
/**
* Detect the REST API version automatically.
*/
Auto = "auto",
/**
* REST API v1, for Airflow 2.x.
*/
V1 = "v1",
/**
* REST API v2, for Airflow 3.x.
*/
V2 = "v2",
}
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*
* Username and password for Airflow API authentication.
*
* Static access token for Airflow API authentication.
*
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
* runtime.
*
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
*
* NOTE(review): this type carries the fields of all four methods at once, so every field
* is optional here; presumably only the fields of the chosen method should be populated
* (username with password, or token, or credentials, or mwaaConfig) - confirm against the
* source JSON schema.
*/
export interface AuthenticationConfiguration {
/**
* Basic Auth: password for basic authentication to the Airflow API.
*/
password?: string;
/**
* Basic Auth: username for basic authentication to the Airflow API.
*/
username?: string;
/**
* Access Token: static access token for Airflow API authentication.
*/
token?: string;
/**
* GCP Service Account: GCP credentials configuration.
*/
credentials?: GcpConfigClass;
/**
* AWS Credentials (MWAA): MWAA credentials and environment configuration.
*/
mwaaConfig?: MWAAConfiguration;
}
/**
* GCP credentials configs: the credentials source (gcpConfig, required) plus optional
* service account impersonation settings.
*
* GCP Credentials
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*/
export interface GcpConfigClass {
/**
* How to authenticate to GCP, e.g. via GCP Credentials Values or GCP Credentials Path.
* Required.
*/
gcpConfig: GCPCredentialsConfiguration;
/**
* Optional settings that let the authenticated service account impersonate another
* service account.
*/
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
}
/**
* MWAA credentials and environment configuration. Both fields are required.
*/
export interface MWAAConfiguration {
/**
* AWS credentials used for generating the MWAA CLI token.
*/
awsConfig: AWSCredentials;
/**
* The name of your MWAA environment.
*/
mwaaEnvironmentName: string;
}
/**
* Choose Auth Config Type.
*
@ -3782,6 +3890,8 @@ export interface DataStorageConfig {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface AwsCredentials {
@ -3910,6 +4020,7 @@ export enum ConnectionType {
MatillionETL = "MatillionETL",
Mysql = "Mysql",
Postgres = "Postgres",
RESTAPI = "RestAPI",
S3 = "S3",
SQLite = "SQLite",
}
@ -3934,6 +4045,8 @@ export enum VerifySSL {
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*
* Azure Cloud Credentials
@ -3942,7 +4055,7 @@ export enum VerifySSL {
*
* Azure Credentials
*/
export interface CredentialsClass {
export interface PurpleGCPCredentials {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
@ -4111,27 +4224,6 @@ export enum FHIRVersion {
Stu3 = "STU3",
}
/**
 * GCP credentials configuration.
 *
 * This shape is shared by several consumers; among them are authenticating with Pub/Sub
 * and accessing the Google Drive API.
 */
export interface GcpConfigClass {
    /**
     * Lets the authenticated service account impersonate another service account.
     */
    gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
    /**
     * How to authenticate to GCP. Two ways are supported: GCP Credentials Values or a
     * GCP Credentials Path.
     */
    gcpConfig: GCPCredentialsConfiguration;
}
/**
* Do not set any credentials. Note that credentials are required to extract .lkml views and
* their lineage.

View file

@ -926,9 +926,8 @@ export interface ConfigObject {
*
* Choose between mysql and postgres connection for alation database
*
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Matillion Auth Configuration
*/
@ -1149,7 +1148,7 @@ export interface ConfigObject {
*
* GCP Credentials for Google Drive API
*/
credentials?: CredentialsClass;
credentials?: PurpleGCPCredentials;
/**
* Regex to only include/exclude databases that matches the pattern.
*
@ -2474,6 +2473,8 @@ export enum AuthProvider {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*
* Authentication type to connect to Apache Ranger.
@ -2656,6 +2657,8 @@ export interface AuthenticationTypeForTableau {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface AWSCredentials {
@ -3104,6 +3107,8 @@ export interface IcebergFileSystem {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*
* Azure Cloud Credentials
@ -3377,12 +3382,16 @@ export interface ConfigSourceConnection {
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*
* AWS credentials required to access the S3 file.
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface Credentials {
@ -3594,9 +3603,10 @@ export interface GCPImpersonateServiceAccountValues {
*
* Choose between mysql and postgres connection for alation database
*
* Underlying database connection. See
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
* supported backends.
* Choose between database connection or REST API connection to fetch metadata from
* Airflow.
*
* Airflow REST API Connection Config for connecting via REST API.
*
* Lineage Backend Connection Config
*
@ -3643,8 +3653,11 @@ export interface ConfigConnection {
* Username to connect to the Matillion. This user should have privileges to read all the
* metadata in Matillion.
*/
username?: string;
verifySSL?: VerifySSL;
username?: string;
/**
* Whether to verify SSL certificates when connecting to the Airflow API.
*/
verifySSL?: boolean | VerifySSL;
/**
* Choose Auth Config Type.
*/
@ -3764,6 +3777,15 @@ export interface ConfigConnection {
* <USERNAME> <PASSWORD>`
*/
userKey?: string;
/**
* Airflow REST API version.
*/
apiVersion?: APIVersion;
/**
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
*/
authConfig?: AuthenticationConfiguration;
/**
* Regex exclude pipelines.
*/
@ -3775,6 +3797,92 @@ export interface ConfigConnection {
supportsViewLineageExtraction?: boolean;
}
/**
* Airflow REST API version.
*
* Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect the version
* automatically.
*/
export enum APIVersion {
/** Detect the API version automatically. */
Auto = "auto",
/** Use for Airflow 2.x. */
V1 = "v1",
/** Use for Airflow 3.x. */
V2 = "v2",
}
/**
 * Authentication method for the Airflow API. Choose one of:
 *
 * - Basic Auth: username and password for Airflow API authentication.
 * - Access Token: a static access token for Airflow API authentication.
 * - GCP Service Account (for Cloud Composer): GCP credentials for Google Cloud Composer.
 *   Supports service account values, credentials path, workload identity (external
 *   account), and ADC. Tokens are auto-refreshed at runtime.
 * - AWS Credentials (for MWAA): AWS MWAA (Managed Workflows for Apache Airflow)
 *   authentication configuration.
 */
export interface AuthenticationConfiguration {
    /**
     * Basic authentication: username for the Airflow API.
     */
    username?: string;
    /**
     * Basic authentication: password for the Airflow API.
     */
    password?: string;
    /**
     * Static access token used to authenticate against the Airflow API.
     */
    token?: string;
    /**
     * GCP credentials configuration.
     */
    credentials?: GcpConfigClass;
    /**
     * Credentials and environment configuration for MWAA.
     */
    mwaaConfig?: MWAAConfiguration;
}
/**
 * GCP credentials configuration.
 *
 * This shape is shared by several consumers; among them are authenticating with Pub/Sub
 * and accessing the Google Drive API.
 */
export interface GcpConfigClass {
    /**
     * Lets the authenticated service account impersonate another service account.
     */
    gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
    /**
     * How to authenticate to GCP. Two ways are supported: GCP Credentials Values or a
     * GCP Credentials Path.
     */
    gcpConfig: GCPCredentialsConfiguration;
}
/**
 * Credentials and environment configuration for MWAA.
 */
export interface MWAAConfiguration {
    /**
     * Name of the MWAA environment.
     */
    mwaaEnvironmentName: string;
    /**
     * AWS credentials used to generate the MWAA CLI token.
     */
    awsConfig: AWSCredentials;
}
/**
* Choose Auth Config Type.
*
@ -3840,6 +3948,8 @@ export interface DataStorageConfig {
*
* AWS credentials configs.
*
* AWS credentials for generating MWAA CLI token.
*
* AWS credentials configuration.
*/
export interface AwsCredentials {
@ -3968,6 +4078,7 @@ export enum ConnectionType {
MatillionETL = "MatillionETL",
Mysql = "Mysql",
Postgres = "Postgres",
RESTAPI = "RestAPI",
S3 = "S3",
SQLite = "SQLite",
}
@ -3992,6 +4103,8 @@ export enum VerifySSL {
*
* GCP credentials configuration for authenticating with Pub/Sub.
*
* GCP credentials configuration.
*
* GCP Credentials for Google Drive API
*
* Azure Cloud Credentials
@ -4000,7 +4113,7 @@ export enum VerifySSL {
*
* Azure Credentials
*/
export interface CredentialsClass {
export interface PurpleGCPCredentials {
/**
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
* Credentials Path
@ -4169,27 +4282,6 @@ export enum FHIRVersion {
Stu3 = "STU3",
}
/**
 * GCP credentials configuration.
 *
 * This shape is shared by several consumers; among them are authenticating with Pub/Sub
 * and accessing the Google Drive API.
 */
export interface GcpConfigClass {
    /**
     * Lets the authenticated service account impersonate another service account.
     */
    gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
    /**
     * How to authenticate to GCP. Two ways are supported: GCP Credentials Values or a
     * GCP Credentials Path.
     */
    gcpConfig: GCPCredentialsConfiguration;
}
/**
* Do not set any credentials. Note that credentials are required to extract .lkml views and
* their lineage.