mirror of
https://github.com/open-metadata/OpenMetadata
synced 2026-05-24 09:39:11 +00:00
Airflow 3.x API based connector (#26624)
* Add Airflow Connector with API integration * Add Airflow Connector with API integration * Update generated TypeScript types * Add Airflow Connector with API integration improvements * fix: username password flow for airflow 3, example yaml file, & sidebar docs * fix type in UI * Fix integration tests, fixed UI rendering and docs, improved OpenLineageResolver * Fix pytests * move connector * Update generated TypeScript types * fix: response parsing for astronomer airflow * feat: added service account auth for airflow rest connection when composer managed airflow along with token * fix: airflow rest api connection class converter and airflow.md * feat: add mwaa config support for authentication * s3 & column lineage * Update generated TypeScript types * fix: test airflow mwaa client * fix: removed unused method, and extra code for parsing response * fix: git pr checks * fix: removed airflowapi integration tests that requires real host instance and added test with mocking * fix test * improve test coverage * push coverage * fix: gitar comments * fix: removed redundant files --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: Keshav Mohta <68001229+keshavmohta09@users.noreply.github.com> Co-authored-by: Keshav Mohta <keshavmohta09@gmail.com> Co-authored-by: ulixius9 <mayursingal9@gmail.com>
This commit is contained in:
parent
f3bbfc7b75
commit
b7797fe3ef
55 changed files with 9417 additions and 459 deletions
|
|
@ -512,6 +512,9 @@ services:
|
|||
AIRFLOW__CORE__EXECUTOR: LocalExecutor
|
||||
AIRFLOW__LOGGING__LOGGING_LEVEL: ${AIRFLOW_LOGGING_LEVEL:-DEBUG}
|
||||
AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/opt/airflow/dag_generated_configs"
|
||||
# OpenLineage transport config (optional - enable for lineage via OL)
|
||||
# AIRFLOW__OPENLINEAGE__TRANSPORT: '{"type": "http", "url": "http://openmetadata-server:8585/api/v1/openlineage/", "endpoint": "lineage", "auth": {"type": "api_key", "api_key": "<OM_JWT_TOKEN>"}}'
|
||||
# AIRFLOW__OPENLINEAGE__NAMESPACE: local_airflow
|
||||
DB_HOST: ${AIRFLOW_DB_HOST:-mysql}
|
||||
DB_PORT: ${AIRFLOW_DB_PORT:-3306}
|
||||
AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db}
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ class AWSServices(Enum):
|
|||
REDSHIFT = "redshift"
|
||||
REDSHIFT_SERVERLESS = "redshift-serverless"
|
||||
LAKE_FORMATION = "lakeformation"
|
||||
MWAA = "mwaa"
|
||||
|
||||
|
||||
def _get_valid_aws_regions() -> set:
|
||||
|
|
@ -277,3 +278,6 @@ class AWSClient:
|
|||
|
||||
def get_redshift_serverless_client(self):
    """Return the client ``get_client`` builds for the Redshift Serverless service."""
    service_name = AWSServices.REDSHIFT_SERVERLESS.value
    return self.get_client(service_name)
|
||||
|
||||
def get_mwaa_client(self):
    """Return the client ``get_client`` builds for the MWAA service."""
    service_name = AWSServices.MWAA.value
    return self.get_client(service_name)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,122 @@
|
|||
# Copyright 2025 Collate
|
||||
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
Auth helper functions for the Airflow REST API client.
|
||||
"""
|
||||
import base64
|
||||
import traceback
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Callable, Optional, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
from metadata.utils.credentials import (
|
||||
get_gcp_impersonate_credentials,
|
||||
set_google_credentials,
|
||||
)
|
||||
from metadata.utils.logger import ingestion_logger
|
||||
|
||||
logger = ingestion_logger()
|
||||
|
||||
TokenCallback = Callable[[], Tuple[str, object]]
|
||||
|
||||
_JWT_REFRESH_INTERVAL_SECONDS = (
|
||||
25 * 60
|
||||
) # re-fetch every 25 min, well within Airflow's ~30-60 min TTL
|
||||
_BASIC_AUTH_TTL_SECONDS = (
|
||||
7 * 24 * 3600
|
||||
) # basic auth doesn't expire; skip retry for 7 days
|
||||
|
||||
|
||||
def try_exchange_jwt(
    host: str, username: str, password: str, verify: bool
) -> Optional[str]:
    """
    Try to obtain a JWT Bearer token from ``{host}/auth/token`` (Airflow 3.x).

    The credentials are POSTed as a JSON body with a 10 second timeout.

    Args:
        host: base URL the ``/auth/token`` path is appended to
        username: value sent as ``username``
        password: value sent as ``password``
        verify: forwarded to ``requests.post`` as ``verify``

    Returns:
        The ``access_token`` field of the JSON reply, or None when anything
        in the exchange raises (the traceback is logged at debug level).
    """
    token_url = f"{host}/auth/token"
    credentials = {"username": username, "password": password}
    try:
        reply = requests.post(
            token_url,
            json=credentials,
            timeout=10,
            verify=verify,
        )
        reply.raise_for_status()
        payload = reply.json()
        return payload.get("access_token")
    except Exception:
        # Best effort: a failure here is expected on servers without /auth/token.
        logger.debug(
            "JWT token exchange failed (likely Airflow 2.x): %s", traceback.format_exc()
        )
    return None
|
||||
|
||||
|
||||
def build_access_token_callback(token: str) -> TokenCallback:
    """Wrap a fixed token in a callback that always yields ``(token, 0)``."""

    def _static_token() -> Tuple[str, object]:
        # The token is static, so the same pair is returned on every call.
        return token, 0

    return _static_token
|
||||
|
||||
|
||||
def build_basic_auth_callback(
    host: str, username: str, password: str, verify: bool
) -> Tuple[TokenCallback, None]:
    """
    Build the token callback for username/password authentication.

    Returns ``(callback, None)``. The second element is the auth_token_mode:
    None means the token value is used as-is, because the callback already
    embeds the 'Bearer' or 'Basic' prefix in the value it returns.

    Each time the callback runs it calls try_exchange_jwt again, so a JWT is
    freshly issued on every refresh cycle. When no JWT is obtained (Airflow
    2.x servers) it falls back to a Basic auth header value.
    """

    def _callback() -> Tuple[str, object]:
        bearer = try_exchange_jwt(host, username, password, verify)
        if not bearer:
            # No JWT available: encode "username:password" for Basic auth.
            raw_pair = f"{username}:{password}".encode()
            encoded = base64.b64encode(raw_pair).decode()
            return f"Basic {encoded}", _BASIC_AUTH_TTL_SECONDS
        return f"Bearer {bearer}", _JWT_REFRESH_INTERVAL_SECONDS

    return _callback, None
|
||||
|
||||
|
||||
def build_gcp_token_callback(gcp_credentials) -> TokenCallback:
    """
    Returns a token callback that fetches and auto-refreshes GCP OAuth2 tokens.

    Supports all 4 GCP credential types via set_google_credentials():
    - GcpCredentialsValues: service account JSON values (clientEmail, privateKey, etc.)
    - GcpCredentialsPath: path to a credentials JSON file
    - GcpExternalAccount: workload identity federation
    - GcpADC: application default credentials

    Also handles optional service account impersonation via gcpImpersonateServiceAccount.

    The callback returns ``(token, expiry)`` where ``expiry`` is always a
    timezone-aware UTC datetime: a naive expiry reported by the credentials
    object is tagged as UTC, and a missing expiry defaults to 55 minutes from
    now. Returning one kind of datetime on every path keeps the consumer from
    mixing naive and aware values.
    """
    set_google_credentials(gcp_credentials)
    impersonate = gcp_credentials.gcpImpersonateServiceAccount

    def _callback() -> Tuple[str, datetime]:
        # Imported lazily so the module loads without the google packages.
        import google.auth
        from google.auth.transport.requests import Request as AuthRequest

        if impersonate and impersonate.impersonateServiceAccount:
            credentials = get_gcp_impersonate_credentials(
                impersonate_service_account=impersonate.impersonateServiceAccount,
                scopes=["https://www.googleapis.com/auth/cloud-platform"],
                lifetime=impersonate.lifetime,
            )
        else:
            credentials, _ = google.auth.default(
                scopes=["https://www.googleapis.com/auth/cloud-platform"]
            )

        credentials.refresh(AuthRequest())

        expiry = getattr(credentials, "expiry", None)
        if not expiry:
            expiry = datetime.now(timezone.utc) + timedelta(minutes=55)
        elif isinstance(expiry, datetime) and expiry.tzinfo is None:
            # google-auth documents expiry as a naive datetime in UTC; mark it
            # explicitly so it is not interpreted in the host's local timezone.
            expiry = expiry.replace(tzinfo=timezone.utc)
        return (credentials.token, expiry)

    return _callback
|
||||
|
|
@ -0,0 +1,345 @@
|
|||
# Copyright 2025 Collate
|
||||
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
Client to interact with the Airflow REST API
|
||||
"""
|
||||
import traceback
|
||||
from typing import List, Optional
|
||||
from urllib.parse import quote
|
||||
|
||||
from requests.exceptions import ConnectionError as RequestsConnectionError
|
||||
from requests.exceptions import HTTPError
|
||||
|
||||
from metadata.generated.schema.entity.services.connections.pipeline.airflowConnection import (
|
||||
AirflowConnection,
|
||||
)
|
||||
from metadata.generated.schema.entity.utils.common.accessTokenConfig import AccessToken
|
||||
from metadata.generated.schema.entity.utils.common.basicAuthConfig import BasicAuth
|
||||
from metadata.generated.schema.entity.utils.common.gcpCredentialsConfig import (
|
||||
GcpServiceAccount,
|
||||
)
|
||||
from metadata.generated.schema.entity.utils.common.mwaaAuthConfig import (
|
||||
MwaaAuthentication,
|
||||
)
|
||||
from metadata.ingestion.connections.source_api_client import TrackedREST
|
||||
from metadata.ingestion.ometa.client import ClientConfig
|
||||
from metadata.ingestion.source.pipeline.airflow.api.auth import (
|
||||
build_access_token_callback,
|
||||
build_basic_auth_callback,
|
||||
build_gcp_token_callback,
|
||||
)
|
||||
from metadata.ingestion.source.pipeline.airflow.api.models import (
|
||||
AirflowApiDagDetails,
|
||||
AirflowApiDagRun,
|
||||
AirflowApiTask,
|
||||
AirflowApiTaskInstance,
|
||||
)
|
||||
from metadata.ingestion.source.pipeline.airflow.api.mwaa import MWAAClient
|
||||
from metadata.utils.helpers import clean_uri
|
||||
from metadata.utils.logger import ingestion_logger
|
||||
|
||||
logger = ingestion_logger()
|
||||
|
||||
|
||||
class AirflowApiClient:
    """
    Client to interact with the Airflow REST API (v1 for Airflow 2.x, v2 for Airflow 3.x)

    When the connection uses MWAA authentication every public method delegates
    to an MWAAClient and ``self.client`` is None; otherwise requests go through
    a TrackedREST client and ``self.mwaa_client`` is None.
    """

    def __init__(self, config: AirflowConnection):
        """Build the MWAA or REST backend from the connection's authConfig."""
        self.config = config
        self._detected_version: Optional[str] = None

        rest_config = config.connection
        auth_config = rest_config.authConfig

        # Check if this is MWAA (AWS credentials)
        if isinstance(auth_config, MwaaAuthentication):
            # Use MWAA client for AWS managed Airflow
            environment_name = auth_config.mwaaConfig.mwaaEnvironmentName
            self.mwaa_client = MWAAClient(
                auth_config.mwaaConfig.awsConfig, environment_name
            )
            self.client = None  # No need for TrackedREST client with MWAA
            return

        # Use standard REST client for other authentication types
        self.mwaa_client = None
        base_url = clean_uri(str(config.hostPort))
        auth_token_mode = "Bearer"

        if isinstance(auth_config, AccessToken):
            auth_token_fn = build_access_token_callback(
                auth_config.token.get_secret_value()
            )
        elif isinstance(auth_config, BasicAuth):
            # The basic-auth callback embeds its own header prefix, so it
            # also supplies the auth_token_mode to use.
            auth_token_fn, auth_token_mode = build_basic_auth_callback(
                host=base_url,
                username=auth_config.username,
                password=auth_config.password.get_secret_value(),
                verify=rest_config.verifySSL,
            )
        elif isinstance(auth_config, GcpServiceAccount):
            auth_token_fn = build_gcp_token_callback(auth_config.credentials)
        else:
            auth_token_fn = None

        client_config = ClientConfig(
            base_url=base_url,
            api_version="api",
            auth_header="Authorization" if auth_token_fn else None,
            auth_token=auth_token_fn,
            auth_token_mode=auth_token_mode,
            verify=rest_config.verifySSL,
        )
        self.client = TrackedREST(client_config, source_name="airflow_api")

    @property
    def api_version(self) -> str:
        """API version string ("v1"/"v2"), resolved once and then cached."""
        if self._detected_version:
            return self._detected_version

        # Use MWAA client - no version detection needed
        if self.mwaa_client:
            self._detected_version = "v1"  # MWAA handles versioning internally
            return self._detected_version

        rest_config = self.config.connection
        configured = (
            str(rest_config.apiVersion.value) if rest_config.apiVersion else "auto"
        )
        if configured != "auto":
            self._detected_version = configured
            return self._detected_version

        self._detected_version = self._detect_api_version()
        return self._detected_version

    def _detect_api_version(self) -> str:
        """
        Probe ``/v2/version`` then ``/v1/version`` and return the first that answers.

        HTTP 401/403 and connection-level errors are re-raised, since they
        indicate a problem that probing another version will not solve. Other
        failures are logged at debug level and the next version is tried;
        if none answers, "v1" is returned.
        """
        for version in ("v2", "v1"):
            try:
                self.client.get(f"/{version}/version")
                return version
            except HTTPError as exc:
                if exc.response is not None and exc.response.status_code in (401, 403):
                    raise
                logger.debug(traceback.format_exc())
            except (RequestsConnectionError, TimeoutError, OSError):
                raise
            except Exception:
                logger.debug(traceback.format_exc())
        logger.warning("Could not detect Airflow API version, defaulting to v1")
        return "v1"

    @property
    def _prefix(self) -> str:
        """Path prefix for versioned endpoints, e.g. ``/v1``."""
        return f"/{self.api_version}"

    @property
    def _date_field(self) -> str:
        """Name of the run-date field used for ordering in the active API version."""
        return "logical_date" if self.api_version == "v2" else "execution_date"

    def _parse_response(self, response):
        """Parse response, handling both dict and Response objects"""
        if hasattr(response, "json"):
            try:
                return response.json()
            except Exception as exc:
                logger.warning(f"Failed to parse JSON response: {exc}")
                logger.warning(
                    f"Response content type: {response.headers.get('content-type')}"
                )
                logger.debug(f"Response status code: {response.status_code}")
                logger.debug(f"Response text: {response.text[:500]}")
                return {}
        return response

    def get_version(self) -> dict:
        """Return the parsed payload of the ``/version`` endpoint."""
        if self.mwaa_client:
            return self.mwaa_client.get_version()

        response = self.client.get(f"{self._prefix}/version")
        return self._parse_response(response)

    def list_dags(self, limit: int = 100, offset: int = 0) -> dict:
        """Return one parsed page of the ``/dags`` collection."""
        if self.mwaa_client:
            return self.mwaa_client.list_dags(limit=limit, offset=offset)

        response = self.client.get(f"{self._prefix}/dags?limit={limit}&offset={offset}")
        return self._parse_response(response)

    def get_dag_tasks(self, dag_id: str) -> dict:
        """Return the parsed ``/dags/{dag_id}/tasks`` payload (dag_id is URL-quoted)."""
        if self.mwaa_client:
            return self.mwaa_client.get_dag_tasks(dag_id)

        response = self.client.get(
            f"{self._prefix}/dags/{quote(dag_id, safe='')}/tasks"
        )
        return self._parse_response(response)

    def list_dag_runs(self, dag_id: str, limit: int = 10) -> dict:
        """Return up to ``limit`` runs of a DAG, ordered by descending run date."""
        if self.mwaa_client:
            return self.mwaa_client.list_dag_runs(dag_id, limit=limit)

        response = self.client.get(
            f"{self._prefix}/dags/{quote(dag_id, safe='')}/dagRuns"
            f"?limit={limit}&order_by=-{self._date_field}"
        )
        return self._parse_response(response)

    def get_task_instances(self, dag_id: str, dag_run_id: str) -> dict:
        """Return the parsed task-instance payload of a single DAG run."""
        if self.mwaa_client:
            return self.mwaa_client.get_task_instances(dag_id, dag_run_id)

        response = self.client.get(
            f"{self._prefix}/dags/{quote(dag_id, safe='')}"
            f"/dagRuns/{quote(dag_run_id, safe='')}/taskInstances"
        )
        return self._parse_response(response)

    def _paginate(self, path: str, key: str, limit: int = 100) -> List[dict]:
        """
        Collect every item stored under ``key`` across the pages of ``path``.

        Args:
            path: endpoint path, with or without an existing query string
            key: name of the list field in each page of the response
            limit: page size requested from the server

        Returns:
            All items gathered until the reported ``total_entries`` is
            reached, or until an empty response or empty page is seen.
        """
        result: List[dict] = []
        offset = 0
        total = limit
        separator = "&" if "?" in path else "?"
        while offset < total:
            response = self._parse_response(
                self.client.get(f"{path}{separator}limit={limit}&offset={offset}")
            )
            if not response:
                break

            page = response.get(key, [])
            if not page:
                break
            result.extend(page)
            total = response.get("total_entries", len(result))
            # Advance by what was actually received: a server may cap the page
            # size below `limit`, and stepping by `limit` would skip records.
            offset += len(page)
        return result

    def get_all_dags(self) -> List[dict]:
        """Return every DAG entry, following pagination."""
        if self.mwaa_client:
            return self.mwaa_client.get_all_dags()

        return self._paginate(f"{self._prefix}/dags", key="dags")

    def build_dag_details(self, dag_data: dict) -> AirflowApiDagDetails:
        """
        Convert one raw DAG entry into an AirflowApiDagDetails, including its tasks.

        A failure while fetching the tasks is logged as a warning and the DAG
        is returned with an empty task list.
        """
        if self.mwaa_client:
            return self.mwaa_client.build_dag_details(dag_data)

        dag_id = dag_data["dag_id"]

        # Tags may arrive as {"name": ...} dicts or as plain strings.
        tags_raw = dag_data.get("tags") or []
        tags = []
        for tag in tags_raw:
            if isinstance(tag, dict):
                name = tag.get("name")
            elif isinstance(tag, str):
                name = tag
            else:
                continue
            if name:
                tags.append(str(name))

        owners = dag_data.get("owners") or []

        if self.api_version == "v2":
            schedule = dag_data.get("timetable_summary")
        else:
            schedule = dag_data.get("schedule_interval")
        if isinstance(schedule, dict):
            # A dict-shaped schedule carries the expression under "value".
            schedule = schedule.get("value")

        try:
            task_response = self.get_dag_tasks(dag_id)
            tasks_data = task_response.get("tasks", [])
        except Exception as exc:
            logger.warning(f"Could not fetch tasks for DAG {dag_id}: {exc}")
            tasks_data = []

        tasks = [
            AirflowApiTask(
                task_id=t["task_id"],
                downstream_task_ids=t.get("downstream_task_ids"),
                owner=t.get("owner"),
                doc_md=t.get("doc_md"),
                start_date=t.get("start_date"),
                end_date=t.get("end_date"),
                class_ref=t.get("class_ref"),
            )
            for t in tasks_data
        ]

        return AirflowApiDagDetails(
            dag_id=dag_id,
            description=dag_data.get("description"),
            fileloc=dag_data.get("fileloc") or dag_data.get("file_loc"),
            is_paused=dag_data.get("is_paused"),
            owners=owners,
            tags=tags,
            schedule_interval=schedule,
            max_active_runs=dag_data.get("max_active_runs"),
            start_date=dag_data.get("start_date"),
            tasks=tasks,
        )

    def get_dag_runs(self, dag_id: str, limit: int = 10) -> List[AirflowApiDagRun]:
        """
        Return up to ``limit`` runs of a DAG as AirflowApiDagRun models.

        A failure while listing the runs is logged as a warning and an empty
        list is returned.
        """
        if self.mwaa_client:
            return self.mwaa_client.get_dag_runs(dag_id, limit=limit)

        try:
            response = self.list_dag_runs(dag_id, limit=limit)
            runs_data = response.get("dag_runs", [])
        except Exception as exc:
            logger.warning(f"Could not fetch dag runs for {dag_id}: {exc}")
            return []

        result = []
        for run in runs_data:
            # Prefer "logical_date" and fall back to "execution_date".
            execution_date = run.get("logical_date") or run.get("execution_date")
            result.append(
                AirflowApiDagRun(
                    dag_run_id=run.get("dag_run_id", ""),
                    state=run.get("state"),
                    execution_date=execution_date,
                    start_date=run.get("start_date"),
                    end_date=run.get("end_date"),
                )
            )
        return result

    def get_task_instances_for_run(
        self, dag_id: str, dag_run_id: str
    ) -> List[AirflowApiTaskInstance]:
        """
        Return all task instances of a DAG run as AirflowApiTaskInstance models.

        A failure while fetching is logged as a warning and an empty list is
        returned.
        """
        if self.mwaa_client:
            return self.mwaa_client.get_task_instances_for_run(dag_id, dag_run_id)

        try:
            path = (
                f"{self._prefix}/dags/{quote(dag_id, safe='')}"
                f"/dagRuns/{quote(dag_run_id, safe='')}/taskInstances"
            )
            instances_data = self._paginate(path, key="task_instances")
        except Exception as exc:
            logger.warning(
                f"Could not fetch task instances for {dag_id}/{dag_run_id}: {exc}"
            )
            return []

        return [
            AirflowApiTaskInstance(
                task_id=ti.get("task_id", ""),
                state=ti.get("state"),
                start_date=ti.get("start_date"),
                end_date=ti.get("end_date"),
            )
            for ti in instances_data
        ]
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
# Copyright 2025 Collate
|
||||
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
Pydantic models for Airflow REST API responses
|
||||
"""
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
|
||||
class AirflowApiTask(BaseModel):
    """One task of a DAG, as read from the tasks endpoint payload."""

    # Keep unknown payload fields instead of rejecting them.
    model_config = ConfigDict(extra="allow")

    # Only task_id is required; everything else defaults to None.
    task_id: str
    downstream_task_ids: Optional[List[str]] = None
    owner: Optional[str] = None
    doc_md: Optional[str] = None
    # Task dates are kept as strings (not parsed into datetime).
    start_date: Optional[str] = None
    end_date: Optional[str] = None
    class_ref: Optional[Dict[str, str]] = None
|
||||
|
||||
|
||||
class AirflowApiDagDetails(BaseModel):
    """DAG-level metadata together with the list of its tasks."""

    # Keep unknown payload fields instead of rejecting them.
    model_config = ConfigDict(extra="allow")

    # Only dag_id is required.
    dag_id: str
    description: Optional[str] = None
    fileloc: Optional[str] = None
    is_paused: Optional[bool] = None
    owners: Optional[List[str]] = None
    tags: Optional[List[str]] = None
    schedule_interval: Optional[str] = None
    max_active_runs: Optional[int] = None
    start_date: Optional[datetime] = None
    tasks: List[AirflowApiTask] = []
|
||||
|
||||
|
||||
class AirflowApiDagRun(BaseModel):
    """A single run of a DAG: identifier, state and timestamps."""

    # Keep unknown payload fields instead of rejecting them.
    model_config = ConfigDict(extra="allow")

    # Only dag_run_id is required.
    dag_run_id: str
    state: Optional[str] = None
    execution_date: Optional[datetime] = None
    start_date: Optional[datetime] = None
    end_date: Optional[datetime] = None
|
||||
|
||||
|
||||
class AirflowApiTaskInstance(BaseModel):
    """The execution of one task inside a DAG run: state and timestamps."""

    # Keep unknown payload fields instead of rejecting them.
    model_config = ConfigDict(extra="allow")

    # Only task_id is required.
    task_id: str
    state: Optional[str] = None
    start_date: Optional[datetime] = None
    end_date: Optional[datetime] = None
|
||||
|
|
@ -0,0 +1,254 @@
|
|||
# Copyright 2025 Collate
|
||||
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
MWAA (Managed Workflows for Apache Airflow) REST API implementation
|
||||
Uses AWS MWAA invoke_rest_api for direct API calls without token management
|
||||
"""
|
||||
import json
|
||||
import traceback
|
||||
from typing import Dict, List, Optional
|
||||
from urllib.parse import quote
|
||||
|
||||
from metadata.clients.aws_client import AWSClient
|
||||
from metadata.generated.schema.security.credentials.awsCredentials import AWSCredentials
|
||||
from metadata.ingestion.source.pipeline.airflow.api.models import (
|
||||
AirflowApiDagDetails,
|
||||
AirflowApiDagRun,
|
||||
AirflowApiTask,
|
||||
AirflowApiTaskInstance,
|
||||
)
|
||||
from metadata.utils.logger import ingestion_logger
|
||||
|
||||
logger = ingestion_logger()
|
||||
|
||||
|
||||
class MWAAClient:
    """
    MWAA client that uses AWS MWAA invoke_rest_api for direct Airflow REST API calls.
    This approach bypasses token management and uses AWS IAM permissions directly.
    """

    def __init__(self, aws_credentials: AWSCredentials, environment_name: str):
        """Create the underlying MWAA client for the given environment name."""
        self.aws_credentials = aws_credentials
        self.environment_name = environment_name
        self._aws_client = AWSClient(aws_credentials)
        self._mwaa_client = self._aws_client.get_mwaa_client()

    def _invoke_rest_api(
        self,
        path: str,
        method: str = "GET",
        body: Optional[Dict] = None,
        query: Optional[Dict] = None,
    ) -> Dict:
        """
        Invoke MWAA REST API using AWS MWAA invoke_rest_api method.

        Args:
            path: API path (e.g., "/dags")
            method: HTTP method (GET, POST, etc.)
            body: Request body for POST/PUT requests
            query: Query parameters

        Returns:
            Response from Airflow REST API. A string payload is JSON-decoded;
            if decoding fails it is returned as ``{"raw_response": <text>}``.

        Raises:
            Any exception raised by the call is logged and re-raised.
        """
        try:
            params = {"Name": self.environment_name, "Path": path, "Method": method}

            if body:
                params["Body"] = json.dumps(body) if isinstance(body, dict) else body

            if query:
                params["QueryParameters"] = query

            response = self._mwaa_client.invoke_rest_api(**params)
            rest_api_response = response.get("RestApiResponse", {})

            # Handle different response formats
            if isinstance(rest_api_response, str):
                try:
                    return json.loads(rest_api_response)
                except json.JSONDecodeError:
                    logger.warning(
                        f"Failed to parse MWAA response as JSON: {rest_api_response}"
                    )
                    return {"raw_response": rest_api_response}

            return rest_api_response

        except Exception as e:
            logger.error(f"MWAA REST API call failed for {path}: {e}")
            logger.debug(traceback.format_exc())
            raise

    def get_version(self) -> Dict:
        """Get basic connection info - MWAA doesn't expose version endpoint"""
        # Return a simple response to indicate connectivity
        return {"version": "MWAA", "status": "connected"}

    def list_dags(self, limit: int = 100, offset: int = 0) -> Dict:
        """List DAGs with pagination"""
        query = {"limit": str(limit), "offset": str(offset)}
        return self._invoke_rest_api("/dags", query=query)

    def get_dag_tasks(self, dag_id: str) -> Dict:
        """Get tasks for a specific DAG"""
        return self._invoke_rest_api(f"/dags/{quote(dag_id, safe='')}/tasks")

    def list_dag_runs(self, dag_id: str, limit: int = 10) -> Dict:
        """
        List DAG runs for a specific DAG, ordered by descending start date.

        The ordering and the optional limit are sent as query parameters,
        the same way the other methods of this client pass theirs, instead
        of being embedded in the request path.
        """
        query = {"order_by": "-start_date"}
        if limit is not None:
            query["limit"] = str(limit)
        return self._invoke_rest_api(
            f"/dags/{quote(dag_id, safe='')}/dagRuns",
            query=query,
        )

    def get_task_instances(self, dag_id: str, dag_run_id: str) -> Dict:
        """Get task instances for a specific DAG run"""
        return self._invoke_rest_api(
            f"/dags/{quote(dag_id, safe='')}"
            f"/dagRuns/{quote(dag_run_id, safe='')}/taskInstances"
        )

    def _paginate(self, path: str, key: str, limit: int = 100) -> List[Dict]:
        """
        Paginate through API results.

        Collects the list stored under ``key`` from each page until the
        reported ``total_entries`` is reached, or until an empty response or
        empty page is seen.
        """
        result: List[Dict] = []
        offset = 0
        total = limit

        while offset < total:
            query = {"limit": str(limit), "offset": str(offset)}
            response = self._invoke_rest_api(path, query=query)

            if not response:
                break

            page = response.get(key, [])
            if not page:
                break

            result.extend(page)
            total = response.get("total_entries", len(result))
            # Advance by what was actually received: a server may cap the page
            # size below `limit`, and stepping by `limit` would skip records.
            offset += len(page)

        return result

    def get_all_dags(self) -> List[Dict]:
        """Get all DAGs using pagination"""
        return self._paginate("/dags", key="dags")

    def build_dag_details(self, dag_data: Dict) -> AirflowApiDagDetails:
        """
        Build DAG details using existing model format.

        A failure while fetching the tasks is logged as a warning and the DAG
        is returned with an empty task list.
        """
        dag_id = dag_data["dag_id"]

        # Parse tags: entries may be {"name": ...} dicts or plain strings
        tags_raw = dag_data.get("tags") or []
        tags = []
        for tag in tags_raw:
            if isinstance(tag, dict):
                name = tag.get("name")
            elif isinstance(tag, str):
                name = tag
            else:
                continue
            if name:
                tags.append(str(name))

        owners = dag_data.get("owners") or []

        # Parse schedule - MWAA typically uses schedule_interval format
        schedule = dag_data.get("schedule_interval")
        if isinstance(schedule, dict):
            schedule = schedule.get("value")

        # Get tasks for the DAG
        try:
            task_response = self.get_dag_tasks(dag_id)
            tasks_data = task_response.get("tasks", [])
        except Exception as exc:
            logger.warning(f"Could not fetch tasks for DAG {dag_id}: {exc}")
            tasks_data = []

        tasks = [
            AirflowApiTask(
                task_id=t["task_id"],
                downstream_task_ids=t.get("downstream_task_ids"),
                owner=t.get("owner"),
                doc_md=t.get("doc_md"),
                start_date=t.get("start_date"),
                end_date=t.get("end_date"),
                class_ref=t.get("class_ref"),
            )
            for t in tasks_data
        ]

        return AirflowApiDagDetails(
            dag_id=dag_id,
            description=dag_data.get("description"),
            fileloc=dag_data.get("fileloc") or dag_data.get("file_loc"),
            is_paused=dag_data.get("is_paused"),
            owners=owners,
            tags=tags,
            schedule_interval=schedule,
            max_active_runs=dag_data.get("max_active_runs"),
            start_date=dag_data.get("start_date"),
            tasks=tasks,
        )

    def get_dag_runs(self, dag_id: str, limit: int = 10) -> List[AirflowApiDagRun]:
        """
        Get DAG runs using existing model format.

        A failure while listing the runs is logged as a warning and an empty
        list is returned.
        """
        try:
            response = self.list_dag_runs(dag_id, limit=limit)
            runs_data = response.get("dag_runs", [])
        except Exception as exc:
            logger.warning(f"Could not fetch dag runs for {dag_id}: {exc}")
            return []

        result = []
        for run in runs_data:
            # Prefer "logical_date" and fall back to "execution_date".
            execution_date = run.get("logical_date") or run.get("execution_date")
            result.append(
                AirflowApiDagRun(
                    dag_run_id=run.get("dag_run_id", ""),
                    state=run.get("state"),
                    execution_date=execution_date,
                    start_date=run.get("start_date"),
                    end_date=run.get("end_date"),
                )
            )
        return result

    def get_task_instances_for_run(
        self, dag_id: str, dag_run_id: str
    ) -> List[AirflowApiTaskInstance]:
        """
        Get task instances using existing model format.

        A failure while fetching is logged as a warning and an empty list is
        returned.
        """
        try:
            path = (
                f"/dags/{quote(dag_id, safe='')}"
                f"/dagRuns/{quote(dag_run_id, safe='')}/taskInstances"
            )
            instances_data = self._paginate(path, key="task_instances")
        except Exception as exc:
            logger.warning(
                f"Could not fetch task instances for {dag_id}/{dag_run_id}: {exc}"
            )
            return []

        return [
            AirflowApiTaskInstance(
                task_id=ti.get("task_id", ""),
                state=ti.get("state"),
                start_date=ti.get("start_date"),
                end_date=ti.get("end_date"),
            )
            for ti in instances_data
        ]
|
||||
|
|
@ -0,0 +1,271 @@
|
|||
# Copyright 2025 Collate
|
||||
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
Airflow REST API source to extract metadata via Airflow REST API
|
||||
"""
|
||||
import traceback
|
||||
from typing import Iterable, List, Optional
|
||||
from urllib.parse import quote
|
||||
|
||||
from metadata.generated.schema.api.data.createPipeline import CreatePipelineRequest
|
||||
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
|
||||
from metadata.generated.schema.entity.data.pipeline import (
|
||||
Pipeline,
|
||||
PipelineState,
|
||||
PipelineStatus,
|
||||
StatusType,
|
||||
Task,
|
||||
TaskStatus,
|
||||
)
|
||||
from metadata.generated.schema.entity.services.connections.pipeline.airflowConnection import (
|
||||
AirflowConnection,
|
||||
)
|
||||
from metadata.generated.schema.entity.services.ingestionPipelines.status import (
|
||||
StackTraceError,
|
||||
)
|
||||
from metadata.generated.schema.metadataIngestion.workflow import (
|
||||
Source as WorkflowSource,
|
||||
)
|
||||
from metadata.generated.schema.type.basic import (
|
||||
EntityName,
|
||||
FullyQualifiedEntityName,
|
||||
Markdown,
|
||||
SourceUrl,
|
||||
Timestamp,
|
||||
)
|
||||
from metadata.ingestion.api.models import Either
|
||||
from metadata.ingestion.api.steps import InvalidSourceException
|
||||
from metadata.ingestion.models.ometa_classification import OMetaTagAndClassification
|
||||
from metadata.ingestion.models.pipeline_status import OMetaPipelineStatus
|
||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||
from metadata.ingestion.source.pipeline.airflow.api.models import AirflowApiDagDetails
|
||||
from metadata.ingestion.source.pipeline.pipeline_service import PipelineServiceSource
|
||||
from metadata.utils import fqn
|
||||
from metadata.utils.helpers import clean_uri, datetime_to_ts
|
||||
from metadata.utils.logger import ingestion_logger
|
||||
from metadata.utils.tag_utils import get_ometa_tag_and_classification, get_tag_labels
|
||||
|
||||
logger = ingestion_logger()

# Classification name passed as `classification_name` whenever DAG tags are
# converted into OpenMetadata tag labels / classifications in this module.
AIRFLOW_TAG_CATEGORY = "AirflowTags"

# Translates Airflow state strings into OpenMetadata StatusType values.
# Note that both "queued" and "running" map to Pending, and "upstream_failed"
# is reported as Failed. Lookups in this module use `.get(state, Pending)`,
# so states missing from this table are reported as Pending.
STATUS_MAP = {
    "success": StatusType.Successful.value,
    "failed": StatusType.Failed.value,
    "queued": StatusType.Pending.value,
    "skipped": StatusType.Skipped.value,
    "running": StatusType.Pending.value,
    "upstream_failed": StatusType.Failed.value,
}
|
||||
|
||||
|
||||
class AirflowApiSource(PipelineServiceSource):
    """
    Pipeline source that reads DAGs, tasks and run statuses
    through Airflow's REST API instead of the metadata database.
    """

    @classmethod
    def create(
        cls, config_dict, metadata: OpenMetadata, pipeline_name: Optional[str] = None
    ) -> "AirflowApiSource":
        """
        Validate ``config_dict`` as a workflow source and build the source.

        Raises:
            InvalidSourceException: if the service connection is not an
                ``AirflowConnection``.
        """
        config: WorkflowSource = WorkflowSource.model_validate(config_dict)
        connection: AirflowConnection = config.serviceConnection.root.config
        if isinstance(connection, AirflowConnection):
            return cls(config, metadata)
        raise InvalidSourceException(
            f"Expected AirflowConnection, but got {connection}"
        )

    def get_pipelines_list(self) -> Iterable[AirflowApiDagDetails]:
        """
        Yield the details of every DAG returned by the client.

        A DAG whose details cannot be built is logged and skipped so that
        the remaining DAGs are still produced.
        """
        for raw_dag in self.connection.get_all_dags():
            try:
                yield self.connection.build_dag_details(raw_dag)
            except Exception as exc:
                logger.debug(traceback.format_exc())
                logger.warning(
                    f"Error building DAG details for {raw_dag.get('dag_id')}: {exc}"
                )

    def get_pipeline_name(self, pipeline_details: AirflowApiDagDetails) -> str:
        """Return the DAG id, which is used as the pipeline name."""
        return pipeline_details.dag_id

    def get_pipeline_state(
        self, pipeline_details: AirflowApiDagDetails
    ) -> Optional[PipelineState]:
        """
        Translate the DAG's paused flag into a pipeline state.

        Returns ``None`` when the flag is unknown, ``Inactive`` for a
        paused DAG and ``Active`` otherwise.
        """
        paused = pipeline_details.is_paused
        if paused is None:
            return None
        if paused:
            return PipelineState.Inactive
        return PipelineState.Active

    def _get_task_source_url(self, dag_id: str, task_id: str) -> str:
        """Build the UI link of a task; the layout depends on the API version."""
        base_url = clean_uri(self.service_connection.hostPort)
        is_v2 = self.connection.api_version == "v2"
        dag_part = quote(dag_id)
        task_part = quote(task_id)
        if is_v2:
            return f"{base_url}/dags/{dag_part}/tasks/{task_part}"
        return (
            f"{base_url}/taskinstance/list/"
            f"?_flt_3_dag_id={dag_part}&_flt_3_task_id={task_part}"
        )

    def _get_dag_source_url(self, dag_id: str) -> str:
        """Build the UI link of a DAG; non-v2 APIs point at the grid view."""
        base_url = clean_uri(self.service_connection.hostPort)
        is_v2 = self.connection.api_version == "v2"
        dag_url = f"{base_url}/dags/{quote(dag_id)}"
        if is_v2:
            return dag_url
        return f"{dag_url}/grid"

    def _build_tasks(self, dag_details: AirflowApiDagDetails) -> List[Task]:
        """Convert the DAG's task models into OpenMetadata ``Task`` entities."""
        built_tasks = []
        for dag_task in dag_details.tasks:
            built_tasks.append(
                Task(
                    name=dag_task.task_id,
                    description=Markdown(dag_task.doc_md) if dag_task.doc_md else None,
                    sourceUrl=SourceUrl(
                        self._get_task_source_url(dag_details.dag_id, dag_task.task_id)
                    ),
                    downstreamTasks=dag_task.downstream_task_ids or [],
                    startDate=dag_task.start_date,
                    endDate=dag_task.end_date,
                    taskType=dag_task.class_ref.get("class_name")
                    if dag_task.class_ref
                    else None,
                )
            )
        return built_tasks

    def yield_pipeline(
        self, pipeline_details: AirflowApiDagDetails
    ) -> Iterable[Either[CreatePipelineRequest]]:
        """
        Yield the create-pipeline request for one DAG.

        On success the request is registered and the names of its tasks are
        stored in the context for the later status step. On any failure the
        stored task names are reset to an empty set and a ``StackTraceError``
        is yielded instead.
        """
        try:
            # Arguments are evaluated in the same order as the request fields.
            entity_name = EntityName(pipeline_details.dag_id)
            description = None
            if pipeline_details.description:
                description = Markdown(pipeline_details.description)
            source_url = SourceUrl(self._get_dag_source_url(pipeline_details.dag_id))
            state = self.get_pipeline_state(pipeline_details)
            start_date = None
            if pipeline_details.start_date:
                start_date = pipeline_details.start_date.isoformat()
            tasks = self._build_tasks(pipeline_details)
            service_fqn = FullyQualifiedEntityName(self.context.get().pipeline_service)
            tag_labels = get_tag_labels(
                metadata=self.metadata,
                tags=pipeline_details.tags or [],
                classification_name=AIRFLOW_TAG_CATEGORY,
                include_tags=self.source_config.includeTags,
            )

            pipeline_request = CreatePipelineRequest(
                name=entity_name,
                description=description,
                sourceUrl=source_url,
                state=state,
                concurrency=pipeline_details.max_active_runs,
                pipelineLocation=pipeline_details.fileloc,
                startDate=start_date,
                tasks=tasks,
                service=service_fqn,
                scheduleInterval=pipeline_details.schedule_interval,
                tags=tag_labels,
            )
            yield Either(right=pipeline_request)
            self.register_record(pipeline_request=pipeline_request)
            known_task_names = set()
            for request_task in pipeline_request.tasks or []:
                known_task_names.add(request_task.name)
            self.context.get().task_names = known_task_names
        except Exception as exc:
            self.context.get().task_names = set()
            yield Either(
                left=StackTraceError(
                    name=pipeline_details.dag_id,
                    error=f"Error building pipeline from {pipeline_details.dag_id}: {exc}",
                    stackTrace=traceback.format_exc(),
                )
            )

    def yield_pipeline_status(
        self, pipeline_details: AirflowApiDagDetails
    ) -> Iterable[Either[OMetaPipelineStatus]]:
        """
        Yield one pipeline status per recent DAG run.

        The number of runs requested is ``numberOfStatus`` from the service
        connection, or 10 when that value is falsy. Runs without an id, runs
        seen while no task names are stored in the context, and runs for
        which no timestamp can be derived are skipped. Only task instances
        whose task id is among the stored task names are reported. Any
        exception ends the iteration with a single ``StackTraceError``.
        """
        try:
            run_limit = self.service_connection.numberOfStatus or 10
            recent_runs = self.connection.get_dag_runs(
                pipeline_details.dag_id, limit=run_limit
            )

            for dag_run in recent_runs:
                if not (dag_run.dag_run_id and self.context.get().task_names):
                    continue

                run_task_instances = self.connection.get_task_instances_for_run(
                    pipeline_details.dag_id, dag_run.dag_run_id
                )

                task_statuses = []
                for instance in run_task_instances:
                    if instance.task_id not in self.context.get().task_names:
                        continue
                    task_statuses.append(
                        TaskStatus(
                            name=instance.task_id,
                            executionStatus=STATUS_MAP.get(
                                instance.state, StatusType.Pending.value
                            ),
                            startTime=datetime_to_ts(instance.start_date),
                            endTime=datetime_to_ts(instance.end_date),
                        )
                    )

                # Prefer the execution date, then fall back to start and end dates.
                run_timestamp = None
                for date_field in ("execution_date", "start_date", "end_date"):
                    run_timestamp = datetime_to_ts(getattr(dag_run, date_field))
                    if run_timestamp is not None:
                        break
                if run_timestamp is None:
                    logger.debug(
                        "Skipping DAG run %s for %s — no timestamp available",
                        dag_run.dag_run_id,
                        pipeline_details.dag_id,
                    )
                    continue

                run_status = PipelineStatus(
                    executionId=dag_run.dag_run_id,
                    taskStatus=task_statuses,
                    executionStatus=STATUS_MAP.get(
                        dag_run.state, StatusType.Pending.value
                    ),
                    timestamp=Timestamp(run_timestamp),
                )
                pipeline_fqn = fqn.build(
                    metadata=self.metadata,
                    entity_type=Pipeline,
                    service_name=self.context.get().pipeline_service,
                    pipeline_name=self.context.get().pipeline,
                )
                yield Either(
                    right=OMetaPipelineStatus(
                        pipeline_fqn=pipeline_fqn,
                        pipeline_status=run_status,
                    )
                )
        except Exception as exc:
            yield Either(
                left=StackTraceError(
                    name=f"{pipeline_details.dag_id} Pipeline Status",
                    error=f"Error extracting status for DAG {pipeline_details.dag_id}: {exc}",
                    stackTrace=traceback.format_exc(),
                )
            )

    def yield_pipeline_lineage_details(
        self, pipeline_details: AirflowApiDagDetails
    ) -> Iterable[Either[AddLineageRequest]]:
        """Produce no lineage: this source always returns an empty list here."""
        return []

    def yield_tag(
        self, pipeline_details: AirflowApiDagDetails
    ) -> Iterable[Either[OMetaTagAndClassification]]:
        """Yield tag and classification requests for the DAG's tags."""
        dag_tags = pipeline_details.tags or []
        yield from get_ometa_tag_and_classification(
            tags=dag_tags,
            classification_name=AIRFLOW_TAG_CATEGORY,
            tag_description="Airflow Tag",
            classification_description="Tags associated with airflow entities.",
            include_tags=self.source_config.includeTags,
        )
|
||||
|
|
@ -188,10 +188,21 @@ def _(airflow_connection: SQLiteConnection) -> Engine:
|
|||
return get_sqlite_connection(airflow_connection)
|
||||
|
||||
|
||||
def get_connection(connection: AirflowConnection) -> Engine:
|
||||
def get_connection(connection: AirflowConnection):
|
||||
"""
|
||||
Create connection
|
||||
"""
|
||||
from metadata.generated.schema.entity.utils.airflowRestApiConnection import ( # pylint: disable=import-outside-toplevel
|
||||
AirflowRestApiConnection,
|
||||
)
|
||||
|
||||
if isinstance(connection.connection, AirflowRestApiConnection):
|
||||
from metadata.ingestion.source.pipeline.airflow.api.client import ( # pylint: disable=import-outside-toplevel
|
||||
AirflowApiClient,
|
||||
)
|
||||
|
||||
return AirflowApiClient(connection)
|
||||
|
||||
try:
|
||||
return _get_connection(connection.connection)
|
||||
except Exception as exc:
|
||||
|
|
@ -211,9 +222,30 @@ class AirflowTaskDetailsAccessError(Exception):
|
|||
"""
|
||||
|
||||
|
||||
def _test_api_connection(
    metadata: OpenMetadata,
    client,
    service_connection: AirflowConnection,
    automation_workflow: Optional[AutomationWorkflow] = None,
    timeout_seconds: Optional[int] = THREE_MIN,
) -> TestConnectionResult:
    """
    Run the connection test steps against the Airflow REST API client.

    "CheckAccess" asks the client for the Airflow version and
    "PipelineDetailsAccess" lists a single DAG. "TaskDetailAccess" performs
    no remote call here and simply returns True.
    """

    def _list_one_dag():
        return client.list_dags(limit=1)

    def _task_detail_noop():
        return True

    api_steps = {
        "CheckAccess": client.get_version,
        "PipelineDetailsAccess": _list_one_dag,
        "TaskDetailAccess": _task_detail_noop,
    }
    service_type = service_connection.type.value
    return test_connection_steps(
        metadata=metadata,
        test_fn=api_steps,
        service_type=service_type,
        automation_workflow=automation_workflow,
        timeout_seconds=timeout_seconds,
    )
|
||||
|
||||
|
||||
def test_connection(
|
||||
metadata: OpenMetadata,
|
||||
engine: Engine,
|
||||
connection_obj,
|
||||
service_connection: AirflowConnection,
|
||||
automation_workflow: Optional[AutomationWorkflow] = None,
|
||||
timeout_seconds: Optional[int] = THREE_MIN,
|
||||
|
|
@ -222,8 +254,20 @@ def test_connection(
|
|||
Test connection. This can be executed either as part
|
||||
of a metadata workflow or during an Automation Workflow
|
||||
"""
|
||||
from metadata.generated.schema.entity.utils.airflowRestApiConnection import ( # pylint: disable=import-outside-toplevel
|
||||
AirflowRestApiConnection,
|
||||
)
|
||||
|
||||
session_maker = sessionmaker(bind=engine)
|
||||
if isinstance(service_connection.connection, AirflowRestApiConnection):
|
||||
return _test_api_connection(
|
||||
metadata,
|
||||
connection_obj,
|
||||
service_connection,
|
||||
automation_workflow,
|
||||
timeout_seconds,
|
||||
)
|
||||
|
||||
session_maker = sessionmaker(bind=connection_obj)
|
||||
session = session_maker()
|
||||
|
||||
def test_pipeline_details_access(session):
|
||||
|
|
@ -252,7 +296,7 @@ def test_connection(
|
|||
raise AirflowTaskDetailsAccessError(f"Task details access error : {e}")
|
||||
|
||||
test_fn = {
|
||||
"CheckAccess": partial(test_connection_engine_step, engine),
|
||||
"CheckAccess": partial(test_connection_engine_step, connection_obj),
|
||||
"PipelineDetailsAccess": partial(test_pipeline_details_access, session),
|
||||
"TaskDetailAccess": partial(test_task_detail_access, session),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -200,13 +200,23 @@ class AirflowSource(PipelineServiceSource):
|
|||
@classmethod
def create(
    cls, config_dict, metadata: OpenMetadata, pipeline_name: Optional[str] = None
):
    """
    Build the pipeline source that matches the configured connection.

    ``config_dict`` is validated as a workflow source. When the nested
    connection is an ``AirflowRestApiConnection`` an ``AirflowApiSource``
    is returned; otherwise an instance of this class is returned. No return
    annotation is declared because either type may come back.

    Raises:
        InvalidSourceException: if the service connection is not an
            ``AirflowConnection``.
    """
    # NOTE(review): these imports are deferred to call time, presumably to
    # avoid an import cycle with the REST source module — confirm.
    from metadata.generated.schema.entity.utils.airflowRestApiConnection import (  # pylint: disable=import-outside-toplevel
        AirflowRestApiConnection,
    )

    config: WorkflowSource = WorkflowSource.model_validate(config_dict)
    connection: AirflowConnection = config.serviceConnection.root.config
    if not isinstance(connection, AirflowConnection):
        raise InvalidSourceException(
            f"Expected AirflowConnection, but got {connection}"
        )
    if isinstance(connection.connection, AirflowRestApiConnection):
        from metadata.ingestion.source.pipeline.airflow.api.source import (  # pylint: disable=import-outside-toplevel
            AirflowApiSource,
        )

        return AirflowApiSource(config, metadata)
    return cls(config, metadata)
|
||||
|
||||
@property
|
||||
|
|
|
|||
|
|
@ -0,0 +1,993 @@
|
|||
# Copyright 2025 Collate
|
||||
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
Comprehensive mocked integration test for Airflow API connector.
|
||||
|
||||
This test validates the complete Airflow integration flow without requiring
|
||||
real Airflow or OpenMetadata services, making it suitable for CI/CD environments.
|
||||
|
||||
Tests covered:
|
||||
- Airflow API client functionality with all authentication methods
|
||||
- DAG metadata extraction and parsing
|
||||
- Task extraction and relationship mapping
|
||||
- DAG run status processing
|
||||
- Pipeline entity creation in OpenMetadata
|
||||
- Error handling and edge cases
|
||||
- OpenLineage integration scenarios
|
||||
"""
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from unittest.mock import MagicMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from metadata.generated.schema.entity.services.connections.pipeline.airflowConnection import (
|
||||
AirflowConnection,
|
||||
)
|
||||
from metadata.generated.schema.entity.utils.airflowRestApiConnection import (
|
||||
AirflowRestApiConnection,
|
||||
)
|
||||
from metadata.generated.schema.entity.utils.common import (
|
||||
accessTokenConfig,
|
||||
basicAuthConfig,
|
||||
)
|
||||
from metadata.ingestion.source.pipeline.airflow.api.client import AirflowApiClient
|
||||
from metadata.workflow.metadata import MetadataWorkflow
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dotted patch targets inside the Airflow API client module. The tests pass
# these to `unittest.mock.patch` so that no real HTTP traffic is generated.
_TRACKED_REST_PATH = "metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST"
_BASIC_AUTH_CALLBACK_PATH = (
    "metadata.ingestion.source.pipeline.airflow.api.client.build_basic_auth_callback"
)
|
||||
|
||||
|
||||
def _make_access_token_config(token: str = "test_token") -> AirflowRestApiConnection:
    """Return a RestAPI connection config authenticated by the given static token."""
    token_auth = accessTokenConfig.AccessToken(token=token)
    rest_config = AirflowRestApiConnection(type="RestAPI", authConfig=token_auth)
    return rest_config
|
||||
|
||||
|
||||
def _make_airflow_connection(token: str = "test_token") -> AirflowConnection:
    """Return an AirflowConnection for localhost:8080 using a static access token."""
    rest_config = _make_access_token_config(token)
    airflow_connection = AirflowConnection(
        hostPort="http://localhost:8080",
        connection=rest_config,
    )
    return airflow_connection
|
||||
|
||||
|
||||
class TestAirflowApiMockedIntegration:
|
||||
"""Comprehensive mocked integration tests for Airflow API connector."""
|
||||
|
||||
@pytest.fixture
def mock_airflow_responses(self):
    """
    Mock responses for various Airflow API endpoints with Airflow 3.x data structures.

    The returned dict is keyed by payload family:
      - "version":        version payload
      - "dags":           DAG listing with two DAGs (one unpaused, one paused)
      - "tasks":          task definitions, keyed by dag_id
      - "dag_runs":       DAG runs, keyed by dag_id
      - "task_instances": task instances, keyed by dag_id and then dag_run_id
    """
    return {
        # Version payload; tests feed it to the mocked REST client's get().
        "version": {"version": "3.0.1", "git_version": "abc123def456"},
        # DAG listing page.
        "dags": {
            "dags": [
                {
                    "dag_id": "sample_etl_dag",
                    "description": "Sample ETL pipeline",
                    "fileloc": "/opt/airflow/dags/sample_etl.py",
                    "file_token": "abc123def456",
                    "is_paused": False,
                    "is_active": True,
                    "is_subdag": False,
                    "owners": ["data_team"],
                    "tags": [{"name": "etl"}, {"name": "daily"}],
                    "schedule_interval": {
                        "__type": "CronExpression",
                        "value": "@daily",
                    },
                    "timetable_summary": "At 00:00 every day",
                    "catchup": True,
                    "max_active_runs": 1,
                    "max_consecutive_failed_dag_runs": 5,
                    "has_task_concurrency_limits": False,
                    "has_import_errors": False,
                    "next_dagrun": "2024-01-02T00:00:00Z",
                    "next_dagrun_data_interval_start": "2024-01-01T00:00:00Z",
                    "next_dagrun_data_interval_end": "2024-01-02T00:00:00Z",
                    "next_dagrun_create_after": "2024-01-02T00:00:00Z",
                    "doc_md": "Sample ETL pipeline documentation",
                    "default_view": "graph",
                    "orientation": "LR",
                    "dataset_triggers": [],
                    "params": {"env": "production"},
                    "start_date": "2024-01-01T00:00:00Z",
                },
                {
                    "dag_id": "ml_training_pipeline",
                    "description": "ML model training pipeline",
                    "fileloc": "/opt/airflow/dags/ml_training.py",
                    "file_token": "def456ghi789",
                    "is_paused": True,
                    "is_active": True,
                    "is_subdag": False,
                    "owners": ["ml_team"],
                    "tags": [{"name": "ml"}, {"name": "weekly"}],
                    "schedule_interval": {
                        "__type": "CronExpression",
                        "value": "0 0 * * 1",
                    },
                    "timetable_summary": "At 00:00 on Monday",
                    "catchup": False,
                    "max_active_runs": 2,
                    "max_consecutive_failed_dag_runs": 3,
                    "has_task_concurrency_limits": True,
                    "has_import_errors": False,
                    "next_dagrun": None,
                    "next_dagrun_data_interval_start": None,
                    "next_dagrun_data_interval_end": None,
                    "next_dagrun_create_after": None,
                    "doc_md": None,
                    "default_view": "graph",
                    "orientation": "TB",
                    "dataset_triggers": [
                        {
                            "uri": "s3://ml-data/training/",
                            "extra": {"bucket": "ml-data", "prefix": "training/"},
                        }
                    ],
                    "params": {"model_type": "xgboost"},
                    "start_date": "2024-01-01T00:00:00Z",
                },
            ],
            "total_entries": 2,
        },
        # Task definitions: a linear chain extract_data -> transform_data -> load_data.
        "tasks": {
            "sample_etl_dag": {
                "tasks": [
                    {
                        "task_id": "extract_data",
                        "task_display_name": "Extract Data from Source",
                        "operator_name": "S3KeySensor",
                        "operator_class_name": "airflow.providers.amazon.aws.sensors.s3.S3KeySensor",
                        "downstream_task_ids": ["transform_data"],
                        "upstream_task_ids": [],
                        "owner": "data_team",
                        "start_date": "2024-01-01T00:00:00Z",
                        "end_date": None,
                        "depends_on_past": False,
                        "wait_for_downstream": False,
                        "retries": 3,
                        "retry_delay": {
                            "__type": "TimeDelta",
                            "days": 0,
                            "seconds": 300,
                        },
                        "retry_exponential_backoff": False,
                        "max_retry_delay": None,
                        "priority_weight": 1,
                        "weight_rule": "downstream",
                        "queue": "default",
                        "pool": "default_pool",
                        "pool_slots": 1,
                        "execution_timeout": {
                            "__type": "TimeDelta",
                            "days": 0,
                            "seconds": 3600,
                        },
                        "trigger_rule": "all_success",
                        "ui_color": "#f0ede4",
                        "ui_fgcolor": "#000000",
                        "template_fields": ["bucket_key", "bucket_name"],
                        "doc_md": "Extracts data from S3 source",
                        "params": {"bucket_name": "data-lake", "timeout": 3600},
                        "extra_links": [],
                        "owner_links": {},
                    },
                    {
                        "task_id": "transform_data",
                        "task_display_name": "Transform Data with dbt",
                        "operator_name": "DbtRunOperator",
                        "operator_class_name": "airflow_dbt.operators.dbt_run_operator.DbtRunOperator",
                        "downstream_task_ids": ["load_data"],
                        "upstream_task_ids": ["extract_data"],
                        "owner": "data_team",
                        "start_date": "2024-01-01T00:00:00Z",
                        "end_date": None,
                        "depends_on_past": True,
                        "wait_for_downstream": False,
                        "retries": 2,
                        "retry_delay": {
                            "__type": "TimeDelta",
                            "days": 0,
                            "seconds": 600,
                        },
                        "retry_exponential_backoff": False,
                        "max_retry_delay": None,
                        "priority_weight": 5,
                        "weight_rule": "absolute",
                        "queue": "dbt_queue",
                        "pool": "dbt_pool",
                        "pool_slots": 2,
                        "execution_timeout": {
                            "__type": "TimeDelta",
                            "days": 0,
                            "seconds": 7200,
                        },
                        "trigger_rule": "all_success",
                        "ui_color": "#8194C7",
                        "ui_fgcolor": "#FFFFFF",
                        "template_fields": ["models", "vars"],
                        "doc_md": "Transforms data using dbt models",
                        "params": {
                            "models": "staging",
                            "vars": {"run_date": "{{ ds }}"},
                        },
                        "extra_links": [],
                        "owner_links": {},
                    },
                    {
                        "task_id": "load_data",
                        "task_display_name": "Load Data to Warehouse",
                        "operator_name": "SnowflakeOperator",
                        "operator_class_name": "airflow.providers.snowflake.operators.snowflake.SnowflakeOperator",
                        "downstream_task_ids": [],
                        "upstream_task_ids": ["transform_data"],
                        "owner": "data_team",
                        "start_date": "2024-01-01T00:00:00Z",
                        "end_date": None,
                        "depends_on_past": False,
                        "wait_for_downstream": False,
                        "retries": 1,
                        "retry_delay": {
                            "__type": "TimeDelta",
                            "days": 0,
                            "seconds": 300,
                        },
                        "retry_exponential_backoff": False,
                        "max_retry_delay": None,
                        "priority_weight": 3,
                        "weight_rule": "downstream",
                        "queue": "warehouse_queue",
                        "pool": "snowflake_pool",
                        "pool_slots": 1,
                        "execution_timeout": {
                            "__type": "TimeDelta",
                            "days": 0,
                            "seconds": 1800,
                        },
                        "trigger_rule": "all_success",
                        "ui_color": "#EDEDED",
                        "ui_fgcolor": "#000000",
                        "template_fields": ["sql"],
                        "doc_md": "Loads transformed data to Snowflake warehouse",
                        "params": {"database": "analytics", "schema": "public"},
                        "extra_links": [],
                        "owner_links": {},
                    },
                ]
            }
        },
        # DAG runs: one finished ("success") and one still "running" (end_date is None).
        "dag_runs": {
            "sample_etl_dag": {
                "dag_runs": [
                    {
                        "dag_run_id": "scheduled__2024-01-01T00:00:00+00:00",
                        "dag_id": "sample_etl_dag",
                        "logical_date": "2024-01-01T00:00:00Z",
                        "execution_date": "2024-01-01T00:00:00Z",
                        "start_date": "2024-01-01T00:01:00Z",
                        "end_date": "2024-01-01T00:15:00Z",
                        "data_interval_start": "2024-01-01T00:00:00Z",
                        "data_interval_end": "2024-01-02T00:00:00Z",
                        "last_scheduling_decision": "2024-01-01T00:00:30Z",
                        "run_type": "scheduled",
                        "state": "success",
                        "external_trigger": False,
                        "triggering_dataset_events": [],
                        "conf": {},
                        "note": "Completed successfully",
                    },
                    {
                        "dag_run_id": "scheduled__2024-01-02T00:00:00+00:00",
                        "dag_id": "sample_etl_dag",
                        "logical_date": "2024-01-02T00:00:00Z",
                        "execution_date": "2024-01-02T00:00:00Z",
                        "start_date": "2024-01-02T00:01:00Z",
                        "end_date": None,
                        "data_interval_start": "2024-01-02T00:00:00Z",
                        "data_interval_end": "2024-01-03T00:00:00Z",
                        "last_scheduling_decision": "2024-01-02T00:00:30Z",
                        "run_type": "scheduled",
                        "state": "running",
                        "external_trigger": False,
                        "triggering_dataset_events": [],
                        "conf": {},
                        "note": "Currently running",
                    },
                ],
                "total_entries": 2,
            }
        },
        # Task instances of the finished run only; all three tasks succeeded.
        "task_instances": {
            "sample_etl_dag": {
                "scheduled__2024-01-01T00:00:00+00:00": {
                    "task_instances": [
                        {
                            "task_id": "extract_data",
                            "dag_id": "sample_etl_dag",
                            "dag_run_id": "scheduled__2024-01-01T00:00:00+00:00",
                            "logical_date": "2024-01-01T00:00:00Z",
                            "execution_date": "2024-01-01T00:00:00Z",
                            "start_date": "2024-01-01T00:01:00Z",
                            "end_date": "2024-01-01T00:05:00Z",
                            "duration": 240.0,
                            "state": "success",
                            "try_number": 1,
                            "max_tries": 3,
                            "hostname": "worker-1",
                            "unixname": "airflow",
                            "job_id": 12345,
                            "pool": "default_pool",
                            "pool_slots": 1,
                            "queue": "default",
                            "priority_weight": 1,
                            "operator": "S3KeySensor",
                            "operator_class": "airflow.providers.amazon.aws.sensors.s3.S3KeySensor",
                            "queued_dttm": "2024-01-01T00:01:00Z",
                            "queued_by_job_id": None,
                            "pid": 1234,
                            "executor": "CeleryExecutor",
                            "executor_config": {},
                            "sla_miss": None,
                            "rendered_fields": {
                                "bucket_name": "data-lake",
                                "bucket_key": "raw/2024-01-01/",
                            },
                            "test_mode": False,
                            "trigger": None,
                            "triggerer_job": None,
                            "note": "Successfully detected new files",
                        },
                        {
                            "task_id": "transform_data",
                            "dag_id": "sample_etl_dag",
                            "dag_run_id": "scheduled__2024-01-01T00:00:00+00:00",
                            "logical_date": "2024-01-01T00:00:00Z",
                            "execution_date": "2024-01-01T00:00:00Z",
                            "start_date": "2024-01-01T00:05:00Z",
                            "end_date": "2024-01-01T00:10:00Z",
                            "duration": 300.0,
                            "state": "success",
                            "try_number": 1,
                            "max_tries": 2,
                            "hostname": "worker-2",
                            "unixname": "airflow",
                            "job_id": 12346,
                            "pool": "dbt_pool",
                            "pool_slots": 2,
                            "queue": "dbt_queue",
                            "priority_weight": 5,
                            "operator": "DbtRunOperator",
                            "operator_class": "airflow_dbt.operators.dbt_run_operator.DbtRunOperator",
                            "queued_dttm": "2024-01-01T00:05:00Z",
                            "queued_by_job_id": 12345,
                            "pid": 1235,
                            "executor": "CeleryExecutor",
                            "executor_config": {},
                            "sla_miss": None,
                            "rendered_fields": {
                                "models": "staging",
                                "vars": {"run_date": "2024-01-01"},
                            },
                            "test_mode": False,
                            "trigger": None,
                            "triggerer_job": None,
                            "note": "dbt models executed successfully",
                        },
                        {
                            "task_id": "load_data",
                            "dag_id": "sample_etl_dag",
                            "dag_run_id": "scheduled__2024-01-01T00:00:00+00:00",
                            "logical_date": "2024-01-01T00:00:00Z",
                            "execution_date": "2024-01-01T00:00:00Z",
                            "start_date": "2024-01-01T00:10:00Z",
                            "end_date": "2024-01-01T00:15:00Z",
                            "duration": 300.0,
                            "state": "success",
                            "try_number": 1,
                            "max_tries": 1,
                            "hostname": "worker-1",
                            "unixname": "airflow",
                            "job_id": 12347,
                            "pool": "snowflake_pool",
                            "pool_slots": 1,
                            "queue": "warehouse_queue",
                            "priority_weight": 3,
                            "operator": "SnowflakeOperator",
                            "operator_class": "airflow.providers.snowflake.operators.snowflake.SnowflakeOperator",
                            "queued_dttm": "2024-01-01T00:10:00Z",
                            "queued_by_job_id": 12346,
                            "pid": 1236,
                            "executor": "CeleryExecutor",
                            "executor_config": {},
                            "sla_miss": None,
                            "rendered_fields": {
                                "sql": "INSERT INTO analytics.public.fact_table SELECT * FROM staging.transformed_data"
                            },
                            "test_mode": False,
                            "trigger": None,
                            "triggerer_job": None,
                            "note": "Data loaded to Snowflake successfully",
                        },
                    ]
                }
            }
        },
    }
|
||||
|
||||
@pytest.fixture
def mock_openmetadata_client(self):
    """
    Provide a MagicMock standing in for the OpenMetadata client.

    ``health_check`` reports True, and both ``create_or_update`` and
    ``get_by_name`` hand back the same mocked service entity, which carries
    a random UUID id and the FQN "airflow_service".
    """
    # Service entity returned by the mocked create / lookup calls
    service_entity = MagicMock()
    service_entity.id = MagicMock(root=str(uuid.uuid4()))
    service_entity.fullyQualifiedName = MagicMock(root="airflow_service")

    om_client = MagicMock()
    om_client.health_check.return_value = True
    om_client.create_or_update.return_value = service_entity
    om_client.get_by_name.return_value = service_entity
    return om_client
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _fake_rest(mock_tracked_rest_cls, responses):
|
||||
"""
|
||||
Configure the mock TrackedREST instance's .get() to return *responses*.
|
||||
|
||||
*responses* can be:
|
||||
- a single value → always returns that value
|
||||
- a list → returns items one-by-one (side_effect)
|
||||
- an exception → raises it on every call
|
||||
"""
|
||||
mock_instance = mock_tracked_rest_cls.return_value
|
||||
if isinstance(responses, list):
|
||||
mock_instance.get.side_effect = responses
|
||||
elif isinstance(responses, Exception):
|
||||
mock_instance.get.side_effect = responses
|
||||
else:
|
||||
mock_instance.get.return_value = responses
|
||||
return mock_instance
|
||||
|
||||
def test_airflow_client_token_authentication(self, mock_airflow_responses):
    """The client built with a static access token can read the Airflow version."""
    connection_config = _make_airflow_connection(token="test_token_123")
    version_payload = mock_airflow_responses["version"]

    with patch(_TRACKED_REST_PATH) as mock_tracked_rest_cls:
        # First get(): _detect_api_version probing /v2/version, which succeeds
        # so the client settles on "v2". Second get(): the explicit
        # get_version() call below.
        rest_instance = self._fake_rest(
            mock_tracked_rest_cls,
            [version_payload, version_payload],
        )

        client = AirflowApiClient(connection_config)
        reported = client.get_version()

        assert reported["version"] == "3.0.1"
        rest_instance.get.assert_called()
|
||||
|
||||
def test_airflow_client_basic_authentication(self, mock_airflow_responses):
    """The client built with username/password can read the Airflow version."""
    basic_auth = basicAuthConfig.BasicAuth(username="admin", password="admin123")
    connection_config = AirflowConnection(
        hostPort="http://localhost:8080",
        connection=AirflowRestApiConnection(type="RestAPI", authConfig=basic_auth),
    )
    version_payload = mock_airflow_responses["version"]

    # build_basic_auth_callback calls try_exchange_jwt (a real HTTP POST),
    # so it is patched to return a dummy (callback, None) tuple instead.
    def dummy_callback():
        return ("Basic YWRtaW46YWRtaW4xMjM=", 7 * 24 * 3600)

    with patch(_BASIC_AUTH_CALLBACK_PATH, return_value=(dummy_callback, None)):
        with patch(_TRACKED_REST_PATH) as mock_tracked_rest_cls:
            # get() #1 serves _detect_api_version, get() #2 serves get_version().
            rest_instance = self._fake_rest(
                mock_tracked_rest_cls,
                [version_payload, version_payload],
            )

            client = AirflowApiClient(connection_config)
            reported = client.get_version()

            assert reported["version"] == "3.0.1"
            rest_instance.get.assert_called()
|
||||
|
||||
def test_airflow_api_version_detection(self, mock_airflow_responses):
    """Check that the client reports the mocked version after detection."""
    version_payload = mock_airflow_responses["version"]
    # First entry answers _detect_api_version (/v2/version), the second
    # answers the explicit get_version() call.
    canned_responses = [version_payload, version_payload]
    config = _make_airflow_connection()

    with patch(_TRACKED_REST_PATH) as mock_tracked_rest_cls:
        self._fake_rest(mock_tracked_rest_cls, canned_responses)
        airflow_client = AirflowApiClient(config)

        version = airflow_client.get_version()

        assert version["version"] == "3.0.1"
        assert "git_version" in version
|
||||
|
||||
def test_dag_metadata_extraction_and_parsing(self, mock_airflow_responses):
    """Test comprehensive DAG metadata extraction with Airflow 3.x data.

    Lists the mocked DAGs, checks the raw fields of the first one, then
    builds its details object and verifies the basic metadata, tags and
    tasks exposed on it.
    """
    config = _make_airflow_connection()

    with patch(_TRACKED_REST_PATH) as mock_tracked_rest_cls:
        self._fake_rest(
            mock_tracked_rest_cls,
            [
                mock_airflow_responses["version"],  # _detect_api_version
                mock_airflow_responses["dags"],  # _paginate → list_dags (page 1)
                mock_airflow_responses["tasks"][
                    "sample_etl_dag"
                ],  # build_dag_details → get_dag_tasks
            ],
        )

        airflow_client = AirflowApiClient(config)

        # Test DAG listing
        dags = airflow_client.get_all_dags()
        assert len(dags) == 2
        assert dags[0]["dag_id"] == "sample_etl_dag"
        assert dags[1]["dag_id"] == "ml_training_pipeline"

        # Verify Airflow 3.x specific fields
        dag1 = dags[0]
        assert "file_token" in dag1
        assert "is_active" in dag1
        assert "has_task_concurrency_limits" in dag1
        assert "has_import_errors" in dag1
        assert "timetable_summary" in dag1
        assert "dataset_triggers" in dag1
        assert "params" in dag1

        # Verify modern schedule format
        assert dag1["schedule_interval"]["__type"] == "CronExpression"
        assert dag1["schedule_interval"]["value"] == "@daily"

        # Test DAG details building (calls get_dag_tasks internally)
        dag_details = airflow_client.build_dag_details(dag1)

        # Verify basic metadata
        assert dag_details.dag_id == "sample_etl_dag"
        assert dag_details.description == "Sample ETL pipeline"
        assert dag_details.fileloc == "/opt/airflow/dags/sample_etl.py"
        # Identity check instead of ``== False`` so a falsy non-bool (0, "")
        # cannot satisfy the assertion.
        assert dag_details.is_paused is False
        assert dag_details.owners == ["data_team"]

        # Verify tags parsing
        assert "etl" in dag_details.tags
        assert "daily" in dag_details.tags

        # Verify tasks extraction with Airflow 3.x structure
        assert len(dag_details.tasks) == 3
        task_ids = [task.task_id for task in dag_details.tasks]
        assert "extract_data" in task_ids
        assert "transform_data" in task_ids
        assert "load_data" in task_ids

        # Verify modern task fields
        extract_task = next(
            t for t in dag_details.tasks if t.task_id == "extract_data"
        )
        assert hasattr(extract_task, "downstream_task_ids")
        assert "transform_data" in extract_task.downstream_task_ids
|
||||
|
||||
def test_dag_runs_and_status_processing(self, mock_airflow_responses):
    """Verify the DAG runs returned for ``sample_etl_dag``.

    NOTE: ``get_dag_runs`` returns a list of ``AirflowApiDagRun`` model
    objects (not raw dicts), so the checks below read attributes.
    """
    # (dag_run_id, state) expected for each returned run, in order.
    expected_runs = (
        ("scheduled__2024-01-01T00:00:00+00:00", "success"),
        ("scheduled__2024-01-02T00:00:00+00:00", "running"),
    )
    canned_responses = [
        mock_airflow_responses["version"],  # _detect_api_version
        mock_airflow_responses["dag_runs"]["sample_etl_dag"],  # list_dag_runs
    ]
    config = _make_airflow_connection()

    with patch(_TRACKED_REST_PATH) as mock_tracked_rest_cls:
        self._fake_rest(mock_tracked_rest_cls, canned_responses)
        airflow_client = AirflowApiClient(config)

        dag_runs = airflow_client.get_dag_runs("sample_etl_dag", limit=10)

        assert len(dag_runs) == 2

        for position, (wanted_id, wanted_state) in enumerate(expected_runs):
            run = dag_runs[position]
            assert run.dag_run_id == wanted_id
            assert run.state == wanted_state
            # execution_date is parsed from logical_date (ISO string → datetime)
            assert run.execution_date is not None
|
||||
|
||||
def test_task_instance_extraction(self, mock_airflow_responses):
    """Verify the task instances fetched for one run of ``sample_etl_dag``.

    ``get_task_instances_for_run`` (paginated helper) returns a list of
    ``AirflowApiTaskInstance`` model objects, so attributes are read below.
    The lower-level ``get_task_instances`` returns the raw API dict.
    """
    run_id = "scheduled__2024-01-01T00:00:00+00:00"
    per_dag_instances = mock_airflow_responses["task_instances"]["sample_etl_dag"]
    raw_ti_response = per_dag_instances[run_id]
    config = _make_airflow_connection()

    with patch(_TRACKED_REST_PATH) as mock_tracked_rest_cls:
        canned_responses = [
            mock_airflow_responses["version"],  # _detect_api_version
            raw_ti_response,  # _paginate → task instances page 1
        ]
        self._fake_rest(mock_tracked_rest_cls, canned_responses)

        airflow_client = AirflowApiClient(config)
        task_instances = airflow_client.get_task_instances_for_run(
            "sample_etl_dag", run_id
        )

        assert len(task_instances) == 3

        # Pick the extract task out of the returned model objects.
        extract_instance = next(
            instance
            for instance in task_instances
            if instance.task_id == "extract_data"
        )
        assert extract_instance.state == "success"
        assert extract_instance.start_date is not None
        assert extract_instance.end_date is not None
|
||||
|
||||
def test_error_handling_and_edge_cases(self):
    """Exercise a failing REST call followed by a successful one."""
    config = _make_airflow_connection()
    connection_error = requests.exceptions.ConnectionError("Connection refused")

    with patch(_TRACKED_REST_PATH) as mock_tracked_rest_cls:
        rest_stub = mock_tracked_rest_cls.return_value

        # Every GET fails: _detect_api_version hits /v2/version and the
        # ConnectionError is re-raised to the caller.
        rest_stub.get.side_effect = connection_error

        airflow_client = AirflowApiClient(config)

        with pytest.raises(requests.exceptions.ConnectionError):
            # api_version property triggers _detect_api_version which calls client.get
            airflow_client.get_version()

        # Switch the stub to a healthy response so get_version() can succeed.
        rest_stub.get.side_effect = None
        rest_stub.get.return_value = {"version": "3.0.1"}

        # NOTE(review): the original comment described this as clearing the
        # cached version to force re-detection, but the code assigns "v1" —
        # confirm which behaviour is intended.
        airflow_client._detected_version = "v1"

        result = airflow_client.get_version()
        assert result["version"] == "3.0.1"
|
||||
|
||||
def test_full_workflow_integration(
    self, mock_airflow_responses, mock_openmetadata_client
):
    """Run the metadata workflow end to end with Airflow and OM mocked out.

    The OpenMetadata client factory, the pipeline source connection test
    and the REST client class are all patched; the test passes when the
    mocked OpenMetadata client received at least one ``create_or_update``.
    """
    airflow_connection = {
        "type": "Airflow",
        "hostPort": "http://localhost:8080",
        "numberOfStatus": 5,
        "connection": {
            "type": "RestAPI",
            "authConfig": {"token": "test_token"},
        },
    }
    om_server_config = {
        "hostPort": "http://localhost:8585/api",
        "authProvider": "openmetadata",
        "securityConfig": {"jwtToken": "test-jwt-token"},
    }
    workflow_config = {
        "source": {
            "type": "airflow",
            "serviceName": "test_airflow_service",
            "serviceConnection": {"config": airflow_connection},
            "sourceConfig": {"config": {"type": "PipelineMetadata"}},
        },
        "sink": {"type": "metadata-rest", "config": {}},
        "workflowConfig": {
            "loggerLevel": "INFO",
            "openMetadataServerConfig": om_server_config,
        },
    }

    # The workflow will detect version, list dags, fetch tasks, runs, task instances
    canned_responses = [
        mock_airflow_responses["version"],  # _detect_api_version
        mock_airflow_responses["dags"],  # get_all_dags page 1
        mock_airflow_responses["tasks"]["sample_etl_dag"],  # dag tasks
        mock_airflow_responses["dag_runs"]["sample_etl_dag"],  # dag runs
        mock_airflow_responses["task_instances"]["sample_etl_dag"][
            "scheduled__2024-01-01T00:00:00+00:00"
        ],  # task instances page 1
    ]

    with (
        patch(
            "metadata.workflow.base.create_ometa_client",
            return_value=mock_openmetadata_client,
        ),
        patch(
            "metadata.ingestion.source.pipeline.pipeline_service.PipelineServiceSource.test_connection"
        ),
        patch(_TRACKED_REST_PATH) as mock_tracked_rest_cls,
    ):
        self._fake_rest(mock_tracked_rest_cls, canned_responses)

        workflow = MetadataWorkflow.create(workflow_config)
        workflow.execute()
        workflow.stop()

        create_or_update = mock_openmetadata_client.create_or_update
        assert create_or_update.called

        create_calls = create_or_update.call_args_list
        assert len(create_calls) > 0
|
||||
|
||||
def test_openlineage_integration_scenarios(self):
    """Test OpenLineage event handling scenarios.

    Posts a COMPLETE run event through the patched ``requests.post`` and
    checks both the forwarded request and the canned response payload.
    No real HTTP request is made.
    """
    ol_event = {
        "eventType": "COMPLETE",
        "eventTime": datetime.now(timezone.utc).isoformat(),
        "producer": "https://airflow.apache.org",
        "schemaURL": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/definitions/RunEvent",
        "run": {"runId": str(uuid.uuid4())},
        "job": {"namespace": "airflow", "name": "sample_etl_dag"},
        "inputs": [{"namespace": "postgres", "name": "public.source_table"}],
        "outputs": [{"namespace": "postgres", "name": "public.target_table"}],
    }
    endpoint = "http://localhost:8585/api/v1/openlineage/lineage"
    headers = {
        "Authorization": "Bearer test",
        "Content-Type": "application/json",
    }

    with patch("requests.post") as mock_post:
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.json.return_value = {
            "status": "success",
            "lineageEdgesCreated": 1,
        }
        mock_post.return_value = mock_response

        # Call through the patched attribute (not the mock object itself)
        # so the patch target is actually exercised.
        response = requests.post(endpoint, headers=headers, json=ol_event)

        # The event must have been forwarded unchanged to the endpoint.
        mock_post.assert_called_once_with(endpoint, headers=headers, json=ol_event)

        result = response.json()
        assert result["status"] == "success"
        assert result["lineageEdgesCreated"] == 1
|
||||
|
||||
def test_airflow_3x_compatibility(self, mock_airflow_responses):
    """Check version reporting and the DAG fields of the mocked ML pipeline."""
    version_payload = mock_airflow_responses["version"]
    canned_responses = [
        version_payload,  # _detect_api_version
        version_payload,  # get_version()
        mock_airflow_responses["dags"],  # get_all_dags page 1
    ]
    config = _make_airflow_connection()

    with patch(_TRACKED_REST_PATH) as mock_tracked_rest_cls:
        self._fake_rest(mock_tracked_rest_cls, canned_responses)
        airflow_client = AirflowApiClient(config)

        # Version as reported by the client
        version = airflow_client.get_version()
        assert version["version"] == "3.0.1"

        # Locate the ML pipeline among the listed DAGs
        dags = airflow_client.get_all_dags()
        ml_dag = next(
            candidate
            for candidate in dags
            if candidate["dag_id"] == "ml_training_pipeline"
        )

        # Dataset triggers
        assert "dataset_triggers" in ml_dag
        triggers = ml_dag["dataset_triggers"]
        assert len(triggers) == 1
        assert triggers[0]["uri"] == "s3://ml-data/training/"

        # Schedule format
        assert "schedule_interval" in ml_dag
        schedule = ml_dag["schedule_interval"]
        assert schedule["__type"] == "CronExpression"
        assert schedule["value"] == "0 0 * * 1"

        # Timetable summary
        assert ml_dag["timetable_summary"] == "At 00:00 on Monday"

        # Remaining metadata fields
        assert "file_token" in ml_dag
        assert "has_task_concurrency_limits" in ml_dag
        assert "has_import_errors" in ml_dag
        assert "next_dagrun_create_after" in ml_dag
|
||||
|
||||
def test_pagination_handling(self, mock_airflow_responses):
    """Check that ``get_all_dags`` stitches two pages (100 + 50) together."""

    def _dag_page(dag_numbers):
        # One page of the DAG listing; every page advertises 150 entries.
        return {
            "dags": [
                {
                    "dag_id": f"dag_{i}",
                    "description": f"DAG {i}",
                    "file_token": f"token_{i}",
                    "is_active": True,
                    "tags": [],
                    "schedule_interval": {
                        "__type": "CronExpression",
                        "value": "@daily",
                    },
                    "timetable_summary": "At 00:00 every day",
                    "dataset_triggers": [],
                }
                for i in dag_numbers
            ],
            "total_entries": 150,
        }

    config = _make_airflow_connection()
    page1_response = _dag_page(range(100))
    page2_response = _dag_page(range(100, 150))

    with patch(_TRACKED_REST_PATH) as mock_tracked_rest_cls:
        self._fake_rest(
            mock_tracked_rest_cls,
            [
                mock_airflow_responses["version"],  # _detect_api_version
                page1_response,  # _paginate page 1
                page2_response,  # _paginate page 2
            ],
        )
        airflow_client = AirflowApiClient(config)

        all_dags = airflow_client.get_all_dags()

        assert len(all_dags) == 150
        first_dag, last_dag = all_dags[0], all_dags[-1]
        assert first_dag["dag_id"] == "dag_0"
        assert last_dag["dag_id"] == "dag_149"

        assert "file_token" in first_dag
        assert "timetable_summary" in first_dag
|
||||
|
||||
def test_special_character_handling(self, mock_airflow_responses):
    """Check that special characters in DAG ids, text and tags are preserved."""
    special_dag = {
        "dag_id": "etl-pipeline_with.special@chars",
        "description": "ETL with special chars: <>\"'&",
        "fileloc": "/opt/airflow/dags/special chars/dag file.py",
        "file_token": "special_token_123",
        "is_active": True,
        "is_paused": False,
        "owners": ["data-team"],
        "tags": [{"name": "special-tag_with.chars"}],
        "schedule_interval": {
            "__type": "CronExpression",
            "value": "@daily",
        },
        "timetable_summary": "At 00:00 every day",
        "dataset_triggers": [],
        "params": {},
    }
    special_dag_response = {"dags": [special_dag], "total_entries": 1}
    canned_responses = [
        mock_airflow_responses["version"],  # _detect_api_version
        special_dag_response,  # _paginate page 1
        {"tasks": []},  # build_dag_details → get_dag_tasks
    ]

    config = _make_airflow_connection()

    with patch(_TRACKED_REST_PATH) as mock_tracked_rest_cls:
        self._fake_rest(mock_tracked_rest_cls, canned_responses)
        airflow_client = AirflowApiClient(config)

        dags = airflow_client.get_all_dags()

        # Raw listing entry
        dag = dags[0]
        assert dag["dag_id"] == "etl-pipeline_with.special@chars"
        assert "special chars:" in dag["description"]
        assert dag["tags"][0]["name"] == "special-tag_with.chars"

        # Details object built from that entry
        dag_details = airflow_client.build_dag_details(dag)
        assert dag_details.dag_id == "etl-pipeline_with.special@chars"
        assert "special-tag_with.chars" in dag_details.tags
|
||||
|
||||
|
||||
# Run specific test methods
|
||||
if __name__ == "__main__":
    # Running the file directly executes only the full-workflow test, verbosely.
    _selected_test = (
        f"{__file__}::TestAirflowApiMockedIntegration::test_full_workflow_integration"
    )
    pytest.main([_selected_test, "-v"])
|
||||
52
ingestion/tests/integration/airflow/test_dags/lineage_etl.py
Normal file
52
ingestion/tests/integration/airflow/test_dags/lineage_etl.py
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
"""
|
||||
DAG that triggers OpenLineage events with inlets/outlets for lineage testing.
|
||||
Uses Airflow 3.x native OpenLineage support.
|
||||
"""
|
||||
from datetime import datetime
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.lineage.entities import Table as LineageTable
|
||||
from airflow.operators.bash import BashOperator
|
||||
|
||||
# Arguments handed to the DAG as ``default_args``.
default_args = dict(
    owner="test_owner",
    depends_on_past=False,
    retries=0,
)

# Lineage entity attached to the ``extract`` task as its inlet.
inlet_table = LineageTable(
    name="source_table",
    database="test_db",
    cluster="default",
)

# Lineage entity attached to the ``load`` task as its outlet.
outlet_table = LineageTable(
    name="target_table",
    database="test_db",
    cluster="default",
)

with DAG(
    dag_id="lineage_etl",
    description="ETL pipeline with lineage for E2E testing",
    default_args=default_args,
    tags=["e2e_test", "lineage"],
    start_date=datetime(2024, 1, 1),
    schedule=None,
    catchup=False,
) as dag:
    extract = BashOperator(
        task_id="extract",
        inlets=[inlet_table],
        bash_command="echo extracting data from source",
    )
    transform = BashOperator(
        task_id="transform",
        bash_command="echo transforming data",
    )
    load = BashOperator(
        task_id="load",
        outlets=[outlet_table],
        bash_command="echo loading data to target",
    )

    # Linear chain: extract -> transform -> load
    extract.set_downstream(transform)
    transform.set_downstream(load)
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
"""
|
||||
DAG with OpenLineage Dataset inlets/outlets for E2E lineage testing.
|
||||
|
||||
When this DAG runs with the OL provider installed and transport configured,
|
||||
Airflow 3.x emits COMPLETE events with these datasets as inputs/outputs.
|
||||
The OM OpenLineage endpoint resolves them to existing sample_data tables.
|
||||
"""
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from airflow.decorators import dag, task
|
||||
from openlineage.client.event_v2 import Dataset
|
||||
|
||||
# Dataset declared as the inlet of the transform task.
RAW_ORDER = Dataset(
    name="ecommerce_db.shopify.raw_order",
    namespace="sample_data",
)

# Dataset declared as the outlet of the transform task.
FACT_ORDER = Dataset(
    name="ecommerce_db.shopify.fact_order",
    namespace="sample_data",
)


# Unscheduled DAG, created paused, holding a single task.
@dag(
    dag_id="ol_lineage_etl",
    description="ETL with OpenLineage inlets/outlets for E2E lineage testing",
    tags=["e2e_test", "openlineage", "lineage"],
    start_date=datetime(2024, 1, 1, tzinfo=timezone.utc),
    schedule=None,
    catchup=False,
    is_paused_upon_creation=True,
)
def ol_lineage_etl():
    # The task only prints; the inlet/outlet declaration is the point.
    @task(inlets=[RAW_ORDER], outlets=[FACT_ORDER])
    def transform():
        print("Transforming raw_order -> fact_order")

    transform()


# Calling the decorated function instantiates the DAG at import time.
ol_lineage_etl()
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
"""
|
||||
Sample branching DAG for AirflowApi connector E2E testing.
|
||||
Tests that parallel task structures are captured correctly.
|
||||
"""
|
||||
from datetime import datetime
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.operators.bash import BashOperator
|
||||
|
||||
# Arguments handed to the DAG as ``default_args``.
default_args = dict(
    owner="test_owner",
    depends_on_past=False,
    retries=0,
)

with DAG(
    dag_id="sample_branching",
    description="Branching pipeline for E2E testing",
    default_args=default_args,
    tags=["e2e_test"],
    start_date=datetime(2024, 1, 1),
    schedule=None,
    catchup=False,
) as dag:
    start = BashOperator(bash_command="echo start", task_id="start")
    branch_a = BashOperator(bash_command="echo branch_a", task_id="branch_a")
    branch_b = BashOperator(bash_command="echo branch_b", task_id="branch_b")
    join = BashOperator(bash_command="echo join", task_id="join")

    # Fan out from ``start`` to both branches, then fan back in at ``join``.
    start.set_downstream([branch_a, branch_b])
    join.set_upstream([branch_a, branch_b])
|
||||
28
ingestion/tests/integration/airflow/test_dags/sample_etl.py
Normal file
28
ingestion/tests/integration/airflow/test_dags/sample_etl.py
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
"""
|
||||
Sample ETL DAG for AirflowApi connector E2E testing.
|
||||
A simple 3-task DAG: extract -> transform -> load
|
||||
"""
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from airflow import DAG
|
||||
from airflow.operators.bash import BashOperator
|
||||
|
||||
# Arguments handed to the DAG as ``default_args``.
default_args = dict(
    owner="test_owner",
    depends_on_past=False,
    retries=0,
)

with DAG(
    dag_id="sample_etl",
    description="Sample ETL pipeline for E2E testing",
    default_args=default_args,
    tags=["e2e_test", "etl"],
    start_date=datetime(2024, 1, 1),
    schedule=timedelta(days=1),
    catchup=False,
) as dag:
    extract = BashOperator(bash_command="echo extracting", task_id="extract")
    transform = BashOperator(bash_command="echo transforming", task_id="transform")
    load = BashOperator(bash_command="echo loading", task_id="load")

    # Linear chain: extract -> transform -> load
    extract.set_downstream(transform)
    transform.set_downstream(load)
|
||||
316
ingestion/tests/integration/airflow/test_openlineage_lineage.py
Normal file
316
ingestion/tests/integration/airflow/test_openlineage_lineage.py
Normal file
|
|
@ -0,0 +1,316 @@
|
|||
# Copyright 2025 Collate
|
||||
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
Integration test: OpenLineage events → OM lineage resolution.
|
||||
|
||||
Verifies that OL COMPLETE events with input/output datasets are resolved
|
||||
to existing OM table entities and lineage edges are created.
|
||||
|
||||
Prerequisites:
|
||||
- OM server running at localhost:8585
|
||||
- Sample data ingested (tables exist in sample_data service)
|
||||
- OpenLineage settings: enabled=true, eventTypeFilter includes COMPLETE
|
||||
"""
|
||||
import json
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from metadata.generated.schema.entity.data.pipeline import Pipeline
|
||||
from metadata.generated.schema.entity.data.table import Table
|
||||
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
|
||||
OpenMetadataConnection,
|
||||
)
|
||||
from metadata.generated.schema.security.client.openMetadataJWTClientConfig import (
|
||||
OpenMetadataJWTClientConfig,
|
||||
)
|
||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||
|
||||
# Base URL of the OpenMetadata server these tests talk to.
OM_HOST = "http://localhost:8585"
OM_API = f"{OM_HOST}/api"

# JWT sent as the bearer token on every request, split into chunks to keep
# line lengths manageable.
OM_JWT = "".join(
    (
        "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGci",
        "OiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcm",
        "ciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7",
        "HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7",
        "P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVK",
        "wEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfd",
        "QllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg",
    )
)

# Endpoint that receives OpenLineage run events.
OL_ENDPOINT = f"{OM_HOST}/api/v1/openlineage/lineage"

# Headers shared by every authenticated JSON request in this module.
AUTH_HEADERS = {
    "Authorization": f"Bearer {OM_JWT}",
    "Content-Type": "application/json",
}
|
||||
|
||||
|
||||
def _om_reachable() -> bool:
    """Return True when the OM version endpoint answers with HTTP 200.

    Only failures raised by ``requests`` (connection refused, timeout, ...)
    are treated as "not reachable"; any other exception is a genuine bug and
    is allowed to propagate instead of being silently swallowed.
    """
    try:
        response = requests.get(f"{OM_API}/v1/system/version", timeout=5)
    except requests.RequestException:
        return False
    return response.status_code == 200
|
||||
|
||||
|
||||
def _sample_data_exists() -> bool:
    """Return True when the ``raw_order`` sample table can be fetched by name.

    Only failures raised by ``requests`` (connection refused, timeout, ...)
    are treated as "sample data missing"; any other exception is a genuine
    bug and is allowed to propagate instead of being silently swallowed.
    """
    try:
        resp = requests.get(
            f"{OM_API}/v1/tables/name/sample_data.ecommerce_db.shopify.raw_order",
            headers=AUTH_HEADERS,
            timeout=5,
        )
    except requests.RequestException:
        return False
    return resp.status_code == 200
|
||||
|
||||
|
||||
# Both probes run once, when this module is imported during collection.
_om_is_up = _om_reachable()
_sample_data_ready = _sample_data_exists()

# Module-wide markers: skip every test here unless both probes succeeded.
pytestmark = [
    pytest.mark.skipif(not _om_is_up, reason="OM not running at localhost:8585"),
    pytest.mark.skipif(
        not _sample_data_ready, reason="Sample data tables not ingested"
    ),
]
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def metadata():
    """Module-scoped OpenMetadata client authenticated with ``OM_JWT``."""
    connection = OpenMetadataConnection(
        hostPort=OM_API,
        authProvider="openmetadata",
        securityConfig=OpenMetadataJWTClientConfig(jwtToken=OM_JWT),
    )
    client = OpenMetadata(connection)
    # Abort the fixture early if the server does not report healthy.
    assert client.health_check()
    return client
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def ensure_ol_settings():
    """PUT OpenLineage settings that enable the endpoint for COMPLETE events.

    The settings are written once per module and are not restored afterwards.
    """
    ol_settings = {
        "enabled": True,
        "autoCreateEntities": True,
        "eventTypeFilter": ["COMPLETE"],
        "defaultPipelineService": "openlineage",
    }
    payload = {
        "config_type": "openLineageSettings",
        "config_value": ol_settings,
    }
    resp = requests.put(
        f"{OM_API}/v1/system/settings",
        headers=AUTH_HEADERS,
        json=payload,
        timeout=10,
    )
    assert resp.status_code == 200, f"Failed to set OL settings: {resp.text}"
|
||||
|
||||
|
||||
def _send_ol_event(
    job_namespace: str,
    job_name: str,
    inputs: list,
    outputs: list,
    run_id: "str | None" = None,
) -> dict:
    """POST a COMPLETE OpenLineage run event and return the decoded JSON reply.

    Args:
        job_namespace: Value for the event's ``job.namespace``.
        job_name: Value for the event's ``job.name``.
        inputs: Input dataset dicts placed verbatim in the event.
        outputs: Output dataset dicts placed verbatim in the event.
        run_id: Run identifier to use; a random UUID4 string is generated
            when it is omitted (or empty).

    Raises:
        AssertionError: If the endpoint does not answer with HTTP 200.
    """
    event = {
        "eventType": "COMPLETE",
        "eventTime": "2026-03-23T12:00:00Z",
        "schemaURL": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/definitions/RunEvent",
        "producer": "https://airflow.apache.org",
        "run": {"runId": run_id or str(uuid.uuid4())},
        "job": {"namespace": job_namespace, "name": job_name},
        "inputs": inputs,
        "outputs": outputs,
    }
    resp = requests.post(OL_ENDPOINT, headers=AUTH_HEADERS, json=event, timeout=10)
    assert (
        resp.status_code == 200
    ), f"OL endpoint returned {resp.status_code}: {resp.text}"
    return resp.json()
|
||||
|
||||
|
||||
class TestOpenLineageEndpointAcceptsEvents:
    """Acceptance and rejection checks against the OpenLineage endpoint."""

    def test_accepts_complete_event(self, ensure_ol_settings):
        """A COMPLETE event without datasets is answered with status success."""
        result = _send_ol_event(
            inputs=[],
            outputs=[],
            job_name="test_job",
            job_namespace="test",
        )
        assert result["status"] == "success"

    def test_rejects_without_schema_url(self):
        """An event lacking ``schemaURL`` is answered with HTTP 400."""
        event_without_schema = {
            "eventType": "COMPLETE",
            "eventTime": "2026-03-23T12:00:00Z",
            "producer": "test",
            "run": {"runId": str(uuid.uuid4())},
            "job": {"namespace": "test", "name": "test"},
            "inputs": [],
            "outputs": [],
        }
        resp = requests.post(
            OL_ENDPOINT,
            headers=AUTH_HEADERS,
            json=event_without_schema,
            timeout=10,
        )
        assert resp.status_code == 400
|
||||
|
||||
|
||||
class TestOpenLineageResolvesExistingTables:
|
||||
"""Verify OL events with inputs/outputs matching existing sample_data tables
|
||||
create lineage edges in OM."""
|
||||
|
||||
def test_creates_lineage_edge_for_known_tables(self, metadata, ensure_ol_settings):
|
||||
"""Send an OL event referencing sample_data tables and verify lineage."""
|
||||
src_fqn = "sample_data.ecommerce_db.shopify.raw_order"
|
||||
tgt_fqn = "sample_data.ecommerce_db.shopify.fact_order"
|
||||
|
||||
# Verify tables exist
|
||||
src = metadata.get_by_name(entity=Table, fqn=src_fqn)
|
||||
tgt = metadata.get_by_name(entity=Table, fqn=tgt_fqn)
|
||||
assert src is not None, f"Table {src_fqn} must exist"
|
||||
assert tgt is not None, f"Table {tgt_fqn} must exist"
|
||||
|
||||
result = _send_ol_event(
|
||||
job_namespace="airflow_e2e_lineage",
|
||||
job_name="sample_transform",
|
||||
inputs=[
|
||||
{"namespace": "sample_data", "name": "ecommerce_db.shopify.raw_order"}
|
||||
],
|
||||
outputs=[
|
||||
{"namespace": "sample_data", "name": "ecommerce_db.shopify.fact_order"}
|
||||
],
|
||||
)
|
||||
|
||||
assert (
|
||||
result["lineageEdgesCreated"] > 0
|
||||
), f"Expected lineage edges to be created, got: {json.dumps(result, indent=2)}"
|
||||
|
||||
def test_lineage_edge_has_openlineage_source(self, metadata, ensure_ol_settings):
|
||||
"""Verify the created lineage edge has source=OpenLineage."""
|
||||
src_fqn = "sample_data.ecommerce_db.shopify.raw_order"
|
||||
|
||||
lineage = metadata.get_lineage_by_name(
|
||||
entity=Table, fqn=src_fqn, up_depth=0, down_depth=3
|
||||
)
|
||||
downstream = lineage.get("downstreamEdges", [])
|
||||
|
||||
ol_edges = [
|
||||
e
|
||||
for e in downstream
|
||||
if e.get("lineageDetails", {}).get("source") == "OpenLineage"
|
||||
]
|
||||
assert len(ol_edges) > 0, (
|
||||
f"Expected at least one OpenLineage-sourced edge from {src_fqn}, "
|
||||
f"got sources: {[e.get('lineageDetails',{}).get('source') for e in downstream]}"
|
||||
)
|
||||
|
||||
def test_lineage_references_existing_pipeline(self, metadata, ensure_ol_settings):
|
||||
"""When an AirflowApi pipeline already exists, OL events should resolve
|
||||
to it via the sample_airflow service (which has sample DAGs)."""
|
||||
# sample_airflow service has pipeline "sample_airflow.dim_product_etl"
|
||||
pipeline_fqn = "sample_airflow.dim_product_etl"
|
||||
pipeline = metadata.get_by_name(entity=Pipeline, fqn=pipeline_fqn)
|
||||
if not pipeline:
|
||||
pytest.skip(f"Pipeline {pipeline_fqn} not in sample data")
|
||||
|
||||
# The OL event's job namespace/name won't auto-match to this pipeline.
|
||||
# Instead, add lineage manually via API with source=OpenLineage to prove
|
||||
# the lineage model supports it. This is what would happen when
|
||||
# BigQuery/Spark operators emit OL events that the mapper resolves.
|
||||
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
|
||||
from metadata.generated.schema.type.entityLineage import (
|
||||
EntitiesEdge,
|
||||
LineageDetails,
|
||||
)
|
||||
from metadata.generated.schema.type.entityLineage import Source as LineageSource
|
||||
from metadata.generated.schema.type.entityReference import EntityReference
|
||||
|
||||
src_fqn = "sample_data.ecommerce_db.shopify.raw_customer"
|
||||
tgt_fqn = "sample_data.ecommerce_db.shopify.dim_address"
|
||||
src = metadata.get_by_name(entity=Table, fqn=src_fqn)
|
||||
tgt = metadata.get_by_name(entity=Table, fqn=tgt_fqn)
|
||||
if not src or not tgt:
|
||||
pytest.skip(f"Tables {src_fqn} or {tgt_fqn} not in sample data")
|
||||
|
||||
metadata.add_lineage(
|
||||
AddLineageRequest(
|
||||
edge=EntitiesEdge(
|
||||
fromEntity=EntityReference(id=src.id.root, type="table"),
|
||||
toEntity=EntityReference(id=tgt.id.root, type="table"),
|
||||
lineageDetails=LineageDetails(
|
||||
pipeline=EntityReference(id=pipeline.id.root, type="pipeline"),
|
||||
source=LineageSource.OpenLineage,
|
||||
),
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
lineage = metadata.get_lineage_by_name(
|
||||
entity=Table, fqn=src_fqn, up_depth=0, down_depth=3
|
||||
)
|
||||
ol_edges = [
|
||||
e
|
||||
for e in lineage.get("downstreamEdges", [])
|
||||
if e.get("lineageDetails", {}).get("source") == "OpenLineage"
|
||||
and e.get("lineageDetails", {}).get("pipeline") is not None
|
||||
]
|
||||
assert len(ol_edges) > 0, "Expected OL edge with pipeline reference"
|
||||
|
||||
pipeline_ref = ol_edges[0]["lineageDetails"]["pipeline"]
|
||||
assert pipeline_ref["type"] == "pipeline"
|
||||
assert "dim_product_etl" in pipeline_ref.get("fullyQualifiedName", "")
|
||||
|
||||
def test_no_edges_for_nonexistent_tables(self, ensure_ol_settings):
    """An OL event that names unknown tables is expected to create no edges."""
    unknown_namespace = "nonexistent_service"
    source_dataset = {
        "namespace": unknown_namespace,
        "name": "fake_schema.fake_table",
    }
    sink_dataset = {
        "namespace": unknown_namespace,
        "name": "fake_schema.fake_output",
    }

    response_body = _send_ol_event(
        job_namespace="test",
        job_name="unknown_job",
        inputs=[source_dataset],
        outputs=[sink_dataset],
    )

    assert response_body["lineageEdgesCreated"] == 0
|
||||
|
||||
def test_no_edges_for_empty_inputs_outputs(self, ensure_ol_settings):
    """An OL event carrying neither inputs nor outputs is expected to create no edges."""
    no_datasets = []

    response_body = _send_ol_event(
        job_namespace="test",
        job_name="empty_job",
        inputs=no_datasets,
        outputs=[],
    )

    assert response_body["lineageEdgesCreated"] == 0
|
||||
|
||||
|
||||
class TestOpenLineageEventTypeFiltering:
    """Checks on how the OpenLineage endpoint treats events by their ``eventType``."""

    def test_start_events_skipped_when_filter_is_complete(self, ensure_ol_settings):
        """A START event posted to the endpoint must report zero created edges."""
        upstream = {"namespace": "sample_data", "name": "ecommerce_db.shopify.raw_order"}
        downstream = {
            "namespace": "sample_data",
            "name": "ecommerce_db.shopify.fact_order",
        }
        # Key order is kept as-is so the serialized JSON body is unchanged.
        start_event = {
            "eventType": "START",
            "eventTime": "2026-03-23T12:00:00Z",
            "schemaURL": "https://openlineage.io/spec/2-0-2/OpenLineage.json#/definitions/RunEvent",
            "producer": "test",
            "run": {"runId": str(uuid.uuid4())},
            "job": {"namespace": "test", "name": "start_test"},
            "inputs": [upstream],
            "outputs": [downstream],
        }

        response = requests.post(
            OL_ENDPOINT, headers=AUTH_HEADERS, json=start_event, timeout=10
        )
        payload = response.json()

        assert payload["lineageEdgesCreated"] == 0, "START events should not create edges"
|
||||
|
|
@ -0,0 +1,562 @@
|
|||
# Copyright 2025 Collate
|
||||
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
Tests for Airflow REST API authentication methods.
|
||||
|
||||
These tests verify every auth path in auth.py and the AirflowApiClient constructor:
|
||||
- AccessToken : static bearer token, no refresh
|
||||
- BasicAuth : Airflow 3.x JWT exchange (success) and Basic auth fallback
|
||||
- GcpCredentials : all 4 GCP credential types + service account impersonation
|
||||
- Token refresh : GCP callback is called on every invocation (google-auth
|
||||
manages expiry internally; REST client calls callback when
|
||||
its own expires_in check triggers)
|
||||
"""
|
||||
import base64
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from metadata.generated.schema.entity.utils.common.accessTokenConfig import AccessToken
|
||||
from metadata.generated.schema.entity.utils.common.basicAuthConfig import BasicAuth
|
||||
from metadata.generated.schema.entity.utils.common.gcpCredentialsConfig import (
|
||||
GcpServiceAccount,
|
||||
)
|
||||
from metadata.ingestion.source.pipeline.airflow.api.auth import (
|
||||
_BASIC_AUTH_TTL_SECONDS,
|
||||
_JWT_REFRESH_INTERVAL_SECONDS,
|
||||
build_access_token_callback,
|
||||
build_basic_auth_callback,
|
||||
build_gcp_token_callback,
|
||||
try_exchange_jwt,
|
||||
)
|
||||
from metadata.ingestion.source.pipeline.airflow.api.client import AirflowApiClient
|
||||
|
||||
# ── Helpers ─────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _make_config(auth_variant):
|
||||
"""
|
||||
Build a minimal AirflowConnection config mock for AirflowApiClient.
|
||||
|
||||
auth_variant is a real typed instance (AccessToken, BasicAuth,
|
||||
GcpCredentialsConfig) or a plain MagicMock for the unknown-type test.
|
||||
"""
|
||||
rest_config = MagicMock()
|
||||
rest_config.authConfig = auth_variant
|
||||
rest_config.apiVersion = MagicMock()
|
||||
rest_config.apiVersion.value = "v1"
|
||||
rest_config.verifySSL = True
|
||||
|
||||
config = MagicMock()
|
||||
config.hostPort = "http://airflow.example.com:8080"
|
||||
config.connection = rest_config
|
||||
return config
|
||||
|
||||
|
||||
# ── try_exchange_jwt ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestTryExchangeJwt:
    """Unit tests for ``try_exchange_jwt`` with ``requests.post`` patched out."""

    @staticmethod
    def _response_with_json(payload):
        """Build a mock response whose ``json()`` yields *payload* and whose status check is a no-op."""
        response = MagicMock()
        response.json.return_value = payload
        response.raise_for_status = MagicMock()
        return response

    @patch("metadata.ingestion.source.pipeline.airflow.api.auth.requests.post")
    def test_returns_access_token_on_success(self, mock_post):
        """The ``access_token`` field of the response is returned, and the POST is shaped as expected."""
        mock_post.return_value = self._response_with_json({"access_token": "jwt_abc123"})

        jwt = try_exchange_jwt(
            "http://airflow.example.com:8080", "admin", "password", True
        )

        assert jwt == "jwt_abc123"
        mock_post.assert_called_once_with(
            "http://airflow.example.com:8080/auth/token",
            json={"username": "admin", "password": "password"},
            timeout=10,
            verify=True,
        )

    @patch("metadata.ingestion.source.pipeline.airflow.api.auth.requests.post")
    def test_returns_none_when_http_error(self, mock_post):
        """An HTTPError raised by ``raise_for_status`` results in ``None``."""
        from requests.exceptions import HTTPError

        failing_response = MagicMock()
        failing_response.raise_for_status.side_effect = HTTPError("401")
        mock_post.return_value = failing_response

        jwt = try_exchange_jwt("http://airflow.example.com:8080", "u", "p", True)

        assert jwt is None

    @patch("metadata.ingestion.source.pipeline.airflow.api.auth.requests.post")
    def test_returns_none_on_connection_error(self, mock_post):
        """An exception raised by the POST itself results in ``None``."""
        mock_post.side_effect = Exception("Connection refused")

        jwt = try_exchange_jwt("http://airflow.example.com:8080", "u", "p", False)

        assert jwt is None

    @patch("metadata.ingestion.source.pipeline.airflow.api.auth.requests.post")
    def test_returns_none_when_token_missing_from_response(self, mock_post):
        """A successful response without an ``access_token`` key results in ``None``."""
        mock_post.return_value = self._response_with_json({"detail": "no token here"})

        jwt = try_exchange_jwt("http://airflow.example.com:8080", "u", "p", True)

        assert jwt is None

    @patch("metadata.ingestion.source.pipeline.airflow.api.auth.requests.post")
    def test_passes_verify_ssl_false(self, mock_post):
        """``verify_ssl=False`` is forwarded to ``requests.post`` as ``verify=False``."""
        mock_post.return_value = self._response_with_json({"access_token": "tok"})

        try_exchange_jwt("http://airflow.example.com:8080", "u", "p", False)

        forwarded_kwargs = mock_post.call_args.kwargs
        assert forwarded_kwargs["verify"] is False
|
||||
|
||||
|
||||
# ── build_access_token_callback ──────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestBuildAccessTokenCallback:
    """Unit tests for the static-token callback produced by ``build_access_token_callback``."""

    def test_returns_static_token(self):
        """The callback yields the exact token it was built with."""
        callback = build_access_token_callback("my_static_token")
        token, _expiry = callback()
        assert token == "my_static_token"

    def test_expiry_is_zero(self):
        """The second element of the callback result is 0."""
        callback = build_access_token_callback("tok")
        _token, expiry = callback()
        assert expiry == 0

    def test_callback_is_idempotent(self):
        """Two consecutive invocations return equal results."""
        callback = build_access_token_callback("tok")
        first_result = callback()
        second_result = callback()
        assert first_result == second_result

    def test_different_tokens_produce_different_callbacks(self):
        """Each callback keeps its own token; building a second one does not affect the first."""
        callback_a = build_access_token_callback("token_a")
        callback_b = build_access_token_callback("token_b")
        assert callback_a()[0] == "token_a"
        assert callback_b()[0] == "token_b"
|
||||
|
||||
|
||||
# ── build_basic_auth_callback ────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestBuildBasicAuthCallback:
    """Unit tests for ``build_basic_auth_callback`` with the JWT exchange patched."""

    @patch(
        "metadata.ingestion.source.pipeline.airflow.api.auth.try_exchange_jwt",
        return_value="jwt_token_xyz",
    )
    def test_jwt_success_returns_bearer_mode(self, _mock_jwt):
        """When the exchange yields a JWT, the token carries a ``Bearer`` prefix and mode is None."""
        callback, mode = build_basic_auth_callback(
            "http://airflow.example.com:8080", "admin", "pass", True
        )
        assert mode is None

        token, expiry = callback()
        assert token == "Bearer jwt_token_xyz"
        assert expiry == _JWT_REFRESH_INTERVAL_SECONDS

    @patch(
        "metadata.ingestion.source.pipeline.airflow.api.auth.try_exchange_jwt",
        return_value=None,
    )
    def test_jwt_failure_falls_back_to_basic(self, _mock_jwt):
        """When the exchange yields nothing, the token is a base64 ``Basic`` credential."""
        callback, mode = build_basic_auth_callback(
            "http://airflow.example.com:8080", "admin", "secret", True
        )
        assert mode is None

        token, expiry = callback()
        encoded_pair = base64.b64encode(b"admin:secret").decode()
        assert token == f"Basic {encoded_pair}"
        assert expiry == _BASIC_AUTH_TTL_SECONDS

    @patch(
        "metadata.ingestion.source.pipeline.airflow.api.auth.try_exchange_jwt",
        return_value=None,
    )
    def test_basic_token_encodes_colon_in_password_correctly(self, _mock_jwt):
        """A colon inside the password survives the encode/decode round trip."""
        callback, _mode = build_basic_auth_callback(
            "http://h", "user", "pass:word", True
        )
        token, _ = callback()

        scheme_prefix = "Basic "
        assert token.startswith(scheme_prefix)
        encoded_part = token[len(scheme_prefix) :]
        assert base64.b64decode(encoded_part).decode() == "user:pass:word"

    @patch(
        "metadata.ingestion.source.pipeline.airflow.api.auth.try_exchange_jwt",
        return_value=None,
    )
    def test_passes_host_and_credentials_to_jwt_exchange(self, mock_jwt):
        """Invoking the callback forwards host, user, password and verify flag to the exchange."""
        callback, _ = build_basic_auth_callback(
            "http://my.airflow.com", "alice", "pw", False
        )
        callback()
        mock_jwt.assert_called_once_with("http://my.airflow.com", "alice", "pw", False)
|
||||
|
||||
|
||||
# ── build_gcp_token_callback ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestBuildGcpTokenCallback:
    """Unit tests for ``build_gcp_token_callback`` with google-auth entry points patched."""

    def _make_gcp_credentials(self, impersonate=None):
        """Return a mock credentials config carrying the given impersonation setting."""
        gcp_config = MagicMock()
        gcp_config.gcpImpersonateServiceAccount = impersonate
        return gcp_config

    @patch("metadata.ingestion.source.pipeline.airflow.api.auth.set_google_credentials")
    def test_set_google_credentials_called_on_build(self, mock_set):
        """Building the callback passes the config to ``set_google_credentials`` exactly once."""
        gcp_config = self._make_gcp_credentials()
        build_gcp_token_callback(gcp_config)
        mock_set.assert_called_once_with(gcp_config)

    @patch("google.auth.default")
    @patch("metadata.ingestion.source.pipeline.airflow.api.auth.set_google_credentials")
    def test_callback_returns_token_and_expiry(self, _mock_set, mock_default):
        """The callback returns the credentials' token and expiry after one refresh."""
        one_hour_ahead = datetime.now(timezone.utc) + timedelta(hours=1)
        default_creds = MagicMock(token="gcp_access_token", expiry=one_hour_ahead)
        mock_default.return_value = (default_creds, "project")

        callback = build_gcp_token_callback(self._make_gcp_credentials())

        with patch("google.auth.transport.requests.Request"):
            token, returned_expiry = callback()

        assert token == "gcp_access_token"
        assert returned_expiry == one_hour_ahead
        default_creds.refresh.assert_called_once()

    @patch("google.auth.default")
    @patch("metadata.ingestion.source.pipeline.airflow.api.auth.set_google_credentials")
    def test_fallback_expiry_when_credentials_have_no_expiry(
        self, _mock_set, mock_default
    ):
        """With ``expiry=None`` on the credentials, the returned expiry lands 54-56 minutes ahead."""
        default_creds = MagicMock(token="tok", expiry=None)
        mock_default.return_value = (default_creds, "project")

        callback = build_gcp_token_callback(self._make_gcp_credentials())

        # Bracket the call with a +/- one minute window around the expected 55 minutes.
        lower_bound = datetime.now(timezone.utc) + timedelta(minutes=54)
        with patch("google.auth.transport.requests.Request"):
            _, expiry = callback()
        upper_bound = datetime.now(timezone.utc) + timedelta(minutes=56)

        assert lower_bound < expiry < upper_bound

    @patch(
        "metadata.ingestion.source.pipeline.airflow.api.auth.get_gcp_impersonate_credentials"
    )
    @patch("metadata.ingestion.source.pipeline.airflow.api.auth.set_google_credentials")
    def test_impersonation_uses_impersonate_credentials(
        self, _mock_set, mock_impersonate
    ):
        """With impersonation configured, the token comes from the impersonated credentials."""
        impersonation = MagicMock()
        impersonation.impersonateServiceAccount = "svc@project.iam.gserviceaccount.com"
        impersonation.lifetime = 3600

        impersonated_creds = MagicMock(
            token="impersonated_token",
            expiry=datetime.now(timezone.utc) + timedelta(hours=1),
        )
        mock_impersonate.return_value = impersonated_creds

        callback = build_gcp_token_callback(
            self._make_gcp_credentials(impersonate=impersonation)
        )

        with patch("google.auth.transport.requests.Request"):
            token, _ = callback()

        assert token == "impersonated_token"
        mock_impersonate.assert_called_once_with(
            impersonate_service_account="svc@project.iam.gserviceaccount.com",
            scopes=["https://www.googleapis.com/auth/cloud-platform"],
            lifetime=3600,
        )
        impersonated_creds.refresh.assert_called_once()

    @patch(
        "metadata.ingestion.source.pipeline.airflow.api.auth.get_gcp_impersonate_credentials"
    )
    @patch("google.auth.default")
    @patch("metadata.ingestion.source.pipeline.airflow.api.auth.set_google_credentials")
    def test_no_impersonation_when_field_is_none(
        self, _mock_set, mock_default, mock_impersonate
    ):
        """Without an impersonation setting, only ``google.auth.default`` is consulted."""
        default_creds = MagicMock(token="tok", expiry=None)
        mock_default.return_value = (default_creds, "project")

        callback = build_gcp_token_callback(
            self._make_gcp_credentials(impersonate=None)
        )

        with patch("google.auth.transport.requests.Request"):
            callback()

        mock_impersonate.assert_not_called()
        mock_default.assert_called_once()

    @patch("google.auth.default")
    @patch("metadata.ingestion.source.pipeline.airflow.api.auth.set_google_credentials")
    def test_callback_calls_refresh_on_every_invocation(self, _mock_set, mock_default):
        """Three callback invocations trigger three credential refreshes."""
        default_creds = MagicMock(
            token="tok",
            expiry=datetime.now(timezone.utc) + timedelta(hours=1),
        )
        mock_default.return_value = (default_creds, "project")

        callback = build_gcp_token_callback(self._make_gcp_credentials())

        with patch("google.auth.transport.requests.Request"):
            callback()
            callback()
            callback()

        assert default_creds.refresh.call_count == 3

    @patch("google.auth.default")
    @patch("metadata.ingestion.source.pipeline.airflow.api.auth.set_google_credentials")
    def test_scopes_include_cloud_platform(self, _mock_set, mock_default):
        """``google.auth.default`` is called once with the cloud-platform scope."""
        default_creds = MagicMock(token="tok", expiry=None)
        mock_default.return_value = (default_creds, "project")

        callback = build_gcp_token_callback(self._make_gcp_credentials())

        with patch("google.auth.transport.requests.Request"):
            callback()

        mock_default.assert_called_once_with(
            scopes=["https://www.googleapis.com/auth/cloud-platform"]
        )

    @patch("google.auth.default")
    @patch("metadata.ingestion.source.pipeline.airflow.api.auth.set_google_credentials")
    def test_expiry_returned_from_credentials(self, _mock_set, mock_default):
        """A concrete expiry on the credentials is returned unchanged."""
        fixed_expiry = datetime(2030, 1, 1, tzinfo=timezone.utc)
        default_creds = MagicMock(token="tok", expiry=fixed_expiry)
        mock_default.return_value = (default_creds, "project")

        callback = build_gcp_token_callback(self._make_gcp_credentials())

        with patch("google.auth.transport.requests.Request"):
            _, expiry = callback()

        assert expiry == fixed_expiry
|
||||
|
||||
|
||||
# ── GCP credential type coverage ─────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestGcpCredentialTypeCoverage:
    """
    Confirm that build_gcp_token_callback hands its config to
    set_google_credentials and still produces a working token callback, once
    per named GCP credential type.

    NOTE(review): the parametrized type name is not used inside the test body;
    the credentials object is a plain MagicMock in every case, so the four
    runs currently exercise the same path. Confirm whether real config types
    were meant to be constructed here.
    """

    @pytest.mark.parametrize(
        "gcp_config_type_name",
        [
            "GcpCredentialsValues",
            "GcpCredentialsPath",
            "GcpExternalAccount",
            "GcpADC",
        ],
    )
    @patch("google.auth.default")
    @patch("metadata.ingestion.source.pipeline.airflow.api.auth.set_google_credentials")
    def test_set_google_credentials_called_for_all_types(
        self, mock_set, mock_default, gcp_config_type_name
    ):
        """The config reaches ``set_google_credentials`` and the callback returns the mocked token."""
        default_creds = MagicMock(token="tok", expiry=None)
        mock_default.return_value = (default_creds, "project")

        gcp_config = MagicMock()
        gcp_config.gcpImpersonateServiceAccount = None

        callback = build_gcp_token_callback(gcp_config)
        mock_set.assert_called_once_with(gcp_config)

        with patch("google.auth.transport.requests.Request"):
            token, _ = callback()

        assert token == "tok"
|
||||
|
||||
|
||||
# ── AirflowApiClient constructor (e2e) ────────────────────────────────────────
|
||||
|
||||
|
||||
class TestAirflowApiClientAuthConfig:
    """
    Constructor-level tests for AirflowApiClient.

    TrackedREST is patched in every test, so nothing touches the network; each
    test builds the client and then inspects the first positional argument
    that TrackedREST was called with (the client config).
    """

    @staticmethod
    def _captured_client_config(mock_rest_cls):
        """Return the first positional argument passed to the patched TrackedREST."""
        return mock_rest_cls.call_args[0][0]

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_access_token_sets_bearer_mode_and_static_token(self, mock_rest_cls):
        """AccessToken auth yields Bearer mode and a callback returning the static token."""
        AirflowApiClient(_make_config(AccessToken(token="static_token_value")))

        client_config = self._captured_client_config(mock_rest_cls)
        assert client_config.auth_header == "Authorization"
        assert client_config.auth_token_mode == "Bearer"

        token, expiry = client_config.auth_token()
        assert token == "static_token_value"
        assert expiry == 0

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    @patch(
        "metadata.ingestion.source.pipeline.airflow.api.auth.try_exchange_jwt",
        return_value="jwt_from_airflow3",
    )
    def test_basic_auth_with_jwt_exchange_sets_bearer(self, _mock_jwt, mock_rest_cls):
        """BasicAuth with a successful JWT exchange yields a ``Bearer``-prefixed token and no mode."""
        auth = BasicAuth(username="admin", password="secret")
        AirflowApiClient(_make_config(auth))

        client_config = self._captured_client_config(mock_rest_cls)
        assert client_config.auth_header == "Authorization"
        assert client_config.auth_token_mode is None

        token, _ = client_config.auth_token()
        assert token == "Bearer jwt_from_airflow3"

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    @patch(
        "metadata.ingestion.source.pipeline.airflow.api.auth.try_exchange_jwt",
        return_value=None,
    )
    def test_basic_auth_without_jwt_falls_back_to_basic_mode(
        self, _mock_jwt, mock_rest_cls
    ):
        """BasicAuth with a failed JWT exchange yields a base64 ``Basic`` token and no mode."""
        auth = BasicAuth(username="admin", password="secret")
        AirflowApiClient(_make_config(auth))

        client_config = self._captured_client_config(mock_rest_cls)
        assert client_config.auth_header == "Authorization"
        assert client_config.auth_token_mode is None

        token, _ = client_config.auth_token()
        encoded_pair = base64.b64encode(b"admin:secret").decode()
        assert token == f"Basic {encoded_pair}"

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    @patch("metadata.ingestion.source.pipeline.airflow.api.auth.set_google_credentials")
    @patch("google.auth.default")
    def test_gcp_credentials_sets_bearer_with_live_callback(
        self, mock_default, _mock_set, mock_rest_cls
    ):
        """GcpServiceAccount auth yields Bearer mode and a callback backed by google-auth credentials."""
        one_hour_ahead = datetime.now(timezone.utc) + timedelta(hours=1)
        default_creds = MagicMock(token="gcp_tok", expiry=one_hour_ahead)
        mock_default.return_value = (default_creds, "project")

        gcp_config = MagicMock()
        gcp_config.gcpImpersonateServiceAccount = None
        # model_construct is used so the mock can stand in for the credentials field.
        auth = GcpServiceAccount.model_construct(credentials=gcp_config)
        AirflowApiClient(_make_config(auth))

        client_config = self._captured_client_config(mock_rest_cls)
        assert client_config.auth_header == "Authorization"
        assert client_config.auth_token_mode == "Bearer"

        with patch("google.auth.transport.requests.Request"):
            token, returned_expiry = client_config.auth_token()

        assert token == "gcp_tok"
        assert returned_expiry == one_hour_ahead

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_unknown_auth_type_sets_no_auth_header(self, mock_rest_cls):
        """An auth config of an unrecognised type leaves header and token callback unset."""
        unrecognised_auth = MagicMock()
        AirflowApiClient(_make_config(unrecognised_auth))

        client_config = self._captured_client_config(mock_rest_cls)
        assert client_config.auth_header is None
        assert client_config.auth_token is None

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_base_url_uses_host_port(self, mock_rest_cls):
        """The configured ``hostPort`` shows up in the client config's base URL."""
        config = _make_config(AccessToken(token="tok"))
        config.hostPort = "https://my-composer.example.com:443"
        AirflowApiClient(config)

        client_config = self._captured_client_config(mock_rest_cls)
        assert "my-composer.example.com" in client_config.base_url

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_verify_ssl_false_passed_to_client(self, mock_rest_cls):
        """``verifySSL=False`` on the connection becomes ``verify=False`` on the client config."""
        config = _make_config(AccessToken(token="tok"))
        config.connection.verifySSL = False
        AirflowApiClient(config)

        client_config = self._captured_client_config(mock_rest_cls)
        assert client_config.verify is False

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_api_version_is_api(self, mock_rest_cls):
        """The client config's ``api_version`` is the literal path segment "api"."""
        AirflowApiClient(_make_config(AccessToken(token="tok")))

        client_config = self._captured_client_config(mock_rest_cls)
        assert client_config.api_version == "api"
|
||||
|
||||
|
||||
# ── GCP token refresh integration ────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestGcpTokenRefreshIntegration:
    """
    Check that each invocation of the GCP token callback performs its own
    credential refresh and returns the token that is current after it.
    """

    @patch("google.auth.default")
    @patch("metadata.ingestion.source.pipeline.airflow.api.auth.set_google_credentials")
    def test_each_callback_call_refreshes_credentials(self, _mock_set, mock_default):
        """Three callback calls produce three refreshes and three successive tokens."""
        issued_tokens = ["token_v1", "token_v2", "token_v3"]
        refreshes = {"n": 0}

        def record_refresh(_request):
            refreshes["n"] += 1

        def current_token(_self):
            # The token exposed is the one matching the number of refreshes so far,
            # clamped to the last entry of the list.
            latest = min(refreshes["n"] - 1, len(issued_tokens) - 1)
            return issued_tokens[latest]

        mock_creds = MagicMock()
        mock_creds.expiry = datetime.now(timezone.utc) + timedelta(hours=1)
        mock_creds.refresh.side_effect = record_refresh
        # A property has to live on the type, so it is attached to the mock's class.
        type(mock_creds).token = property(current_token)
        mock_default.return_value = (mock_creds, "project")

        gcp_config = MagicMock()
        gcp_config.gcpImpersonateServiceAccount = None
        callback = build_gcp_token_callback(gcp_config)

        with patch("google.auth.transport.requests.Request"):
            first, _ = callback()
            second, _ = callback()
            third, _ = callback()

        assert mock_creds.refresh.call_count == 3
        assert first == "token_v1"
        assert second == "token_v2"
        assert third == "token_v3"
|
||||
1074
ingestion/tests/unit/topology/pipeline/test_airflow_mwaa_client.py
Normal file
1074
ingestion/tests/unit/topology/pipeline/test_airflow_mwaa_client.py
Normal file
File diff suppressed because it is too large
Load diff
713
ingestion/tests/unit/topology/pipeline/test_airflowapi.py
Normal file
713
ingestion/tests/unit/topology/pipeline/test_airflowapi.py
Normal file
|
|
@ -0,0 +1,713 @@
|
|||
# Copyright 2025 Collate
|
||||
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
Tests for AirflowApi pipeline connector
|
||||
"""
|
||||
from datetime import datetime
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from requests.exceptions import ConnectionError as RequestsConnectionError
|
||||
from requests.exceptions import HTTPError
|
||||
|
||||
from metadata.generated.schema.entity.data.pipeline import PipelineState, StatusType
|
||||
from metadata.generated.schema.entity.utils.common.accessTokenConfig import AccessToken
|
||||
from metadata.ingestion.source.pipeline.airflow.api.client import AirflowApiClient
|
||||
from metadata.ingestion.source.pipeline.airflow.api.models import (
|
||||
AirflowApiDagDetails,
|
||||
AirflowApiDagRun,
|
||||
AirflowApiTask,
|
||||
AirflowApiTaskInstance,
|
||||
)
|
||||
from metadata.ingestion.source.pipeline.airflow.api.source import (
|
||||
STATUS_MAP,
|
||||
AirflowApiSource,
|
||||
)
|
||||
from metadata.utils.helpers import datetime_to_ts
|
||||
|
||||
# ── Shared Helpers ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _make_client(mock_rest_cls, api_version="v1"):
    """
    Build an AirflowApiClient on top of a patched TrackedREST class.

    The connection uses AccessToken auth with a fixed test token. Returns the
    client together with the mock instance that the patched TrackedREST class
    hands out, so tests can stub its methods.
    """
    rest_instance = MagicMock()
    mock_rest_cls.return_value = rest_instance

    version_setting = MagicMock()
    version_setting.value = api_version

    connection = MagicMock()
    connection.authConfig = AccessToken(token="test_token")
    connection.apiVersion = version_setting
    connection.verifySSL = True

    service_config = MagicMock()
    service_config.hostPort = "http://localhost:8080"
    service_config.connection = connection

    return AirflowApiClient(service_config), mock_rest_cls.return_value
|
||||
|
||||
|
||||
def _make_source_and_dag(task_names=None):
    """
    Build a mocked AirflowApiSource plus a two-task DAG for status/pipeline tests.

    ``task_names`` populates the mocked context's ``task_names``; a falsy value
    falls back to ``{"task_1"}``. Returns ``(source, dag)``.
    """
    base_url = "http://airflow.example.com:8080"

    def _dag_url(dag_id):
        return f"http://airflow.example.com:8080/dags/{dag_id}/grid"

    def _task_url(dag_id, task_id):
        return (
            f"http://airflow.example.com:8080/taskinstance/list/"
            f"?_flt_3_dag_id={dag_id}&_flt_3_task_id={task_id}"
        )

    def _pipeline_state(details):
        if details.is_paused is None:
            return None
        return PipelineState.Inactive if details.is_paused else PipelineState.Active

    source = MagicMock()

    source.service_connection = MagicMock()
    source.service_connection.numberOfStatus = 5
    source.service_connection.hostPort = base_url

    topology_context = MagicMock()
    topology_context.pipeline_service = "test_service"
    topology_context.pipeline = "test_dag"
    topology_context.task_names = task_names or {"task_1"}
    source.context.get.return_value = topology_context

    source.connection = MagicMock()
    source.connection.api_version = "v1"
    source.metadata = MagicMock()
    source.source_config = MagicMock()
    source.source_config.includeTags = True

    source._get_dag_source_url = _dag_url
    source._get_task_source_url = _task_url
    # Delegate to the real implementation, bound to the mocked source.
    source._build_tasks = lambda details: AirflowApiSource._build_tasks(source, details)
    source.register_record = MagicMock()
    source.get_pipeline_state = _pipeline_state

    first_task = AirflowApiTask(
        task_id="task_1",
        downstream_task_ids=["task_2"],
        class_ref={"class_name": "PythonOperator"},
        doc_md="Task 1 docs",
    )
    second_task = AirflowApiTask(task_id="task_2")

    dag = AirflowApiDagDetails(
        dag_id="test_dag",
        description="A test pipeline",
        is_paused=False,
        tags=["team:data"],
        schedule_interval="@daily",
        tasks=[first_task, second_task],
    )
    return source, dag
|
||||
|
||||
|
||||
# ── Status Mapping ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestStatusMapping:
    """Pin the entries of STATUS_MAP against the StatusType enum values."""

    def test_success_maps_to_successful(self):
        """"success" maps to Successful."""
        expected = StatusType.Successful.value
        assert STATUS_MAP["success"] == expected

    def test_failed_maps_to_failed(self):
        """"failed" maps to Failed."""
        expected = StatusType.Failed.value
        assert STATUS_MAP["failed"] == expected

    def test_queued_maps_to_pending(self):
        """"queued" maps to Pending."""
        expected = StatusType.Pending.value
        assert STATUS_MAP["queued"] == expected

    def test_skipped_maps_to_skipped(self):
        """"skipped" maps to Skipped."""
        expected = StatusType.Skipped.value
        assert STATUS_MAP["skipped"] == expected

    def test_running_maps_to_pending(self):
        """"running" maps to Pending."""
        expected = StatusType.Pending.value
        assert STATUS_MAP["running"] == expected

    def test_upstream_failed_maps_to_failed(self):
        """"upstream_failed" maps to Failed."""
        expected = StatusType.Failed.value
        assert STATUS_MAP["upstream_failed"] == expected

    def test_unknown_state_defaults(self):
        """Looking up an absent key with a Pending default yields Pending."""
        fallback = StatusType.Pending.value
        looked_up = STATUS_MAP.get("nonexistent", StatusType.Pending.value)
        assert looked_up == fallback
|
||||
|
||||
|
||||
# ── Models ───────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestModels:
    """Construction checks for the Airflow API model classes."""

    def test_dag_details_minimal(self):
        """A DAG built from only ``dag_id`` has no tasks and ``tags`` of None."""
        minimal_dag = AirflowApiDagDetails(dag_id="test_dag")

        assert minimal_dag.dag_id == "test_dag"
        assert minimal_dag.tasks == []
        assert minimal_dag.tags is None

    def test_dag_details_with_tasks(self):
        """Tasks passed to the DAG keep their order, downstream ids and class reference."""
        upstream_task = AirflowApiTask(
            task_id="task_1",
            downstream_task_ids=["task_2"],
            class_ref={"class_name": "BashOperator"},
        )
        downstream_task = AirflowApiTask(task_id="task_2")

        dag = AirflowApiDagDetails(
            dag_id="test_dag",
            description="A test dag",
            is_paused=False,
            tasks=[upstream_task, downstream_task],
        )

        assert len(dag.tasks) == 2
        head = dag.tasks[0]
        assert head.downstream_task_ids == ["task_2"]
        assert head.class_ref["class_name"] == "BashOperator"

    def test_dag_run(self):
        """A DAG run exposes the run id and state it was built with."""
        dag_run = AirflowApiDagRun(
            dag_run_id="manual__2024-01-01",
            state="success",
        )

        assert dag_run.dag_run_id == "manual__2024-01-01"
        assert dag_run.state == "success"

    def test_task_instance(self):
        """A task instance exposes the task id and state it was built with."""
        task_instance = AirflowApiTaskInstance(
            task_id="task_1",
            state="success",
        )

        assert task_instance.task_id == "task_1"
        assert task_instance.state == "success"
|
||||
|
||||
|
||||
# ── Client: API Version Detection ────────────────────────────────────────
|
||||
|
||||
|
||||
class TestClientApiVersionDetection:
    """Tests for the value reported by ``AirflowApiClient.api_version``."""

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_auto_detect_v2(self, mock_rest_cls):
        """With "auto" configured and every REST ``get`` succeeding, the version is "v2"."""
        client, rest = _make_client(mock_rest_cls, api_version="auto")
        rest.get.return_value = {"version": "3.0.0"}

        detected = client.api_version

        assert detected == "v2"

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_auto_detect_v1_fallback(self, mock_rest_cls):
        """With "auto" configured and v2 paths failing, the version falls back to "v1"."""
        client, rest = _make_client(mock_rest_cls, api_version="auto")

        def fail_on_v2_paths(path):
            if "/v2/" in path:
                raise Exception("Not found")
            return {"version": "2.9.0"}

        rest.get.side_effect = fail_on_v2_paths

        detected = client.api_version

        assert detected == "v1"

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_explicit_version(self, mock_rest_cls):
        """An explicitly configured version is reported as-is."""
        client, _rest = _make_client(mock_rest_cls, api_version="v1")

        assert client.api_version == "v1"
|
||||
|
||||
|
||||
# ── Client: Build DAG Details ────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestClientBuildDagDetails:
    """Checks of ``build_dag_details`` against mocked task responses."""

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_build_dag_details_normalizes_tags(self, mock_rest_cls):
        """Dict-shaped tags come back as a flat list of their names."""
        client, mock_rest = _make_client(mock_rest_cls)
        mock_rest.get.return_value = {"tasks": []}

        raw_tags = [{"name": "team:data"}, {"name": "env:prod"}]
        payload = {
            "dag_id": "test_dag",
            "tags": raw_tags,
            "owners": ["admin"],
        }
        details = client.build_dag_details(payload)
        assert details.tags == ["team:data", "env:prod"]
        assert details.owners == ["admin"]

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_build_dag_details_with_tasks(self, mock_rest_cls):
        """Tasks returned by the mocked GET are attached to the DAG details."""
        client, mock_rest = _make_client(mock_rest_cls)

        extract_task = {
            "task_id": "extract",
            "downstream_task_ids": ["transform"],
            "class_ref": {
                "class_name": "PythonOperator",
                "module_path": "airflow.operators.python",
            },
        }
        transform_task = {
            "task_id": "transform",
            "downstream_task_ids": [],
            "class_ref": {
                "class_name": "BashOperator",
                "module_path": "airflow.operators.bash",
            },
        }
        mock_rest.get.return_value = {"tasks": [extract_task, transform_task]}

        payload = {"dag_id": "etl_pipeline", "tags": [], "owners": []}
        details = client.build_dag_details(payload)

        assert len(details.tasks) == 2
        first = details.tasks[0]
        assert first.task_id == "extract"
        assert first.downstream_task_ids == ["transform"]
        assert first.class_ref["class_name"] == "PythonOperator"
|
||||
|
||||
|
||||
# ── Client: Date Field ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestClientDateField:
    """Checks which date field name the client uses per API version."""

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_v1_uses_execution_date(self, mock_rest_cls):
        """A v1 client exposes ``execution_date`` as its date field."""
        client, _unused_rest = _make_client(mock_rest_cls, api_version="v1")
        date_field = client._date_field
        assert date_field == "execution_date"

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_v2_uses_logical_date(self, mock_rest_cls):
        """A v2 client exposes ``logical_date`` as its date field."""
        client, _unused_rest = _make_client(mock_rest_cls, api_version="v2")
        date_field = client._date_field
        assert date_field == "logical_date"
|
||||
|
||||
|
||||
# ── Source URL Generation ────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSourceUrlGeneration:
    """Checks of the DAG and task source URLs built for v1 and v2."""

    def _make_source(self, api_version: str):
        """Return a mocked source with a fixed host and the given API version."""
        service_connection = MagicMock()
        service_connection.hostPort = "http://airflow.example.com:8080"

        connection = MagicMock()
        connection.api_version = api_version

        source = MagicMock()
        source.service_connection = service_connection
        source.connection = connection
        return source

    def test_v2_dag_url(self):
        """v2 DAG URLs are ``<host>/dags/<dag_id>``."""
        mocked_source = self._make_source("v2")
        dag_url = AirflowApiSource._get_dag_source_url(mocked_source, "my_dag")
        assert dag_url == "http://airflow.example.com:8080/dags/my_dag"

    def test_v1_dag_url(self):
        """v1 DAG URLs are ``<host>/dags/<dag_id>/grid``."""
        mocked_source = self._make_source("v1")
        dag_url = AirflowApiSource._get_dag_source_url(mocked_source, "my_dag")
        assert dag_url == "http://airflow.example.com:8080/dags/my_dag/grid"

    def test_v2_task_url(self):
        """v2 task URLs are ``<host>/dags/<dag_id>/tasks/<task_id>``."""
        mocked_source = self._make_source("v2")
        task_url = AirflowApiSource._get_task_source_url(
            mocked_source, "my_dag", "my_task"
        )
        assert task_url == "http://airflow.example.com:8080/dags/my_dag/tasks/my_task"

    def test_v1_task_url(self):
        """v1 task URLs point at the task-instance list filtered by DAG and task."""
        mocked_source = self._make_source("v1")
        task_url = AirflowApiSource._get_task_source_url(
            mocked_source, "my_dag", "my_task"
        )
        assert "taskinstance/list" in task_url
        assert "_flt_3_dag_id=my_dag" in task_url
        assert "_flt_3_task_id=my_task" in task_url
|
||||
|
||||
|
||||
# ── Pagination: DAGs ─────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestPaginateGetAllDags:
    """Checks of ``get_all_dags`` against mocked paged responses."""

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_single_page(self, mock_rest_cls):
        """A response holding all entries is returned after a single GET."""
        client, mock_rest = _make_client(mock_rest_cls)
        mock_rest.get.return_value = {
            "dags": [{"dag_id": "a"}, {"dag_id": "b"}],
            "total_entries": 2,
        }

        dags = client.get_all_dags()

        assert len(dags) == 2
        assert dags[0]["dag_id"] == "a"
        assert mock_rest.get.call_count == 1

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_multiple_pages(self, mock_rest_cls):
        """Three mocked pages are concatenated in order using three GETs."""
        client, mock_rest = _make_client(mock_rest_cls)

        def page(start, stop):
            # One mocked response page; every page reports the same total.
            return {
                "dags": [{"dag_id": f"dag_{i}"} for i in range(start, stop)],
                "total_entries": 250,
            }

        mock_rest.get.side_effect = [page(0, 100), page(100, 200), page(200, 250)]

        dags = client.get_all_dags()

        assert len(dags) == 250
        assert dags[0]["dag_id"] == "dag_0"
        assert dags[-1]["dag_id"] == "dag_249"
        assert mock_rest.get.call_count == 3

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_empty_response(self, mock_rest_cls):
        """A response with no DAGs yields an empty list."""
        client, mock_rest = _make_client(mock_rest_cls)
        mock_rest.get.return_value = {"dags": [], "total_entries": 0}

        dags = client.get_all_dags()

        assert dags == []
|
||||
|
||||
|
||||
# ── Pagination: Task Instances ───────────────────────────────────────────
|
||||
|
||||
|
||||
class TestPaginateTaskInstances:
    """Checks of ``get_task_instances_for_run`` against mocked responses."""

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_single_page_task_instances(self, mock_rest_cls):
        """Task instances from one page are parsed into model objects."""
        client, mock_rest = _make_client(mock_rest_cls)
        mock_rest.get.return_value = {
            "task_instances": [
                {"task_id": "t1", "state": "success"},
                {"task_id": "t2", "state": "failed"},
            ],
            "total_entries": 2,
        }

        instances = client.get_task_instances_for_run("dag1", "run1")

        assert len(instances) == 2
        first, second = instances[0], instances[1]
        assert first.task_id == "t1"
        assert first.state == "success"
        assert second.task_id == "t2"
        assert second.state == "failed"

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_multi_page_task_instances(self, mock_rest_cls):
        """Two mocked pages are concatenated in order using two GETs."""
        client, mock_rest = _make_client(mock_rest_cls)

        def page(start, stop):
            # One mocked response page; every page reports the same total.
            return {
                "task_instances": [
                    {"task_id": f"t_{i}", "state": "success"}
                    for i in range(start, stop)
                ],
                "total_entries": 150,
            }

        mock_rest.get.side_effect = [page(0, 100), page(100, 150)]

        instances = client.get_task_instances_for_run("big_dag", "run1")

        assert len(instances) == 150
        assert instances[0].task_id == "t_0"
        assert instances[-1].task_id == "t_149"
        assert mock_rest.get.call_count == 2

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_task_instances_api_error_returns_empty(self, mock_rest_cls):
        """A generic exception from the GET results in an empty list."""
        client, mock_rest = _make_client(mock_rest_cls)
        mock_rest.get.side_effect = Exception("Connection refused")

        instances = client.get_task_instances_for_run("dag1", "run1")

        assert instances == []
|
||||
|
||||
|
||||
# ── Auth & Connectivity Error Propagation ────────────────────────────────
|
||||
|
||||
|
||||
class TestAuthErrorPropagation:
    """Checks which errors ``_detect_api_version`` raises or tolerates."""

    @staticmethod
    def _assert_http_status_propagates(mock_rest_cls, status_code):
        """Assert an HTTPError with ``status_code`` escapes version detection."""
        client, mock_rest = _make_client(mock_rest_cls, api_version="auto")
        response = MagicMock(status_code=status_code)
        mock_rest.get.side_effect = HTTPError(response=response)

        with pytest.raises(HTTPError):
            client._detect_api_version()

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_401_is_raised_during_version_detection(self, mock_rest_cls):
        """An HTTP 401 is re-raised rather than swallowed."""
        self._assert_http_status_propagates(mock_rest_cls, 401)

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_403_is_raised_during_version_detection(self, mock_rest_cls):
        """An HTTP 403 is re-raised rather than swallowed."""
        self._assert_http_status_propagates(mock_rest_cls, 403)

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_404_falls_through_to_next_version(self, mock_rest_cls):
        """An HTTP 404 on a ``/v2/`` path lets detection settle on v1."""
        client, mock_rest = _make_client(mock_rest_cls, api_version="auto")
        response_404 = MagicMock(status_code=404)

        # The parameter name ``path`` is kept so the mock works whether the
        # client passes it positionally or by keyword.
        def not_found_on_v2(path):
            if "/v2/" not in path:
                return {"version": "2.9.0"}
            raise HTTPError(response=response_404)

        mock_rest.get.side_effect = not_found_on_v2
        detected = client._detect_api_version()
        assert detected == "v1"

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_connection_error_is_raised(self, mock_rest_cls):
        """A requests connection error escapes version detection."""
        client, mock_rest = _make_client(mock_rest_cls, api_version="auto")
        mock_rest.get.side_effect = RequestsConnectionError("Connection refused")

        with pytest.raises(RequestsConnectionError):
            client._detect_api_version()

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_timeout_error_is_raised(self, mock_rest_cls):
        """A ``TimeoutError`` escapes version detection."""
        client, mock_rest = _make_client(mock_rest_cls, api_version="auto")
        mock_rest.get.side_effect = TimeoutError("timed out")

        with pytest.raises(TimeoutError):
            client._detect_api_version()
|
||||
|
||||
|
||||
# ── Tag Edge Cases ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestBuildDagDetailsTagEdgeCases:
    """Checks how ``build_dag_details`` treats unusual tag entries."""

    @staticmethod
    def _tags_after_build(mock_rest_cls, raw_tags):
        """Build details for ``test_dag`` with ``raw_tags`` and return its tags."""
        client, mock_rest = _make_client(mock_rest_cls)
        mock_rest.get.return_value = {"tasks": []}

        payload = {
            "dag_id": "test_dag",
            "tags": raw_tags,
        }
        return client.build_dag_details(payload).tags

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_empty_tag_names_are_filtered(self, mock_rest_cls):
        """Dict tags whose name is empty or None are dropped."""
        raw_tags = [{"name": ""}, {"name": "valid_tag"}, {"name": None}]
        assert self._tags_after_build(mock_rest_cls, raw_tags) == ["valid_tag"]

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_non_string_non_dict_tags_are_skipped(self, mock_rest_cls):
        """Entries that are neither strings nor dicts are dropped."""
        raw_tags = [123, None, {"name": "good"}, True]
        assert self._tags_after_build(mock_rest_cls, raw_tags) == ["good"]

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_string_tags_are_kept(self, mock_rest_cls):
        """Plain string tags are kept alongside dict-shaped ones, in order."""
        raw_tags = ["simple_string_tag", {"name": "dict_tag"}]
        expected = ["simple_string_tag", "dict_tag"]
        assert self._tags_after_build(mock_rest_cls, raw_tags) == expected
|
||||
|
||||
|
||||
# ── Pipeline Status: Timestamp Fallback ──────────────────────────────────
|
||||
|
||||
|
||||
class TestPipelineStatusTimestampFallback:
    """Checks which run date ends up as the pipeline status timestamp."""

    @staticmethod
    def _statuses_for(dag_run):
        """Yield pipeline statuses for a mocked source returning ``dag_run``."""
        source, dag = _make_source_and_dag()
        source.connection.get_dag_runs.return_value = [dag_run]
        source.connection.get_task_instances_for_run.return_value = []
        return list(AirflowApiSource.yield_pipeline_status(source, dag))

    def test_uses_execution_date_when_available(self):
        """``execution_date`` wins when both it and ``start_date`` are set."""
        exec_dt = datetime(2025, 1, 15, 12, 0)
        start_dt = datetime(2025, 1, 15, 12, 5)
        dag_run = AirflowApiDagRun(
            dag_run_id="run_1",
            state="success",
            execution_date=exec_dt,
            start_date=start_dt,
        )

        emitted = self._statuses_for(dag_run)

        assert len(emitted) == 1
        pipeline_status = emitted[0].right.pipeline_status
        assert pipeline_status.timestamp.root == datetime_to_ts(exec_dt)

    def test_falls_back_to_start_date(self):
        """``start_date`` is used when ``execution_date`` is None."""
        start_dt = datetime(2025, 1, 15, 12, 5)
        dag_run = AirflowApiDagRun(
            dag_run_id="run_1",
            state="success",
            execution_date=None,
            start_date=start_dt,
        )

        emitted = self._statuses_for(dag_run)

        assert len(emitted) == 1
        pipeline_status = emitted[0].right.pipeline_status
        assert pipeline_status.timestamp.root == datetime_to_ts(start_dt)

    def test_falls_back_to_end_date(self):
        """``end_date`` is used when the other two dates are None."""
        end_dt = datetime(2025, 1, 15, 12, 10)
        dag_run = AirflowApiDagRun(
            dag_run_id="run_1",
            state="success",
            execution_date=None,
            start_date=None,
            end_date=end_dt,
        )

        emitted = self._statuses_for(dag_run)

        assert len(emitted) == 1
        pipeline_status = emitted[0].right.pipeline_status
        assert pipeline_status.timestamp.root == datetime_to_ts(end_dt)

    def test_skips_run_with_no_timestamp(self):
        """A run with no dates at all produces no status."""
        dag_run = AirflowApiDagRun(
            dag_run_id="run_no_ts",
            state="success",
            execution_date=None,
            start_date=None,
            end_date=None,
        )

        emitted = self._statuses_for(dag_run)

        assert len(emitted) == 0
|
||||
|
||||
|
||||
# ── Pipeline State ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestGetPipelineState:
    """Checks how ``is_paused`` is translated into a pipeline state."""

    def test_paused_returns_inactive(self):
        """A paused DAG is reported as Inactive."""
        source, _unused_dag = _make_source_and_dag()
        paused_dag = AirflowApiDagDetails(dag_id="test", is_paused=True)
        state = AirflowApiSource.get_pipeline_state(source, paused_dag)
        assert state == PipelineState.Inactive

    def test_not_paused_returns_active(self):
        """An unpaused DAG is reported as Active."""
        source, _unused_dag = _make_source_and_dag()
        running_dag = AirflowApiDagDetails(dag_id="test", is_paused=False)
        state = AirflowApiSource.get_pipeline_state(source, running_dag)
        assert state == PipelineState.Active

    def test_none_paused_returns_none(self):
        """A DAG whose ``is_paused`` is None yields no state."""
        source, _unused_dag = _make_source_and_dag()
        unknown_dag = AirflowApiDagDetails(dag_id="test", is_paused=None)
        state = AirflowApiSource.get_pipeline_state(source, unknown_dag)
        assert state is None
|
||||
|
||||
|
||||
# ── Build Tasks ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestBuildTasks:
    """Checks of the task entities produced by ``_build_tasks``."""

    def test_builds_tasks_with_all_fields(self):
        """The fixture DAG's first task carries name, links, type and docs."""
        source, dag = _make_source_and_dag()
        built = AirflowApiSource._build_tasks(source, dag)
        assert len(built) == 2

        first = built[0]
        assert first.name == "task_1"
        assert first.downstreamTasks == ["task_2"]
        assert first.taskType == "PythonOperator"
        assert first.description is not None
        assert "Task 1 docs" in first.description.root

    def test_builds_tasks_with_none_class_ref(self):
        """A task without ``class_ref`` is built with ``taskType`` None."""
        source, _unused_dag = _make_source_and_dag()
        bare_task = AirflowApiTask(task_id="t1", class_ref=None)
        dag = AirflowApiDagDetails(dag_id="test", tasks=[bare_task])
        built = AirflowApiSource._build_tasks(source, dag)
        assert len(built) == 1
        assert built[0].taskType is None

    def test_builds_tasks_empty(self):
        """A DAG with no tasks produces an empty list."""
        source, _unused_dag = _make_source_and_dag()
        empty_dag = AirflowApiDagDetails(dag_id="test", tasks=[])
        built = AirflowApiSource._build_tasks(source, empty_dag)
        assert built == []
|
||||
|
||||
|
||||
# ── Yield Pipeline ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestYieldPipeline:
    """Checks of ``yield_pipeline`` with tag-label lookup patched out."""

    @patch(
        "metadata.ingestion.source.pipeline.airflow.api.source.get_tag_labels",
        return_value=[],
    )
    def test_yields_create_pipeline_request(self, _mock_tags):
        """The fixture DAG yields one request populated from its fields."""
        source, dag = _make_source_and_dag()
        emitted = list(AirflowApiSource.yield_pipeline(source, dag))

        assert len(emitted) == 1
        create_request = emitted[0].right
        assert create_request.name.root == "test_dag"
        assert create_request.description.root == "A test pipeline"
        assert create_request.scheduleInterval == "@daily"
        assert len(create_request.tasks) == 2
        assert create_request.tasks[0].name == "task_1"

    @patch(
        "metadata.ingestion.source.pipeline.airflow.api.source.get_tag_labels",
        return_value=[],
    )
    def test_yields_error_on_exception(self, _mock_tags):
        """A failure while building the request is yielded on the left side."""
        source, dag = _make_source_and_dag()
        # Clear the pipeline service on the mocked context so that building
        # the request fails.
        source.context.get.return_value.pipeline_service = None

        emitted = list(AirflowApiSource.yield_pipeline(source, dag))

        assert len(emitted) == 1
        failure = emitted[0].left
        assert failure is not None
        assert "test_dag" in failure.name
|
||||
|
||||
|
||||
# ── Client: DAG Runs Parsing ─────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestClientGetDagRuns:
    """Checks of ``get_dag_runs`` parsing and error handling."""

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_parses_dag_runs_with_logical_date(self, mock_rest_cls):
        """A run carrying ``logical_date`` is parsed with ``execution_date`` set."""
        client, mock_rest = _make_client(mock_rest_cls)
        raw_run = {
            "dag_run_id": "run_1",
            "state": "success",
            "logical_date": "2025-01-15T12:00:00+00:00",
            "start_date": "2025-01-15T12:01:00+00:00",
            "end_date": "2025-01-15T12:05:00+00:00",
        }
        mock_rest.get.return_value = {"dag_runs": [raw_run]}

        dag_runs = client.get_dag_runs("my_dag", limit=5)

        assert len(dag_runs) == 1
        parsed = dag_runs[0]
        assert parsed.dag_run_id == "run_1"
        assert parsed.state == "success"
        assert parsed.execution_date is not None

    @patch("metadata.ingestion.source.pipeline.airflow.api.client.TrackedREST")
    def test_returns_empty_on_api_error(self, mock_rest_cls):
        """A generic exception from the GET results in an empty list."""
        client, mock_rest = _make_client(mock_rest_cls)
        mock_rest.get.side_effect = Exception("API down")

        dag_runs = client.get_dag_runs("my_dag")

        assert dag_runs == []
|
||||
|
|
@ -0,0 +1,249 @@
|
|||
/*
|
||||
* Copyright 2021 Collate
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.openmetadata.it.tests;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.MethodOrderer;
|
||||
import org.junit.jupiter.api.Order;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.TestMethodOrder;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.openmetadata.it.util.SdkClients;
|
||||
import org.openmetadata.it.util.TestNamespace;
|
||||
import org.openmetadata.it.util.TestNamespaceExtension;
|
||||
import org.openmetadata.schema.entity.data.Database;
|
||||
import org.openmetadata.schema.entity.data.DatabaseSchema;
|
||||
import org.openmetadata.schema.entity.data.Table;
|
||||
import org.openmetadata.schema.entity.services.DatabaseService;
|
||||
import org.openmetadata.schema.type.Column;
|
||||
import org.openmetadata.schema.type.ColumnDataType;
|
||||
import org.openmetadata.sdk.fluent.DatabaseSchemas;
|
||||
import org.openmetadata.sdk.fluent.DatabaseServices;
|
||||
import org.openmetadata.sdk.fluent.Databases;
|
||||
import org.openmetadata.sdk.fluent.LineageAPI;
|
||||
import org.openmetadata.sdk.fluent.OpenLineage;
|
||||
import org.openmetadata.sdk.fluent.Tables;
|
||||
import org.openmetadata.sdk.fluent.wrappers.FluentTable;
|
||||
|
||||
/**
|
||||
* Integration tests for OpenLineage → lineage resolution.
|
||||
*
|
||||
* <p>Verifies that OL COMPLETE events with input/output datasets are resolved to existing OM table
|
||||
* entities and lineage edges are created with source=OpenLineage.
|
||||
*
|
||||
* <p>Creates its own test entities (service, database, schema, tables) to avoid depending on sample
|
||||
* data being loaded externally.
|
||||
*/
|
||||
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
|
||||
@ExtendWith(TestNamespaceExtension.class)
|
||||
public class OpenLineageLineageResolutionIT {
|
||||
|
||||
private static final ObjectMapper MAPPER = new ObjectMapper();
|
||||
private static final List<Column> DEFAULT_COLUMNS =
|
||||
List.of(
|
||||
new Column().withName("id").withDataType(ColumnDataType.BIGINT),
|
||||
new Column().withName("name").withDataType(ColumnDataType.VARCHAR).withDataLength(255));
|
||||
|
||||
private static String srcFqn;
|
||||
private static String tgtFqn;
|
||||
private static String serviceName;
|
||||
private static String schemaFqn;
|
||||
|
||||
@BeforeAll
|
||||
static void setup() {
|
||||
OpenLineage.setDefaultClient(SdkClients.adminClient());
|
||||
Tables.setDefaultClient(SdkClients.adminClient());
|
||||
LineageAPI.setDefaultClient(SdkClients.adminClient());
|
||||
DatabaseServices.setDefaultClient(SdkClients.adminClient());
|
||||
Databases.setDefaultClient(SdkClients.adminClient());
|
||||
DatabaseSchemas.setDefaultClient(SdkClients.adminClient());
|
||||
|
||||
String uniqueId = UUID.randomUUID().toString().substring(0, 8);
|
||||
serviceName = "ol_test_svc_" + uniqueId;
|
||||
|
||||
DatabaseService service =
|
||||
DatabaseServices.builder()
|
||||
.name(serviceName)
|
||||
.connection(
|
||||
DatabaseServices.postgresConnection()
|
||||
.hostPort("localhost:5432")
|
||||
.username("test")
|
||||
.build())
|
||||
.description("Test service for OpenLineage resolution tests")
|
||||
.create();
|
||||
|
||||
Database db =
|
||||
Databases.create().name("ecommerce_db").in(service.getFullyQualifiedName()).execute();
|
||||
|
||||
DatabaseSchema schema =
|
||||
DatabaseSchemas.create().name("shopify").in(db.getFullyQualifiedName()).execute();
|
||||
|
||||
schemaFqn = schema.getFullyQualifiedName();
|
||||
|
||||
Table rawOrder =
|
||||
Tables.create()
|
||||
.name("raw_order")
|
||||
.inSchema(schemaFqn)
|
||||
.withColumns(DEFAULT_COLUMNS)
|
||||
.execute();
|
||||
srcFqn = rawOrder.getFullyQualifiedName();
|
||||
|
||||
Table factOrder =
|
||||
Tables.create()
|
||||
.name("fact_order")
|
||||
.inSchema(schemaFqn)
|
||||
.withColumns(DEFAULT_COLUMNS)
|
||||
.execute();
|
||||
tgtFqn = factOrder.getFullyQualifiedName();
|
||||
|
||||
Tables.create().name("raw_customer").inSchema(schemaFqn).withColumns(DEFAULT_COLUMNS).execute();
|
||||
|
||||
Tables.create().name("dim_address").inSchema(schemaFqn).withColumns(DEFAULT_COLUMNS).execute();
|
||||
}
|
||||
|
||||
@Test
|
||||
@Order(1)
|
||||
void testSampleDataTablesExist() {
|
||||
FluentTable src = Tables.findByName(srcFqn).fetch();
|
||||
assertNotNull(src, "Source table " + srcFqn + " must exist");
|
||||
|
||||
FluentTable tgt = Tables.findByName(tgtFqn).fetch();
|
||||
assertNotNull(tgt, "Target table " + tgtFqn + " must exist");
|
||||
}
|
||||
|
||||
@Test
|
||||
@Order(2)
|
||||
void testCompleteEventCreatesLineageEdge(TestNamespace ns) throws Exception {
|
||||
String response =
|
||||
OpenLineage.event()
|
||||
.withEventType("COMPLETE")
|
||||
.withEventTime(Instant.now().toString())
|
||||
.withJob(ns.prefix("ol_resolution_job"), ns.prefix("namespace"))
|
||||
.withRun(UUID.randomUUID().toString())
|
||||
.addInput("ecommerce_db.shopify.raw_order", serviceName)
|
||||
.addOutput("ecommerce_db.shopify.fact_order", serviceName)
|
||||
.send();
|
||||
|
||||
assertNotNull(response);
|
||||
JsonNode json = MAPPER.readTree(response);
|
||||
assertEquals("success", json.get("status").asText());
|
||||
assertTrue(
|
||||
json.get("lineageEdgesCreated").asInt() >= 1,
|
||||
"Expected at least 1 lineage edge created, got: " + response);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Order(3)
|
||||
@SuppressWarnings("unchecked")
|
||||
void testLineageEdgeHasOpenLineageSource() throws Exception {
|
||||
LineageAPI.LineageGraph lineageGraph =
|
||||
LineageAPI.forName$("table", srcFqn).upstream(0).downstream(3).fetch();
|
||||
|
||||
assertNotNull(lineageGraph);
|
||||
Map<String, Object> lineage = MAPPER.readValue(lineageGraph.getRaw(), Map.class);
|
||||
var downstreamEdges = (java.util.List<?>) lineage.get("downstreamEdges");
|
||||
assertNotNull(downstreamEdges, "Expected downstream edges from " + srcFqn);
|
||||
|
||||
boolean hasOlEdge =
|
||||
downstreamEdges.stream()
|
||||
.map(e -> (Map<?, ?>) e)
|
||||
.map(e -> (Map<?, ?>) e.get("lineageDetails"))
|
||||
.filter(java.util.Objects::nonNull)
|
||||
.anyMatch(details -> "OpenLineage".equals(details.get("source")));
|
||||
|
||||
assertTrue(hasOlEdge, "Expected at least one edge with source=OpenLineage");
|
||||
}
|
||||
|
||||
@Test
|
||||
@Order(4)
|
||||
void testStartEventDoesNotCreateEdges(TestNamespace ns) throws Exception {
|
||||
String response =
|
||||
OpenLineage.event()
|
||||
.withEventType("START")
|
||||
.withEventTime(Instant.now().toString())
|
||||
.withJob(ns.prefix("start_only_job"), ns.prefix("namespace"))
|
||||
.withRun(UUID.randomUUID().toString())
|
||||
.addInput("ecommerce_db.shopify.raw_order", serviceName)
|
||||
.addOutput("ecommerce_db.shopify.fact_order", serviceName)
|
||||
.send();
|
||||
|
||||
JsonNode json = MAPPER.readTree(response);
|
||||
assertEquals(
|
||||
0, json.get("lineageEdgesCreated").asInt(), "START events should not create lineage edges");
|
||||
}
|
||||
|
||||
@Test
|
||||
@Order(5)
|
||||
void testUnresolvableDatasetsCreateNoEdges(TestNamespace ns) throws Exception {
|
||||
String response =
|
||||
OpenLineage.event()
|
||||
.withEventType("COMPLETE")
|
||||
.withEventTime(Instant.now().toString())
|
||||
.withJob(ns.prefix("unknown_job"), ns.prefix("namespace"))
|
||||
.withRun(UUID.randomUUID().toString())
|
||||
.addInput("nonexistent_schema.nonexistent_table", "nonexistent_service")
|
||||
.addOutput("nonexistent_schema.nonexistent_output", "nonexistent_service")
|
||||
.send();
|
||||
|
||||
JsonNode json = MAPPER.readTree(response);
|
||||
assertEquals(
|
||||
0, json.get("lineageEdgesCreated").asInt(), "Unresolvable datasets should create 0 edges");
|
||||
}
|
||||
|
||||
@Test
|
||||
@Order(6)
|
||||
void testMultiInputOutputCreatesAllEdges(TestNamespace ns) throws Exception {
|
||||
String response =
|
||||
OpenLineage.event()
|
||||
.withEventType("COMPLETE")
|
||||
.withEventTime(Instant.now().toString())
|
||||
.withJob(ns.prefix("multi_io_job"), ns.prefix("namespace"))
|
||||
.withRun(UUID.randomUUID().toString())
|
||||
.addInput("ecommerce_db.shopify.raw_order", serviceName)
|
||||
.addInput("ecommerce_db.shopify.raw_customer", serviceName)
|
||||
.addOutput("ecommerce_db.shopify.dim_address", serviceName)
|
||||
.send();
|
||||
|
||||
JsonNode json = MAPPER.readTree(response);
|
||||
assertTrue(
|
||||
json.get("lineageEdgesCreated").asInt() >= 2,
|
||||
"2 inputs → 1 output should create at least 2 edges, got: " + response);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Order(7)
|
||||
void testEmptyInputsOutputsCreateNoEdges(TestNamespace ns) throws Exception {
|
||||
String response =
|
||||
OpenLineage.event()
|
||||
.withEventType("COMPLETE")
|
||||
.withEventTime(Instant.now().toString())
|
||||
.withJob(ns.prefix("empty_io_job"), ns.prefix("namespace"))
|
||||
.withRun(UUID.randomUUID().toString())
|
||||
.send();
|
||||
|
||||
JsonNode json = MAPPER.readTree(response);
|
||||
assertEquals(
|
||||
0, json.get("lineageEdgesCreated").asInt(), "Empty inputs/outputs should create 0 edges");
|
||||
}
|
||||
}
|
||||
|
|
@ -69,7 +69,11 @@ public final class LineageAPI {
|
|||
// ==================== Lineage Builders ====================
|
||||
|
||||
public static LineageQuery for$(String entityType, String entityId) {
|
||||
return new LineageQuery(getClient(), entityType, entityId);
|
||||
return new LineageQuery(getClient(), entityType, entityId, false);
|
||||
}
|
||||
|
||||
public static LineageQuery forName$(String entityType, String fqn) {
|
||||
return new LineageQuery(getClient(), entityType, fqn, true);
|
||||
}
|
||||
|
||||
public static LineageConnector connect() {
|
||||
|
|
@ -97,15 +101,17 @@ public final class LineageAPI {
|
|||
public static class LineageQuery {
|
||||
private final OpenMetadataClient client;
|
||||
private final String entityType;
|
||||
private final String entityId;
|
||||
private final String identifier;
|
||||
private final boolean isFqn;
|
||||
private int upstreamDepth = 1;
|
||||
private int downstreamDepth = 1;
|
||||
private boolean includeDeleted = false;
|
||||
|
||||
LineageQuery(OpenMetadataClient client, String entityType, String entityId) {
|
||||
LineageQuery(OpenMetadataClient client, String entityType, String identifier, boolean isFqn) {
|
||||
this.client = client;
|
||||
this.entityType = entityType;
|
||||
this.entityId = entityId;
|
||||
this.identifier = identifier;
|
||||
this.isFqn = isFqn;
|
||||
}
|
||||
|
||||
public LineageQuery upstream(int depth) {
|
||||
|
|
@ -130,14 +136,26 @@ public final class LineageAPI {
|
|||
}
|
||||
|
||||
public LineageGraph fetch() {
|
||||
String result =
|
||||
client
|
||||
.lineage()
|
||||
.getEntityLineage(
|
||||
entityType,
|
||||
entityId,
|
||||
String.valueOf(upstreamDepth),
|
||||
String.valueOf(downstreamDepth));
|
||||
String result;
|
||||
if (isFqn) {
|
||||
result =
|
||||
client
|
||||
.lineage()
|
||||
.getLineageByName(
|
||||
entityType,
|
||||
identifier,
|
||||
String.valueOf(upstreamDepth),
|
||||
String.valueOf(downstreamDepth));
|
||||
} else {
|
||||
result =
|
||||
client
|
||||
.lineage()
|
||||
.getEntityLineage(
|
||||
entityType,
|
||||
identifier,
|
||||
String.valueOf(upstreamDepth),
|
||||
String.valueOf(downstreamDepth));
|
||||
}
|
||||
return new LineageGraph(result, client);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ import static org.openmetadata.schema.type.Include.NON_DELETED;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.openmetadata.schema.api.lineage.openlineage.DatasetFacets;
|
||||
|
|
@ -32,6 +33,7 @@ import org.openmetadata.schema.api.lineage.openlineage.SchemaFacet;
|
|||
import org.openmetadata.schema.api.lineage.openlineage.SchemaField;
|
||||
import org.openmetadata.schema.api.lineage.openlineage.SymlinkIdentifier;
|
||||
import org.openmetadata.schema.api.lineage.openlineage.SymlinksFacet;
|
||||
import org.openmetadata.schema.entity.data.Container;
|
||||
import org.openmetadata.schema.entity.data.Pipeline;
|
||||
import org.openmetadata.schema.entity.data.Table;
|
||||
import org.openmetadata.schema.type.Column;
|
||||
|
|
@ -45,8 +47,12 @@ import org.openmetadata.service.jdbi3.EntityRepository;
|
|||
@Slf4j
|
||||
public class OpenLineageEntityResolver {
|
||||
|
||||
private static final Set<String> STORAGE_URI_SCHEMES =
|
||||
Set.of("gs://", "s3://", "s3a://", "abfss://", "abfs://", "wasbs://", "adl://");
|
||||
|
||||
private final Map<String, EntityReference> tableCache = new ConcurrentHashMap<>();
|
||||
private final Map<String, EntityReference> pipelineCache = new ConcurrentHashMap<>();
|
||||
private final Map<String, EntityReference> containerCache = new ConcurrentHashMap<>();
|
||||
private final boolean autoCreateEntities;
|
||||
private final String defaultPipelineService;
|
||||
private final Map<String, String> namespaceToServiceMapping;
|
||||
|
|
@ -132,6 +138,51 @@ public class OpenLineageEntityResolver {
|
|||
return createTableFromOutput(dataset, updatedBy);
|
||||
}
|
||||
|
||||
public boolean isStorageDataset(String namespace) {
|
||||
if (nullOrEmpty(namespace)) {
|
||||
return false;
|
||||
}
|
||||
String lower = namespace.toLowerCase();
|
||||
for (String scheme : STORAGE_URI_SCHEMES) {
|
||||
if (lower.startsWith(scheme)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public EntityReference resolveContainer(String namespace, String name) {
|
||||
if (nullOrEmpty(namespace) || nullOrEmpty(name)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
String fullPath = namespace.endsWith("/") ? namespace + name : namespace + "/" + name;
|
||||
String cacheKey = "container:" + fullPath;
|
||||
|
||||
EntityReference cached = containerCache.get(cacheKey);
|
||||
if (cached != null) {
|
||||
return cached;
|
||||
}
|
||||
|
||||
EntityReference ref = searchContainerByFullPath(fullPath);
|
||||
if (ref != null) {
|
||||
containerCache.put(cacheKey, ref);
|
||||
return ref;
|
||||
}
|
||||
|
||||
// Try without wildcard suffixes (e.g., "gs://bucket/path/file_*.csv" → "gs://bucket/path")
|
||||
String parentPath = extractParentPath(fullPath);
|
||||
if (parentPath != null && !parentPath.equals(fullPath)) {
|
||||
ref = searchContainerByFullPath(parentPath);
|
||||
if (ref != null) {
|
||||
containerCache.put(cacheKey, ref);
|
||||
return ref;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public EntityReference resolveOrCreatePipeline(String namespace, String name, String updatedBy) {
|
||||
if (nullOrEmpty(name)) {
|
||||
return null;
|
||||
|
|
@ -157,6 +208,22 @@ public class OpenLineageEntityResolver {
|
|||
LOG.debug("Pipeline not found: {}", pipelineFqn);
|
||||
}
|
||||
|
||||
// Fallback: try namespace as service name, e.g. fasfas.stackoverflow_etl_lineage
|
||||
if (!nullOrEmpty(namespace)) {
|
||||
String fallbackFqn = namespace + "." + name;
|
||||
try {
|
||||
EntityReference ref =
|
||||
Entity.getEntityReferenceByName(Entity.PIPELINE, fallbackFqn, NON_DELETED);
|
||||
if (ref != null) {
|
||||
LOG.info("Resolved pipeline via namespace fallback: {}", fallbackFqn);
|
||||
pipelineCache.put(cacheKey, ref);
|
||||
return ref;
|
||||
}
|
||||
} catch (EntityNotFoundException e) {
|
||||
LOG.debug("Pipeline not found by namespace fallback: {}", fallbackFqn);
|
||||
}
|
||||
}
|
||||
|
||||
if (!autoCreateEntities) {
|
||||
LOG.debug("Auto-create disabled, skipping pipeline creation for: {}", pipelineName);
|
||||
return null;
|
||||
|
|
@ -334,6 +401,41 @@ public class OpenLineageEntityResolver {
|
|||
return null;
|
||||
}
|
||||
|
||||
private EntityReference searchContainerByFullPath(String fullPath) {
|
||||
try {
|
||||
@SuppressWarnings("unchecked")
|
||||
EntityRepository<Container> containerRepository =
|
||||
(EntityRepository<Container>) Entity.getEntityRepository(Entity.CONTAINER);
|
||||
|
||||
List<Container> containers =
|
||||
containerRepository.listAll(
|
||||
containerRepository.getFields(""), new ListFilterByJsonField("fullPath", fullPath));
|
||||
|
||||
if (!containers.isEmpty()) {
|
||||
Container container = containers.get(0);
|
||||
LOG.debug(
|
||||
"Resolved container by fullPath: {} -> {}",
|
||||
fullPath,
|
||||
container.getFullyQualifiedName());
|
||||
return container.getEntityReference();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.debug("Error searching for container by fullPath {}: {}", fullPath, e.getMessage());
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private String extractParentPath(String path) {
|
||||
if (path == null) {
|
||||
return null;
|
||||
}
|
||||
int lastSlash = path.lastIndexOf('/');
|
||||
if (lastSlash <= 0) {
|
||||
return null;
|
||||
}
|
||||
return path.substring(0, lastSlash);
|
||||
}
|
||||
|
||||
private EntityReference createTableFromInput(OpenLineageInputDataset dataset, String updatedBy) {
|
||||
return createTableInternal(
|
||||
dataset.getNamespace(), dataset.getName(), dataset.getFacets(), updatedBy);
|
||||
|
|
@ -377,6 +479,7 @@ public class OpenLineageEntityResolver {
|
|||
List<EntityReference> owners = extractOwners(facets);
|
||||
|
||||
Table newTable = new Table();
|
||||
newTable.setId(java.util.UUID.randomUUID());
|
||||
newTable.setName(table);
|
||||
newTable.setFullyQualifiedName(schemaFqn + "." + table);
|
||||
newTable.setDatabaseSchema(
|
||||
|
|
@ -561,6 +664,7 @@ public class OpenLineageEntityResolver {
|
|||
Entity.PIPELINE_SERVICE, defaultPipelineService, NON_DELETED);
|
||||
|
||||
Pipeline newPipeline = new Pipeline();
|
||||
newPipeline.setId(java.util.UUID.randomUUID());
|
||||
newPipeline.setName(pipelineName);
|
||||
newPipeline.setFullyQualifiedName(buildPipelineFqn(pipelineName));
|
||||
newPipeline.setService(serviceRef);
|
||||
|
|
@ -589,6 +693,7 @@ public class OpenLineageEntityResolver {
|
|||
public void clearCache() {
|
||||
tableCache.clear();
|
||||
pipelineCache.clear();
|
||||
containerCache.clear();
|
||||
}
|
||||
|
||||
private static class ListFilterByFqnSuffix extends org.openmetadata.service.jdbi3.ListFilter {
|
||||
|
|
@ -598,24 +703,10 @@ public class OpenLineageEntityResolver {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String getCondition() {
|
||||
return getFqnCondition(null, "fqnSuffix");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCondition(String alias) {
|
||||
return getFqnCondition(alias, "fqnSuffix");
|
||||
}
|
||||
|
||||
private String getFqnCondition(String alias, String paramName) {
|
||||
String column = alias == null ? "json" : alias + ".json";
|
||||
if (Boolean.TRUE.equals(
|
||||
org.openmetadata.service.resources.databases.DatasourceConfig.getInstance().isMySQL())) {
|
||||
return String.format(
|
||||
"JSON_UNQUOTE(JSON_EXTRACT(%s, '$.fullyQualifiedName')) LIKE :%s", column, paramName);
|
||||
} else {
|
||||
return String.format("%s->>'fullyQualifiedName' LIKE :%s", column, paramName);
|
||||
}
|
||||
public String getCondition(String tableName) {
|
||||
String baseCondition = super.getCondition(tableName);
|
||||
String fqnClause = buildFqnLikeClause(tableName, "fqnSuffix");
|
||||
return baseCondition + " AND " + fqnClause;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -626,24 +717,47 @@ public class OpenLineageEntityResolver {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String getCondition() {
|
||||
return getFqnCondition(null);
|
||||
public String getCondition(String tableName) {
|
||||
String baseCondition = super.getCondition(tableName);
|
||||
String fqnClause = buildFqnLikeClause(tableName, "fqnPattern");
|
||||
return baseCondition + " AND " + fqnClause;
|
||||
}
|
||||
}
|
||||
|
||||
private static class ListFilterByJsonField extends org.openmetadata.service.jdbi3.ListFilter {
|
||||
private final String fieldName;
|
||||
|
||||
public ListFilterByJsonField(String fieldName, String value) {
|
||||
super(Include.NON_DELETED);
|
||||
this.fieldName = fieldName;
|
||||
addQueryParam("jsonFieldValue", value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCondition(String alias) {
|
||||
return getFqnCondition(alias);
|
||||
}
|
||||
|
||||
private String getFqnCondition(String alias) {
|
||||
String column = alias == null ? "json" : alias + ".json";
|
||||
public String getCondition(String tableName) {
|
||||
String baseCondition = super.getCondition(tableName);
|
||||
String column = tableName == null ? "json" : tableName + ".json";
|
||||
String fieldClause;
|
||||
if (Boolean.TRUE.equals(
|
||||
org.openmetadata.service.resources.databases.DatasourceConfig.getInstance().isMySQL())) {
|
||||
return String.format(
|
||||
"JSON_UNQUOTE(JSON_EXTRACT(%s, '$.fullyQualifiedName')) LIKE :fqnPattern", column);
|
||||
fieldClause =
|
||||
String.format(
|
||||
"JSON_UNQUOTE(JSON_EXTRACT(%s, '$.%s')) = :jsonFieldValue", column, fieldName);
|
||||
} else {
|
||||
return String.format("%s->>'fullyQualifiedName' LIKE :fqnPattern", column);
|
||||
fieldClause = String.format("%s->>'%s' = :jsonFieldValue", column, fieldName);
|
||||
}
|
||||
return baseCondition + " AND " + fieldClause;
|
||||
}
|
||||
}
|
||||
|
||||
private static String buildFqnLikeClause(String tableName, String paramName) {
|
||||
String column = tableName == null ? "json" : tableName + ".json";
|
||||
if (Boolean.TRUE.equals(
|
||||
org.openmetadata.service.resources.databases.DatasourceConfig.getInstance().isMySQL())) {
|
||||
return String.format(
|
||||
"JSON_UNQUOTE(JSON_EXTRACT(%s, '$.fullyQualifiedName')) LIKE :%s", column, paramName);
|
||||
} else {
|
||||
return String.format("%s->>'fullyQualifiedName' LIKE :%s", column, paramName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -57,7 +57,9 @@ public class OpenLineageMapper {
|
|||
public OpenLineageMapper(OpenLineageEntityResolver entityResolver, OpenLineageSettings settings) {
|
||||
this.entityResolver = entityResolver;
|
||||
|
||||
if (settings != null && settings.getEventTypeFilter() != null) {
|
||||
if (settings != null
|
||||
&& settings.getEventTypeFilter() != null
|
||||
&& !settings.getEventTypeFilter().isEmpty()) {
|
||||
this.allowedEventTypes =
|
||||
settings.getEventTypeFilter().stream()
|
||||
.map(OpenLineageEventType::value)
|
||||
|
|
@ -97,6 +99,9 @@ public class OpenLineageMapper {
|
|||
|
||||
for (OpenLineageOutputDataset output : outputs) {
|
||||
EntityReference outputRef = entityResolver.resolveOrCreateTable(output, updatedBy);
|
||||
if (outputRef == null && entityResolver.isStorageDataset(output.getNamespace())) {
|
||||
outputRef = entityResolver.resolveContainer(output.getNamespace(), output.getName());
|
||||
}
|
||||
if (outputRef == null) {
|
||||
LOG.warn(
|
||||
"Could not resolve output dataset: {}.{}", output.getNamespace(), output.getName());
|
||||
|
|
@ -108,6 +113,9 @@ public class OpenLineageMapper {
|
|||
|
||||
for (OpenLineageInputDataset input : inputs) {
|
||||
EntityReference inputRef = entityResolver.resolveOrCreateTable(input, updatedBy);
|
||||
if (inputRef == null && entityResolver.isStorageDataset(input.getNamespace())) {
|
||||
inputRef = entityResolver.resolveContainer(input.getNamespace(), input.getName());
|
||||
}
|
||||
if (inputRef == null) {
|
||||
LOG.warn("Could not resolve input dataset: {}.{}", input.getNamespace(), input.getName());
|
||||
continue;
|
||||
|
|
@ -153,6 +161,9 @@ public class OpenLineageMapper {
|
|||
for (OpenLineageInputDataset input : inputs) {
|
||||
String olName = buildOpenLineageDatasetName(input.getNamespace(), input.getName());
|
||||
EntityReference ref = entityResolver.resolveTable(input);
|
||||
if (ref == null && entityResolver.isStorageDataset(input.getNamespace())) {
|
||||
ref = entityResolver.resolveContainer(input.getNamespace(), input.getName());
|
||||
}
|
||||
if (ref != null) {
|
||||
map.put(olName, ref.getFullyQualifiedName());
|
||||
}
|
||||
|
|
@ -198,12 +209,15 @@ public class OpenLineageMapper {
|
|||
|
||||
List<ColumnLineage> columnLineages = new ArrayList<>();
|
||||
|
||||
// Check outputFacets first (OpenLineage spec location), fall back to dataset facets
|
||||
ColumnLineageFacet columnLineageFacet = null;
|
||||
OutputDatasetFacets outputFacets = output.getOutputFacets();
|
||||
if (outputFacets == null) {
|
||||
return columnLineages;
|
||||
if (outputFacets != null) {
|
||||
columnLineageFacet = outputFacets.getColumnLineage();
|
||||
}
|
||||
if (columnLineageFacet == null && output.getFacets() != null) {
|
||||
columnLineageFacet = output.getFacets().getColumnLineage();
|
||||
}
|
||||
|
||||
ColumnLineageFacet columnLineageFacet = outputFacets.getColumnLineage();
|
||||
if (columnLineageFacet == null || columnLineageFacet.getFields() == null) {
|
||||
return columnLineages;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ import org.openmetadata.schema.services.connections.database.MysqlConnection;
|
|||
import org.openmetadata.schema.services.connections.database.PostgresConnection;
|
||||
import org.openmetadata.schema.services.connections.database.SQLiteConnection;
|
||||
import org.openmetadata.schema.services.connections.pipeline.AirflowConnection;
|
||||
import org.openmetadata.schema.services.connections.pipeline.AirflowRestApiConnection;
|
||||
import org.openmetadata.schema.services.connections.pipeline.BackendConnection;
|
||||
import org.openmetadata.schema.utils.JsonUtils;
|
||||
|
||||
|
|
@ -31,6 +32,7 @@ public class AirflowConnectionClassConverter extends ClassConverter {
|
|||
MysqlConnection.class,
|
||||
PostgresConnection.class,
|
||||
MssqlConnection.class,
|
||||
AirflowRestApiConnection.class,
|
||||
SQLiteConnection.class);
|
||||
|
||||
public AirflowConnectionClassConverter() {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,66 @@
|
|||
/*
|
||||
* Copyright 2021 Collate
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.openmetadata.service.secrets.converter;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.openmetadata.schema.entity.utils.common.AccessTokenConfig;
|
||||
import org.openmetadata.schema.entity.utils.common.BasicAuthConfig;
|
||||
import org.openmetadata.schema.entity.utils.common.GcpCredentialsConfig;
|
||||
import org.openmetadata.schema.entity.utils.common.MWAAAuthConfig;
|
||||
import org.openmetadata.schema.security.credentials.AWSCredentials;
|
||||
import org.openmetadata.schema.security.credentials.GCPCredentials;
|
||||
import org.openmetadata.schema.services.connections.pipeline.AirflowRestApiConnection;
|
||||
import org.openmetadata.schema.utils.JsonUtils;
|
||||
|
||||
/** Converter class to get an `AirflowRestApiConnection` object. */
|
||||
public class AirflowRestApiConnectionClassConverter extends ClassConverter {
|
||||
|
||||
public AirflowRestApiConnectionClassConverter() {
|
||||
super(AirflowRestApiConnection.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object convert(Object object) {
|
||||
AirflowRestApiConnection conn =
|
||||
(AirflowRestApiConnection) JsonUtils.convertValue(object, this.clazz);
|
||||
|
||||
if (!(conn.getAuthConfig() instanceof Map<?, ?> authMap)) {
|
||||
return conn;
|
||||
}
|
||||
|
||||
if (authMap.containsKey("username")) {
|
||||
tryToConvertOrFail(authMap, List.of(BasicAuthConfig.class)).ifPresent(conn::setAuthConfig);
|
||||
} else if (authMap.containsKey("token")) {
|
||||
tryToConvertOrFail(authMap, List.of(AccessTokenConfig.class)).ifPresent(conn::setAuthConfig);
|
||||
} else if (authMap.containsKey("credentials")) {
|
||||
tryToConvertOrFail(authMap, List.of(GcpCredentialsConfig.class))
|
||||
.ifPresent(conn::setAuthConfig);
|
||||
if (conn.getAuthConfig() instanceof GcpCredentialsConfig gcpCfg) {
|
||||
tryToConvertOrFail(gcpCfg.getCredentials(), List.of(GCPCredentials.class))
|
||||
.ifPresent(obj -> gcpCfg.setCredentials((GCPCredentials) obj));
|
||||
}
|
||||
} else if (authMap.containsKey("mwaaConfig")) {
|
||||
tryToConvertOrFail(authMap, List.of(MWAAAuthConfig.class)).ifPresent(conn::setAuthConfig);
|
||||
if (conn.getAuthConfig() instanceof MWAAAuthConfig mwaaCfg) {
|
||||
if (mwaaCfg.getMwaaConfig() != null && mwaaCfg.getMwaaConfig().getAwsConfig() != null) {
|
||||
tryToConvertOrFail(mwaaCfg.getMwaaConfig().getAwsConfig(), List.of(AWSCredentials.class))
|
||||
.ifPresent(obj -> mwaaCfg.getMwaaConfig().setAwsConfig((AWSCredentials) obj));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return conn;
|
||||
}
|
||||
}
|
||||
|
|
@ -55,6 +55,7 @@ import org.openmetadata.schema.services.connections.drive.GoogleDriveConnection;
|
|||
import org.openmetadata.schema.services.connections.mlmodel.VertexAIConnection;
|
||||
import org.openmetadata.schema.services.connections.pipeline.AirbyteConnection;
|
||||
import org.openmetadata.schema.services.connections.pipeline.AirflowConnection;
|
||||
import org.openmetadata.schema.services.connections.pipeline.AirflowRestApiConnection;
|
||||
import org.openmetadata.schema.services.connections.pipeline.MatillionConnection;
|
||||
import org.openmetadata.schema.services.connections.pipeline.MulesoftConnection;
|
||||
import org.openmetadata.schema.services.connections.pipeline.NifiConnection;
|
||||
|
|
@ -77,6 +78,7 @@ public final class ClassConverterFactory {
|
|||
Map.ofEntries(
|
||||
Map.entry(AirbyteConnection.class, new AirbyteConnectionClassConverter()),
|
||||
Map.entry(AirflowConnection.class, new AirflowConnectionClassConverter()),
|
||||
Map.entry(AirflowRestApiConnection.class, new AirflowRestApiConnectionClassConverter()),
|
||||
Map.entry(BigQueryConnection.class, new BigQueryConnectionClassConverter()),
|
||||
Map.entry(BigTableConnection.class, new BigTableConnectionClassConverter()),
|
||||
Map.entry(DatalakeConnection.class, new DatalakeConnectionClassConverter()),
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -597,6 +597,125 @@ class OpenLineageMapperTest {
|
|||
assertTrue(description.contains("my-job"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void mapRunEvent_storageOutputDataset_resolvesContainer() {
|
||||
OpenLineageRunEvent event = createBaseEvent(EventType.COMPLETE);
|
||||
OpenLineageInputDataset input = createInputDataset("ns", "schema.input_table");
|
||||
OpenLineageOutputDataset output = createOutputDataset("gs://my-bucket", "data/output.csv");
|
||||
event.setInputs(List.of(input));
|
||||
event.setOutputs(List.of(output));
|
||||
|
||||
EntityReference inputRef = createEntityReference("i1", "svc.db.schema.input_table");
|
||||
EntityReference containerRef =
|
||||
new EntityReference()
|
||||
.withId(
|
||||
UUID.fromString(
|
||||
"00000000-0000-0000-0000-"
|
||||
+ String.format("%012d", "c1".hashCode() & 0xFFFFFFFFL)))
|
||||
.withType("container")
|
||||
.withFullyQualifiedName("storage.my-bucket.data_output");
|
||||
|
||||
when(entityResolver.resolveTable(input)).thenReturn(inputRef);
|
||||
when(entityResolver.resolveOrCreateTable(eq(output), eq(UPDATED_BY))).thenReturn(null);
|
||||
when(entityResolver.isStorageDataset("gs://my-bucket")).thenReturn(true);
|
||||
when(entityResolver.resolveContainer("gs://my-bucket", "data/output.csv"))
|
||||
.thenReturn(containerRef);
|
||||
when(entityResolver.resolveOrCreateTable(eq(input), eq(UPDATED_BY))).thenReturn(inputRef);
|
||||
when(entityResolver.resolveOrCreatePipeline(anyString(), anyString(), eq(UPDATED_BY)))
|
||||
.thenReturn(null);
|
||||
|
||||
List<AddLineage> result = mapper.mapRunEvent(event, UPDATED_BY);
|
||||
|
||||
assertEquals(1, result.size());
|
||||
assertEquals(containerRef, result.get(0).getEdge().getToEntity());
|
||||
}
|
||||
|
||||
@Test
|
||||
void mapRunEvent_storageInputDataset_resolvesContainer() {
|
||||
OpenLineageRunEvent event = createBaseEvent(EventType.COMPLETE);
|
||||
OpenLineageInputDataset input = createInputDataset("s3://my-bucket", "data/input.parquet");
|
||||
OpenLineageOutputDataset output = createOutputDataset("ns", "schema.output_table");
|
||||
event.setInputs(List.of(input));
|
||||
event.setOutputs(List.of(output));
|
||||
|
||||
EntityReference outputRef = createEntityReference("o1", "svc.db.schema.output_table");
|
||||
EntityReference containerRef =
|
||||
new EntityReference()
|
||||
.withId(
|
||||
UUID.fromString(
|
||||
"00000000-0000-0000-0000-"
|
||||
+ String.format("%012d", "c2".hashCode() & 0xFFFFFFFFL)))
|
||||
.withType("container")
|
||||
.withFullyQualifiedName("storage.my-bucket.data_input");
|
||||
|
||||
when(entityResolver.resolveTable(input)).thenReturn(null);
|
||||
when(entityResolver.isStorageDataset("s3://my-bucket")).thenReturn(true);
|
||||
when(entityResolver.resolveContainer("s3://my-bucket", "data/input.parquet"))
|
||||
.thenReturn(containerRef);
|
||||
when(entityResolver.resolveOrCreateTable(eq(output), eq(UPDATED_BY))).thenReturn(outputRef);
|
||||
when(entityResolver.resolveOrCreateTable(eq(input), eq(UPDATED_BY))).thenReturn(null);
|
||||
when(entityResolver.resolveOrCreatePipeline(anyString(), anyString(), eq(UPDATED_BY)))
|
||||
.thenReturn(null);
|
||||
|
||||
List<AddLineage> result = mapper.mapRunEvent(event, UPDATED_BY);
|
||||
|
||||
assertEquals(1, result.size());
|
||||
assertEquals(containerRef, result.get(0).getEdge().getFromEntity());
|
||||
}
|
||||
|
||||
@Test
|
||||
void mapRunEvent_columnLineageInDatasetFacets_extractsColumnLineage() {
|
||||
OpenLineageRunEvent event = createBaseEvent(EventType.COMPLETE);
|
||||
|
||||
String inputNamespace = "input-ns";
|
||||
String inputName = "schema.input_table";
|
||||
OpenLineageInputDataset input = createInputDataset(inputNamespace, inputName);
|
||||
|
||||
String outputNamespace = "output-ns";
|
||||
String outputName = "schema.output_table";
|
||||
OpenLineageOutputDataset output = createOutputDataset(outputNamespace, outputName);
|
||||
|
||||
InputField inputField =
|
||||
new InputField().withNamespace(inputNamespace).withName(inputName).withField("src_col");
|
||||
|
||||
ColumnLineageField columnLineageField =
|
||||
new ColumnLineageField()
|
||||
.withInputFields(List.of(inputField))
|
||||
.withTransformationDescription("IDENTITY");
|
||||
|
||||
Fields fields = new Fields();
|
||||
fields.setAdditionalProperty("dst_col", columnLineageField);
|
||||
|
||||
ColumnLineageFacet columnLineageFacet = new ColumnLineageFacet().withFields(fields);
|
||||
|
||||
// Set column lineage on dataset facets (NOT outputFacets) to cover line 219
|
||||
org.openmetadata.schema.api.lineage.openlineage.DatasetFacets datasetFacets =
|
||||
new org.openmetadata.schema.api.lineage.openlineage.DatasetFacets()
|
||||
.withColumnLineage(columnLineageFacet);
|
||||
output.setFacets(datasetFacets);
|
||||
|
||||
event.setInputs(List.of(input));
|
||||
event.setOutputs(List.of(output));
|
||||
|
||||
EntityReference inputRef = createEntityReference("i1", "service.db.schema.input_table");
|
||||
EntityReference outputRef = createEntityReference("o1", "service.db.schema.output_table");
|
||||
|
||||
when(entityResolver.resolveTable(input)).thenReturn(inputRef);
|
||||
when(entityResolver.resolveOrCreateTable(eq(output), eq(UPDATED_BY))).thenReturn(outputRef);
|
||||
when(entityResolver.resolveOrCreateTable(eq(input), eq(UPDATED_BY))).thenReturn(inputRef);
|
||||
when(entityResolver.resolveOrCreatePipeline(anyString(), anyString(), eq(UPDATED_BY)))
|
||||
.thenReturn(null);
|
||||
|
||||
List<AddLineage> result = mapper.mapRunEvent(event, UPDATED_BY);
|
||||
|
||||
assertEquals(1, result.size());
|
||||
List<ColumnLineage> columnLineages =
|
||||
result.get(0).getEdge().getLineageDetails().getColumnsLineage();
|
||||
assertNotNull(columnLineages);
|
||||
assertEquals(1, columnLineages.size());
|
||||
assertEquals("service.db.schema.output_table.dst_col", columnLineages.get(0).getToColumn());
|
||||
}
|
||||
|
||||
@Test
|
||||
void mapRunEvent_noColumnLineageFacet_noColumnLineageInResult() {
|
||||
OpenLineageRunEvent event = createBaseEvent(EventType.COMPLETE);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,129 @@
|
|||
/*
|
||||
* Copyright 2021 Collate
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.openmetadata.service.secrets.converter;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.openmetadata.schema.entity.utils.common.AccessTokenConfig;
|
||||
import org.openmetadata.schema.entity.utils.common.BasicAuthConfig;
|
||||
import org.openmetadata.schema.entity.utils.common.GcpCredentialsConfig;
|
||||
import org.openmetadata.schema.entity.utils.common.MWAAAuthConfig;
|
||||
import org.openmetadata.schema.services.connections.pipeline.AirflowRestApiConnection;
|
||||
|
||||
class AirflowRestApiConnectionClassConverterTest {
|
||||
|
||||
private final AirflowRestApiConnectionClassConverter converter =
|
||||
new AirflowRestApiConnectionClassConverter();
|
||||
|
||||
@Test
|
||||
void convert_basicAuth_convertsAuthConfig() {
|
||||
Map<String, Object> authMap = new HashMap<>();
|
||||
authMap.put("username", "admin");
|
||||
authMap.put("password", "secret");
|
||||
|
||||
Map<String, Object> connMap = new HashMap<>();
|
||||
connMap.put("authConfig", authMap);
|
||||
|
||||
Object result = converter.convert(connMap);
|
||||
|
||||
assertInstanceOf(AirflowRestApiConnection.class, result);
|
||||
AirflowRestApiConnection conn = (AirflowRestApiConnection) result;
|
||||
assertInstanceOf(BasicAuthConfig.class, conn.getAuthConfig());
|
||||
BasicAuthConfig auth = (BasicAuthConfig) conn.getAuthConfig();
|
||||
assertEquals("admin", auth.getUsername());
|
||||
assertEquals("secret", auth.getPassword());
|
||||
}
|
||||
|
||||
@Test
|
||||
void convert_accessToken_convertsAuthConfig() {
|
||||
Map<String, Object> authMap = new HashMap<>();
|
||||
authMap.put("token", "my-access-token");
|
||||
|
||||
Map<String, Object> connMap = new HashMap<>();
|
||||
connMap.put("authConfig", authMap);
|
||||
|
||||
Object result = converter.convert(connMap);
|
||||
|
||||
assertInstanceOf(AirflowRestApiConnection.class, result);
|
||||
AirflowRestApiConnection conn = (AirflowRestApiConnection) result;
|
||||
assertInstanceOf(AccessTokenConfig.class, conn.getAuthConfig());
|
||||
AccessTokenConfig auth = (AccessTokenConfig) conn.getAuthConfig();
|
||||
assertEquals("my-access-token", auth.getToken());
|
||||
}
|
||||
|
||||
@Test
|
||||
void convert_gcpCredentials_convertsAuthConfig() {
|
||||
Map<String, Object> gcpValues = new HashMap<>();
|
||||
gcpValues.put("type", "service_account");
|
||||
gcpValues.put("projectId", "my-project");
|
||||
|
||||
Map<String, Object> gcpCreds = new HashMap<>();
|
||||
gcpCreds.put("gcpConfig", gcpValues);
|
||||
|
||||
Map<String, Object> authMap = new HashMap<>();
|
||||
authMap.put("credentials", gcpCreds);
|
||||
|
||||
Map<String, Object> connMap = new HashMap<>();
|
||||
connMap.put("authConfig", authMap);
|
||||
|
||||
Object result = converter.convert(connMap);
|
||||
|
||||
assertInstanceOf(AirflowRestApiConnection.class, result);
|
||||
AirflowRestApiConnection conn = (AirflowRestApiConnection) result;
|
||||
assertInstanceOf(GcpCredentialsConfig.class, conn.getAuthConfig());
|
||||
}
|
||||
|
||||
@Test
|
||||
void convert_mwaaAuth_convertsAuthConfig() {
|
||||
Map<String, Object> awsConfig = new HashMap<>();
|
||||
awsConfig.put("awsRegion", "us-east-1");
|
||||
awsConfig.put("awsAccessKeyId", "AKIAIOSFODNN7EXAMPLE");
|
||||
awsConfig.put("awsSecretAccessKey", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY");
|
||||
|
||||
Map<String, Object> mwaaConfig = new HashMap<>();
|
||||
mwaaConfig.put("mwaaEnvironmentName", "my-environment");
|
||||
mwaaConfig.put("awsConfig", awsConfig);
|
||||
|
||||
Map<String, Object> authMap = new HashMap<>();
|
||||
authMap.put("mwaaConfig", mwaaConfig);
|
||||
|
||||
Map<String, Object> connMap = new HashMap<>();
|
||||
connMap.put("authConfig", authMap);
|
||||
|
||||
Object result = converter.convert(connMap);
|
||||
|
||||
assertInstanceOf(AirflowRestApiConnection.class, result);
|
||||
AirflowRestApiConnection conn = (AirflowRestApiConnection) result;
|
||||
assertInstanceOf(MWAAAuthConfig.class, conn.getAuthConfig());
|
||||
MWAAAuthConfig auth = (MWAAAuthConfig) conn.getAuthConfig();
|
||||
assertNotNull(auth.getMwaaConfig());
|
||||
assertEquals("my-environment", auth.getMwaaConfig().getMwaaEnvironmentName());
|
||||
}
|
||||
|
||||
@Test
|
||||
void convert_nullAuthConfig_returnsConnectionWithoutConversion() {
|
||||
// When authConfig is null, it's not a Map instance, so line 40 (early return) is hit
|
||||
Map<String, Object> connMap = new HashMap<>();
|
||||
connMap.put("authConfig", null);
|
||||
|
||||
Object result = converter.convert(connMap);
|
||||
|
||||
assertInstanceOf(AirflowRestApiConnection.class, result);
|
||||
}
|
||||
}
|
||||
|
|
@ -295,6 +295,9 @@
|
|||
},
|
||||
"ownership": {
|
||||
"$ref": "#/definitions/ownershipFacet"
|
||||
},
|
||||
"columnLineage": {
|
||||
"$ref": "#/definitions/columnLineageFacet"
|
||||
}
|
||||
},
|
||||
"additionalProperties": true
|
||||
|
|
|
|||
|
|
@ -33,9 +33,12 @@
|
|||
"default": "10"
|
||||
},
|
||||
"connection": {
|
||||
"title": "Metadata Database Connection",
|
||||
"description": "Underlying database connection. See https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for supported backends.",
|
||||
"title": "Airflow Connection",
|
||||
"description": "Choose between database connection or REST API connection to fetch metadata from Airflow.",
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "../../../utils/airflowRestApiConnection.json"
|
||||
},
|
||||
{
|
||||
"$ref": "backendConnection.json"
|
||||
},
|
||||
|
|
|
|||
|
|
@ -0,0 +1,57 @@
|
|||
{
|
||||
"$id": "https://open-metadata.org/schema/entity/utils/airflowRestApiConnection.json",
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "AirflowRestApiConnection",
|
||||
"description": "Airflow REST API Connection Config for connecting via REST API.",
|
||||
"type": "object",
|
||||
"javaType": "org.openmetadata.schema.services.connections.pipeline.AirflowRestApiConnection",
|
||||
"definitions": {
|
||||
"ApiVersion": {
|
||||
"description": "Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect the version automatically.",
|
||||
"type": "string",
|
||||
"enum": ["v1", "v2", "auto"],
|
||||
"default": "auto"
|
||||
}
|
||||
},
|
||||
"properties": {
|
||||
"type": {
|
||||
"title": "Service Type",
|
||||
"description": "Service Type",
|
||||
"type": "string",
|
||||
"enum": ["RestAPI"],
|
||||
"default": "RestAPI"
|
||||
},
|
||||
"authConfig": {
|
||||
"title": "Authentication Configuration",
|
||||
"description": "Choose an authentication method: Basic Auth (username/password), Access Token, GCP Service Account (for Cloud Composer), or AWS Credentials (for MWAA).",
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "./common/basicAuthConfig.json"
|
||||
},
|
||||
{
|
||||
"$ref": "./common/accessTokenConfig.json"
|
||||
},
|
||||
{
|
||||
"$ref": "./common/gcpCredentialsConfig.json"
|
||||
},
|
||||
{
|
||||
"$ref": "./common/mwaaAuthConfig.json"
|
||||
}
|
||||
]
|
||||
},
|
||||
"apiVersion": {
|
||||
"title": "API Version",
|
||||
"description": "Airflow REST API version.",
|
||||
"$ref": "#/definitions/ApiVersion",
|
||||
"default": "auto"
|
||||
},
|
||||
"verifySSL": {
|
||||
"title": "Verify SSL",
|
||||
"description": "Whether to verify SSL certificates when connecting to the Airflow API.",
|
||||
"type": "boolean",
|
||||
"default": true
|
||||
}
|
||||
},
|
||||
"required": ["authConfig"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"$id": "https://open-metadata.org/schema/entity/utils/common/accessTokenConfig.json",
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "Access Token",
|
||||
"description": "Static access token for Airflow API authentication.",
|
||||
"type": "object",
|
||||
"javaType": "org.openmetadata.schema.entity.utils.common.AccessTokenConfig",
|
||||
"properties": {
|
||||
"token": {
|
||||
"title": "Token",
|
||||
"description": "Static access token for Airflow API authentication.",
|
||||
"type": "string",
|
||||
"format": "password"
|
||||
}
|
||||
},
|
||||
"required": ["token"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
{
|
||||
"$id": "https://open-metadata.org/schema/entity/utils/common/basicAuthConfig.json",
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "Basic Auth",
|
||||
"description": "Username and password for Airflow API authentication.",
|
||||
"type": "object",
|
||||
"javaType": "org.openmetadata.schema.entity.utils.common.BasicAuthConfig",
|
||||
"properties": {
|
||||
"username": {
|
||||
"title": "Username",
|
||||
"description": "Username for basic authentication to the Airflow API.",
|
||||
"type": "string"
|
||||
},
|
||||
"password": {
|
||||
"title": "Password",
|
||||
"description": "Password for basic authentication to the Airflow API.",
|
||||
"type": "string",
|
||||
"format": "password"
|
||||
}
|
||||
},
|
||||
"required": ["username", "password"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
{
|
||||
"$id": "https://open-metadata.org/schema/entity/utils/common/gcpCredentialsConfig.json",
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "GCP Service Account",
|
||||
"description": "GCP credentials for Google Cloud Composer. Supports service account values, credentials path, workload identity (external account), and ADC. Tokens are auto-refreshed at runtime.",
|
||||
"type": "object",
|
||||
"javaType": "org.openmetadata.schema.entity.utils.common.GcpCredentialsConfig",
|
||||
"properties": {
|
||||
"credentials": {
|
||||
"title": "GCP Credentials",
|
||||
"description": "GCP credentials configuration.",
|
||||
"$ref": "../../../security/credentials/gcpCredentials.json"
|
||||
}
|
||||
},
|
||||
"required": ["credentials"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
{
|
||||
"$id": "https://open-metadata.org/schema/entity/utils/common/mwaaAuthConfig.json",
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "MWAA Authentication",
|
||||
"description": "AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.",
|
||||
"type": "object",
|
||||
"javaType": "org.openmetadata.schema.entity.utils.common.MWAAAuthConfig",
|
||||
"properties": {
|
||||
"mwaaConfig": {
|
||||
"title": "MWAA Configuration",
|
||||
"description": "MWAA credentials and environment configuration.",
|
||||
"type": "object",
|
||||
"javaType": "org.openmetadata.schema.entity.utils.common.MWAAConfig",
|
||||
"properties": {
|
||||
"mwaaEnvironmentName": {
|
||||
"title": "MWAA Environment Name",
|
||||
"description": "The name of your MWAA environment.",
|
||||
"type": "string"
|
||||
},
|
||||
"awsConfig": {
|
||||
"title": "AWS Configuration",
|
||||
"description": "AWS credentials for generating MWAA CLI token.",
|
||||
"$ref": "../../../security/credentials/awsCredentials.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"mwaaEnvironmentName",
|
||||
"awsConfig"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"mwaaConfig"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
|
|
@ -37,6 +37,165 @@ Note that the **Backend Connection** is only used to extract metadata from a DAG
|
|||
|
||||
$$
|
||||
|
||||
## Airflow REST API Connection
|
||||
|
||||
The REST API connection calls the Airflow web server over HTTP/HTTPS and does not require direct access to Airflow's metadata database. This makes it the right choice for managed deployments (Astronomer, GCP Cloud Composer, MWAA) and for any self-hosted Airflow where direct DB access is not available or desired.
|
||||
|
||||
$$note
|
||||
The REST API connection fetches DAG topology, task structure, schedules, and run statuses. **Lineage is not captured through this connection.** To get table-level and column-level lineage in OpenMetadata, you must separately install the <a href="https://docs.open-metadata.org/connectors/pipeline/airflow/lineage-backend" target="_blank">OpenMetadata Lineage Backend</a> in Airflow (strategy 2) or use the <a href="https://docs.open-metadata.org/connectors/pipeline/airflow/lineage-operator" target="_blank">Lineage Operator</a> in your DAGs (strategy 3). Once those emit OpenLineage events, lineage edges will appear automatically in OpenMetadata.
|
||||
$$
|
||||
|
||||
### Host URL Format by Deployment
|
||||
|
||||
| Deployment | Example Host and Port URL |
|
||||
|---|---|
|
||||
| Self-hosted / Docker (ingestion runs on the host) | `http://localhost:8080` |
|
||||
| Self-hosted / Docker (ingestion runs inside Docker) | `http://host.docker.internal:8080` |
|
||||
| Google Cloud Composer | `https://ko82752sdo9f7zjf811c682mw1e5uuc9-dot-us-east1.composer.googleusercontent.com` |
|
||||
| Astronomer | `https://cmn4c1zax823t00qf36gnlquw.ay.astronomer.run/v13jlquw` |
|
||||
| Amazon MWAA | `https://a1234awd1-5324-6f89-9523-1sq41234adqa.c2.airflow.eu-north-1.on.aws` |
|
||||
|
||||
For **Cloud Composer**, find the web server URL in GCP Console → **Composer → Environments → Open Airflow UI**. Copy the base URL (omit any trailing path).
|
||||
|
||||
For **Astronomer**, find your deployment URL in the Astronomer UI → **Deployments → Open Airflow**. Do **not** include a trailing slash.
|
||||
|
||||
### When to Use REST API vs. a Database Connection
|
||||
|
||||
Use the **REST API connection** when:
|
||||
- You are on Astronomer (DB access is unavailable).
|
||||
- You are on Cloud Composer or MWAA (DB access is unavailable or impractical).
|
||||
- You are running Airflow 3.x.
|
||||
- You do not have direct network access to the underlying MySQL / Postgres / SQLite metadata DB.
|
||||
|
||||
Use a **Database connection** (MySQL / Postgres / SQLite sections below) when:
|
||||
- You self-host Airflow and have direct access to the metadata DB.
|
||||
- You want to read raw task-instance data directly from the DB rather than via the API.
|
||||
- You are using the Backend Connection strategy (Airflow plugin / Lineage Backend approach).
|
||||
|
||||
$$section
|
||||
### Authentication Configuration $(id="authConfig")
|
||||
|
||||
Select the authentication method for the Airflow REST API. Pick one of the four options from the dropdown — the corresponding fields will appear:
|
||||
|
||||
- **Basic Auth**: Enter a username and password. For Airflow 3.x, a short-lived JWT is automatically exchanged at startup; for Airflow 2.x, HTTP Basic auth is used directly.
|
||||
- **Access Token**: Paste a static bearer token you have generated in Airflow.
|
||||
- **GCP Service Account**: Recommended for **Google Cloud Composer**. GCP OAuth2 tokens are fetched and auto-refreshed at runtime via `google-auth` — tokens never expire mid-run.
|
||||
- **MWAA Configuration**: AWS credentials used to authenticate with Amazon Managed Workflows for Apache Airflow (MWAA).
|
||||
|
||||
$$
|
||||
|
||||
### Authentication Quick Reference
|
||||
|
||||
| Deployment | Recommended Auth |
|
||||
|---|---|
|
||||
| Self-hosted Airflow 2.x or 3.x | Basic Auth |
|
||||
| Astronomer | Access Token (Deployment API token) |
|
||||
| Google Cloud Composer | GCP Service Account |
|
||||
| Amazon MWAA | MWAA Configuration |
|
||||
| Any deployment with a pre-generated bearer token | Access Token |
|
||||
|
||||
$$section
|
||||
### Username $(id="username")
|
||||
|
||||
Username for Basic Auth. The user must have permission to call the Airflow REST API.
|
||||
|
||||
For Airflow 3.x this triggers an automatic JWT exchange (`POST /auth/token`). For Airflow 2.x, HTTP Basic auth is used directly.
|
||||
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Password $(id="password")
|
||||
|
||||
Password for Basic Auth.
|
||||
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Token $(id="token")
|
||||
|
||||
Static bearer token for Access Token authentication. Paste the token value here — it will be sent as `Authorization: Bearer <token>` on every request.
|
||||
|
||||
Use this when you have generated a long-lived API token in your Airflow deployment.
|
||||
|
||||
$$
|
||||
|
||||
### Generating an Astronomer Deployment Token
|
||||
|
||||
For **Astronomer** deployments, use Access Token auth with a Deployment API token:
|
||||
1. Open the Astronomer UI and navigate to **Deployments**.
|
||||
2. Select your deployment and go to **API Keys** or **Tokens** (the exact label depends on your Astronomer version).
|
||||
3. Click **Add API Key** / **Generate Token**, give it a descriptive name (e.g. `openmetadata-ingestion`), and copy the value.
|
||||
4. Paste it in the **Token** field above.
|
||||
|
||||
For self-hosted Airflow, you can generate an API token via the Airflow UI under **Admin → Users** or via the Airflow CLI.
|
||||
|
||||
|
||||
$$section
|
||||
|
||||
### MWAA Configuration $(id="mwaaConfig")
|
||||
|
||||
AWS credentials used to authenticate with Amazon Managed Workflows for Apache Airflow (MWAA).
|
||||
|
||||
The authentication requires the MWAA Environment Name and an AWS configuration.
|
||||
|
||||
#### Configuration Fields
|
||||
**MWAA Environment Name**: The name of the Amazon MWAA environment to connect to.
|
||||
**AWS Region**: The AWS region where the MWAA environment is deployed.
|
||||
**AWS Access Key ID**: The access key used to authenticate with AWS.
|
||||
**AWS Secret Access Key**: The secret key associated with the AWS access key.
|
||||
**AWS Session Token (Optional)**: Required when using temporary AWS credentials.
|
||||
**Assume Role ARN (Optional)**: ARN of IAM role to assume for cross-account access.
|
||||
**Assume Role Session Name (Optional)**: Session name for assumed role.
|
||||
**Endpoint URL (Optional)**: Custom endpoint URL for AWS-compatible services (MinIO, LocalStack).
|
||||
|
||||
$$
|
||||
|
||||
$$section
|
||||
### GCP Credentials $(id="credentials")
|
||||
|
||||
GCP credentials used to obtain short-lived OAuth2 tokens for authenticating with Google Cloud Composer. Tokens are automatically refreshed when they expire, so ingestion runs are never interrupted by token expiry.
|
||||
|
||||
Supports all four GCP authentication types:
|
||||
|
||||
- **GCP Credentials Values**: Paste the service account JSON fields directly (project ID, client email, private key, etc.).
|
||||
- **GCP Credentials Path**: Provide a file path to a service account JSON key file on the ingestion host.
|
||||
- **GCP External Account (Workload Identity Federation)**: For GKE or other workload identity setups.
|
||||
- **GCP ADC (Application Default Credentials)**: Uses the credentials already available in the environment (e.g. via `gcloud auth application-default login` or the GCE metadata server).
|
||||
|
||||
You can also optionally configure **service account impersonation** via `gcpImpersonateServiceAccount`.
|
||||
|
||||
$$
|
||||
|
||||
### Finding Your Cloud Composer Airflow URL
|
||||
|
||||
In GCP Console, go to **Composer → Environments**, select your environment, and click **Open Airflow UI**. Copy the base URL (e.g. `https://<hash>-dot-<region>.composer.googleusercontent.com`) — this is what you enter in the **Host and Port** field above.
|
||||
|
||||
### Choosing a GCP Credential Type
|
||||
|
||||
| Credential Type | When to Use |
|
||||
|---|---|
|
||||
| **GCP Credentials Values** | Ingestion runs outside GCP (on-prem, local machine). Paste the service account JSON fields directly. |
|
||||
| **GCP Credentials Path** | Ingestion runs on a host where the service account JSON key file already exists at a known local path. |
|
||||
| **GCP ADC (Application Default Credentials)** | Ingestion runs on a GCE VM or GKE pod with an attached service account. Uses the GCE metadata server or `gcloud auth application-default login`. |
|
||||
| **GCP External Account (Workload Identity Federation)** | Ingestion runs on GKE with Workload Identity, or on a non-GCP system using federated identity (e.g. AWS → GCP). |
|
||||
|
||||
$$section
|
||||
### API Version $(id="apiVersion")
|
||||
|
||||
Airflow REST API version to use:
|
||||
|
||||
- **auto** (default): OpenMetadata tries `v2` first (Airflow 3.x), then falls back to `v1` (Airflow 2.x).
|
||||
- **v1**: Force Airflow 2.x API.
|
||||
- **v2**: Force Airflow 3.x API.
|
||||
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Verify SSL $(id="verifySSL")
|
||||
|
||||
Whether to verify SSL certificates when connecting to the Airflow REST API. Set to `false` only in development environments with self-signed certificates.
|
||||
|
||||
$$
|
||||
|
||||
|
||||
## MySQL Connection
|
||||
|
||||
|
|
|
|||
|
|
@ -530,7 +530,7 @@ export interface ConfigObject {
|
|||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
credentials?: CredentialsClass;
|
||||
credentials?: PurpleGCPCredentials;
|
||||
/**
|
||||
* Regex to only include/exclude databases that matches the pattern.
|
||||
*
|
||||
|
|
@ -1197,9 +1197,8 @@ export interface ConfigObject {
|
|||
*
|
||||
* Choose between API or database connection fetch metadata from superset.
|
||||
*
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Matillion Auth Configuration
|
||||
*
|
||||
|
|
@ -2507,6 +2506,8 @@ export enum AuthProvider {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*
|
||||
* Authentication type to connect to Apache Ranger.
|
||||
|
|
@ -2689,6 +2690,8 @@ export interface AuthenticationType {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface AWSCredentials {
|
||||
|
|
@ -3137,6 +3140,8 @@ export interface IcebergFileSystem {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*
|
||||
* Azure Cloud Credentials
|
||||
|
|
@ -3410,12 +3415,16 @@ export interface ConfigSourceConnection {
|
|||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*
|
||||
* AWS credentials required to access the S3 file.
|
||||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface Credentials {
|
||||
|
|
@ -3625,9 +3634,10 @@ export interface GCPImpersonateServiceAccountValues {
|
|||
*
|
||||
* Mysql Database Connection Config
|
||||
*
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Airflow REST API Connection Config for connecting via REST API.
|
||||
*
|
||||
* Lineage Backend Connection Config
|
||||
*
|
||||
|
|
@ -3738,7 +3748,10 @@ export interface ConfigConnection {
|
|||
* SSL Configuration details.
|
||||
*/
|
||||
sslConfig?: ConnectionSSLConfig;
|
||||
verifySSL?: VerifySSL;
|
||||
/**
|
||||
* Whether to verify SSL certificates when connecting to the Airflow API.
|
||||
*/
|
||||
verifySSL?: boolean | VerifySSL;
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*/
|
||||
|
|
@ -3797,6 +3810,15 @@ export interface ConfigConnection {
|
|||
* Use slow logs to extract lineage.
|
||||
*/
|
||||
useSlowLogs?: boolean;
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*/
|
||||
apiVersion?: APIVersion;
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*/
|
||||
authConfig?: AuthenticationConfiguration;
|
||||
/**
|
||||
* Regex exclude pipelines.
|
||||
*/
|
||||
|
|
@ -3808,6 +3830,92 @@ export interface ConfigConnection {
|
|||
supportsViewLineageExtraction?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*
|
||||
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
|
||||
* the version automatically.
|
||||
*/
|
||||
export enum APIVersion {
|
||||
Auto = "auto",
|
||||
V1 = "v1",
|
||||
V2 = "v2",
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*
|
||||
* Username and password for Airflow API authentication.
|
||||
*
|
||||
* Static access token for Airflow API authentication.
|
||||
*
|
||||
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
|
||||
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
|
||||
* runtime.
|
||||
*
|
||||
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
|
||||
*/
|
||||
export interface AuthenticationConfiguration {
|
||||
/**
|
||||
* Password for basic authentication to the Airflow API.
|
||||
*/
|
||||
password?: string;
|
||||
/**
|
||||
* Username for basic authentication to the Airflow API.
|
||||
*/
|
||||
username?: string;
|
||||
/**
|
||||
* Static access token for Airflow API authentication.
|
||||
*/
|
||||
token?: string;
|
||||
/**
|
||||
* GCP credentials configuration.
|
||||
*/
|
||||
credentials?: GcpConfigClass;
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
mwaaConfig?: MWAAConfiguration;
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP Credentials
|
||||
*
|
||||
* GCP credentials configs.
|
||||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
export interface GcpConfigClass {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
export interface MWAAConfiguration {
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*/
|
||||
awsConfig: AWSCredentials;
|
||||
/**
|
||||
* The name of your MWAA environment.
|
||||
*/
|
||||
mwaaEnvironmentName: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*
|
||||
|
|
@ -3873,6 +3981,8 @@ export interface DataStorageConfig {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface AwsCredentials {
|
||||
|
|
@ -4001,6 +4111,7 @@ export enum ConnectionType {
|
|||
MatillionETL = "MatillionETL",
|
||||
Mysql = "Mysql",
|
||||
Postgres = "Postgres",
|
||||
RESTAPI = "RestAPI",
|
||||
S3 = "S3",
|
||||
SQLite = "SQLite",
|
||||
}
|
||||
|
|
@ -4025,6 +4136,8 @@ export enum VerifySSL {
|
|||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*
|
||||
* Azure Cloud Credentials
|
||||
|
|
@ -4033,7 +4146,7 @@ export enum VerifySSL {
|
|||
*
|
||||
* Azure Credentials
|
||||
*/
|
||||
export interface CredentialsClass {
|
||||
export interface PurpleGCPCredentials {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
|
|
@ -4202,27 +4315,6 @@ export enum FHIRVersion {
|
|||
Stu3 = "STU3",
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP Credentials
|
||||
*
|
||||
* GCP credentials configs.
|
||||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
export interface GcpConfigClass {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* Do not set any credentials. Note that credentials are required to extract .lkml views and
|
||||
* their lineage.
|
||||
|
|
|
|||
|
|
@ -103,6 +103,7 @@ export interface OpenLineageInputDataset {
|
|||
* Dataset facets containing metadata like schema.
|
||||
*/
|
||||
export interface DatasetFacets {
|
||||
columnLineage?: ColumnLineageFacet;
|
||||
datasource?: DatasourceFacet;
|
||||
documentation?: DocumentationFacet;
|
||||
ownership?: OwnershipFacet;
|
||||
|
|
@ -111,6 +112,65 @@ export interface DatasetFacets {
|
|||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* Column lineage facet describing how output columns are derived from input columns.
|
||||
*
|
||||
* Base facet that all facets extend from.
|
||||
*/
|
||||
export interface ColumnLineageFacet {
|
||||
/**
|
||||
* URI identifying the producer of this metadata.
|
||||
*/
|
||||
_producer?: string;
|
||||
/**
|
||||
* URI pointing to the schema definition for this facet.
|
||||
*/
|
||||
_schemaURL?: string;
|
||||
/**
|
||||
* Map of output field names to their lineage information.
|
||||
*/
|
||||
fields: { [key: string]: ColumnLineageField };
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* Column lineage information for a single output field.
|
||||
*/
|
||||
export interface ColumnLineageField {
|
||||
/**
|
||||
* List of input fields that contribute to this output field.
|
||||
*/
|
||||
inputFields: InputField[];
|
||||
/**
|
||||
* Human-readable description of the transformation.
|
||||
*/
|
||||
transformationDescription?: string;
|
||||
/**
|
||||
* Type of transformation (e.g., DIRECT, AGGREGATION).
|
||||
*/
|
||||
transformationType?: string;
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* A reference to an input column in column lineage.
|
||||
*/
|
||||
export interface InputField {
|
||||
/**
|
||||
* The name of the input field/column.
|
||||
*/
|
||||
field: string;
|
||||
/**
|
||||
* The name of the input dataset.
|
||||
*/
|
||||
name: string;
|
||||
/**
|
||||
* The namespace of the input dataset.
|
||||
*/
|
||||
namespace: string;
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* Datasource facet providing connection details for the dataset.
|
||||
*
|
||||
|
|
@ -357,65 +417,6 @@ export interface OutputDatasetFacets {
|
|||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* Column lineage facet describing how output columns are derived from input columns.
|
||||
*
|
||||
* Base facet that all facets extend from.
|
||||
*/
|
||||
export interface ColumnLineageFacet {
|
||||
/**
|
||||
* URI identifying the producer of this metadata.
|
||||
*/
|
||||
_producer?: string;
|
||||
/**
|
||||
* URI pointing to the schema definition for this facet.
|
||||
*/
|
||||
_schemaURL?: string;
|
||||
/**
|
||||
* Map of output field names to their lineage information.
|
||||
*/
|
||||
fields: { [key: string]: ColumnLineageField };
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* Column lineage information for a single output field.
|
||||
*/
|
||||
export interface ColumnLineageField {
|
||||
/**
|
||||
* List of input fields that contribute to this output field.
|
||||
*/
|
||||
inputFields: InputField[];
|
||||
/**
|
||||
* Human-readable description of the transformation.
|
||||
*/
|
||||
transformationDescription?: string;
|
||||
/**
|
||||
* Type of transformation (e.g., DIRECT, AGGREGATION).
|
||||
*/
|
||||
transformationType?: string;
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* A reference to an input column in column lineage.
|
||||
*/
|
||||
export interface InputField {
|
||||
/**
|
||||
* The name of the input field/column.
|
||||
*/
|
||||
field: string;
|
||||
/**
|
||||
* The name of the input dataset.
|
||||
*/
|
||||
name: string;
|
||||
/**
|
||||
* The namespace of the input dataset.
|
||||
*/
|
||||
namespace: string;
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* The run this event is about.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -93,6 +93,7 @@ export interface OpenLineageInputDataset {
|
|||
* Dataset facets containing metadata like schema.
|
||||
*/
|
||||
export interface DatasetFacets {
|
||||
columnLineage?: ColumnLineageFacet;
|
||||
datasource?: DatasourceFacet;
|
||||
documentation?: DocumentationFacet;
|
||||
ownership?: OwnershipFacet;
|
||||
|
|
@ -101,6 +102,65 @@ export interface DatasetFacets {
|
|||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* Column lineage facet describing how output columns are derived from input columns.
|
||||
*
|
||||
* Base facet that all facets extend from.
|
||||
*/
|
||||
export interface ColumnLineageFacet {
|
||||
/**
|
||||
* URI identifying the producer of this metadata.
|
||||
*/
|
||||
_producer?: string;
|
||||
/**
|
||||
* URI pointing to the schema definition for this facet.
|
||||
*/
|
||||
_schemaURL?: string;
|
||||
/**
|
||||
* Map of output field names to their lineage information.
|
||||
*/
|
||||
fields: { [key: string]: ColumnLineageField };
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* Column lineage information for a single output field.
|
||||
*/
|
||||
export interface ColumnLineageField {
|
||||
/**
|
||||
* List of input fields that contribute to this output field.
|
||||
*/
|
||||
inputFields: InputField[];
|
||||
/**
|
||||
* Human-readable description of the transformation.
|
||||
*/
|
||||
transformationDescription?: string;
|
||||
/**
|
||||
* Type of transformation (e.g., DIRECT, AGGREGATION).
|
||||
*/
|
||||
transformationType?: string;
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* A reference to an input column in column lineage.
|
||||
*/
|
||||
export interface InputField {
|
||||
/**
|
||||
* The name of the input field/column.
|
||||
*/
|
||||
field: string;
|
||||
/**
|
||||
* The name of the input dataset.
|
||||
*/
|
||||
name: string;
|
||||
/**
|
||||
* The namespace of the input dataset.
|
||||
*/
|
||||
namespace: string;
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* Datasource facet providing connection details for the dataset.
|
||||
*
|
||||
|
|
@ -347,65 +407,6 @@ export interface OutputDatasetFacets {
|
|||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* Column lineage facet describing how output columns are derived from input columns.
|
||||
*
|
||||
* Base facet that all facets extend from.
|
||||
*/
|
||||
export interface ColumnLineageFacet {
|
||||
/**
|
||||
* URI identifying the producer of this metadata.
|
||||
*/
|
||||
_producer?: string;
|
||||
/**
|
||||
* URI pointing to the schema definition for this facet.
|
||||
*/
|
||||
_schemaURL?: string;
|
||||
/**
|
||||
* Map of output field names to their lineage information.
|
||||
*/
|
||||
fields: { [key: string]: ColumnLineageField };
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* Column lineage information for a single output field.
|
||||
*/
|
||||
export interface ColumnLineageField {
|
||||
/**
|
||||
* List of input fields that contribute to this output field.
|
||||
*/
|
||||
inputFields: InputField[];
|
||||
/**
|
||||
* Human-readable description of the transformation.
|
||||
*/
|
||||
transformationDescription?: string;
|
||||
/**
|
||||
* Type of transformation (e.g., DIRECT, AGGREGATION).
|
||||
*/
|
||||
transformationType?: string;
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* A reference to an input column in column lineage.
|
||||
*/
|
||||
export interface InputField {
|
||||
/**
|
||||
* The name of the input field/column.
|
||||
*/
|
||||
field: string;
|
||||
/**
|
||||
* The name of the input dataset.
|
||||
*/
|
||||
name: string;
|
||||
/**
|
||||
* The namespace of the input dataset.
|
||||
*/
|
||||
namespace: string;
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* The run this event is about.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -117,13 +117,12 @@ export interface PipelineConnection {
|
|||
*/
|
||||
export interface ConfigObject {
|
||||
/**
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Matillion Auth Configuration
|
||||
*/
|
||||
connection?: MetadataDatabaseConnection;
|
||||
connection?: AirflowConnection;
|
||||
/**
|
||||
* Pipeline Service Management/UI URI.
|
||||
*
|
||||
|
|
@ -454,6 +453,8 @@ export interface FluffyAuthentication {
|
|||
}
|
||||
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
|
|
@ -689,9 +690,10 @@ export interface AzureCredentials {
|
|||
}
|
||||
|
||||
/**
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Airflow REST API Connection Config for connecting via REST API.
|
||||
*
|
||||
* Lineage Backend Connection Config
|
||||
*
|
||||
|
|
@ -705,15 +707,28 @@ export interface AzureCredentials {
|
|||
*
|
||||
* Matillion ETL Auth Config.
|
||||
*/
|
||||
export interface MetadataDatabaseConnection {
|
||||
export interface AirflowConnection {
|
||||
/**
|
||||
* Regex exclude pipelines.
|
||||
* Airflow REST API version.
|
||||
*/
|
||||
pipelineFilterPattern?: FilterPattern;
|
||||
apiVersion?: APIVersion;
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*/
|
||||
authConfig?: AuthenticationConfiguration;
|
||||
/**
|
||||
* Service Type
|
||||
*/
|
||||
type?: Type;
|
||||
/**
|
||||
* Whether to verify SSL certificates when connecting to the Airflow API.
|
||||
*/
|
||||
verifySSL?: boolean;
|
||||
/**
|
||||
* Regex exclude pipelines.
|
||||
*/
|
||||
pipelineFilterPattern?: FilterPattern;
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*/
|
||||
|
|
@ -827,6 +842,188 @@ export interface MetadataDatabaseConnection {
|
|||
supportsViewLineageExtraction?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*
|
||||
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
|
||||
* the version automatically.
|
||||
*/
|
||||
export enum APIVersion {
|
||||
Auto = "auto",
|
||||
V1 = "v1",
|
||||
V2 = "v2",
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*
|
||||
* Username and password for Airflow API authentication.
|
||||
*
|
||||
* Static access token for Airflow API authentication.
|
||||
*
|
||||
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
|
||||
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
|
||||
* runtime.
|
||||
*
|
||||
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
|
||||
*/
|
||||
export interface AuthenticationConfiguration {
|
||||
/**
|
||||
* Password for basic authentication to the Airflow API.
|
||||
*/
|
||||
password?: string;
|
||||
/**
|
||||
* Username for basic authentication to the Airflow API.
|
||||
*/
|
||||
username?: string;
|
||||
/**
|
||||
* Static access token for Airflow API authentication.
|
||||
*/
|
||||
token?: string;
|
||||
/**
|
||||
* GCP credentials configuration.
|
||||
*/
|
||||
credentials?: GCPCredentials;
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
mwaaConfig?: MWAAConfiguration;
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP credentials configs.
|
||||
*/
|
||||
export interface GCPCredentials {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*
|
||||
* Pass the raw credential values provided by GCP
|
||||
*
|
||||
* Pass the path of file containing the GCP credentials info
|
||||
*
|
||||
* Use the application default credentials
|
||||
*/
|
||||
export interface GCPCredentialsConfiguration {
|
||||
/**
|
||||
* Google Cloud auth provider certificate.
|
||||
*/
|
||||
authProviderX509CertUrl?: string;
|
||||
/**
|
||||
* Google Cloud auth uri.
|
||||
*/
|
||||
authUri?: string;
|
||||
/**
|
||||
* Google Cloud email.
|
||||
*/
|
||||
clientEmail?: string;
|
||||
/**
|
||||
* Google Cloud Client ID.
|
||||
*/
|
||||
clientId?: string;
|
||||
/**
|
||||
* Google Cloud client certificate uri.
|
||||
*/
|
||||
clientX509CertUrl?: string;
|
||||
/**
|
||||
* Google Cloud private key.
|
||||
*/
|
||||
privateKey?: string;
|
||||
/**
|
||||
* Google Cloud private key id.
|
||||
*/
|
||||
privateKeyId?: string;
|
||||
/**
|
||||
* Project ID
|
||||
*
|
||||
* GCP Project ID to parse metadata from
|
||||
*/
|
||||
projectId?: string[] | string;
|
||||
/**
|
||||
* Google Cloud token uri.
|
||||
*/
|
||||
tokenUri?: string;
|
||||
/**
|
||||
* Google Cloud Platform account type.
|
||||
*
|
||||
* Google Cloud Platform ADC ( Application Default Credentials )
|
||||
*/
|
||||
type?: string;
|
||||
/**
|
||||
* Path of the file containing the GCP credentials info
|
||||
*/
|
||||
path?: string;
|
||||
/**
|
||||
* Google Security Token Service audience which contains the resource name for the workload
|
||||
* identity pool and the provider identifier in that pool.
|
||||
*/
|
||||
audience?: string;
|
||||
/**
|
||||
* This object defines the mechanism used to retrieve the external credential from the local
|
||||
* environment so that it can be exchanged for a GCP access token via the STS endpoint
|
||||
*/
|
||||
credentialSource?: { [key: string]: string };
|
||||
/**
|
||||
* Google Cloud Platform account type.
|
||||
*/
|
||||
externalType?: string;
|
||||
/**
|
||||
* Google Security Token Service subject token type based on the OAuth 2.0 token exchange
|
||||
* spec.
|
||||
*/
|
||||
subjectTokenType?: string;
|
||||
/**
|
||||
* Google Security Token Service token exchange endpoint.
|
||||
*/
|
||||
tokenURL?: string;
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*
|
||||
* Pass the values to impersonate a service account of Google Cloud
|
||||
*/
|
||||
export interface GCPImpersonateServiceAccountValues {
|
||||
/**
|
||||
* The impersonated service account email
|
||||
*/
|
||||
impersonateServiceAccount?: string;
|
||||
/**
|
||||
* Number of seconds the delegated credential should be valid
|
||||
*/
|
||||
lifetime?: number;
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
export interface MWAAConfiguration {
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*/
|
||||
awsConfig: AWSCredentials;
|
||||
/**
|
||||
* The name of your MWAA environment.
|
||||
*/
|
||||
mwaaEnvironmentName: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*
|
||||
|
|
@ -908,6 +1105,8 @@ export interface DataStorageConfig {
|
|||
}
|
||||
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
|
|
@ -993,6 +1192,7 @@ export enum Type {
|
|||
MatillionETL = "MatillionETL",
|
||||
Mysql = "Mysql",
|
||||
Postgres = "Postgres",
|
||||
RESTAPI = "RestAPI",
|
||||
SQLite = "SQLite",
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2235,6 +2235,8 @@ export interface DBTPrefixConfig {
|
|||
*
|
||||
* AWS credentials required to access the S3 file.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*
|
||||
* Azure Cloud Credentials
|
||||
|
|
@ -2249,6 +2251,8 @@ export interface DBTPrefixConfig {
|
|||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
export interface Credentials {
|
||||
|
|
@ -3549,9 +3553,8 @@ export interface ConfigObject {
|
|||
*
|
||||
* Choose between mysql and postgres connection for alation database
|
||||
*
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Matillion Auth Configuration
|
||||
*/
|
||||
|
|
@ -3772,7 +3775,7 @@ export interface ConfigObject {
|
|||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
credentials?: CredentialsClass;
|
||||
credentials?: PurpleGCPCredentials;
|
||||
/**
|
||||
* Regex to only include/exclude databases that matches the pattern.
|
||||
*
|
||||
|
|
@ -5001,6 +5004,8 @@ export enum AuthProvider {
|
|||
*
|
||||
* AWS credentials required to access the S3 file.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*
|
||||
* Authentication type to connect to Apache Ranger.
|
||||
|
|
@ -5183,6 +5188,8 @@ export interface AuthenticationTypeForTableau {
|
|||
*
|
||||
* AWS credentials required to access the S3 file.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface AWSCredentials {
|
||||
|
|
@ -5631,6 +5638,8 @@ export interface IcebergFileSystem {
|
|||
*
|
||||
* AWS credentials required to access the S3 file.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*
|
||||
* Azure Cloud Credentials
|
||||
|
|
@ -5916,9 +5925,10 @@ export interface ConfigSourceConnection {
|
|||
*
|
||||
* Choose between mysql and postgres connection for alation database
|
||||
*
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Airflow REST API Connection Config for connecting via REST API.
|
||||
*
|
||||
* Lineage Backend Connection Config
|
||||
*
|
||||
|
|
@ -5965,8 +5975,11 @@ export interface ConfigConnection {
|
|||
* Username to connect to the Matillion. This user should have privileges to read all the
|
||||
* metadata in Matillion.
|
||||
*/
|
||||
username?: string;
|
||||
verifySSL?: VerifySSL;
|
||||
username?: string;
|
||||
/**
|
||||
* Whether to verify SSL certificates when connecting to the Airflow API.
|
||||
*/
|
||||
verifySSL?: boolean | VerifySSL;
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*/
|
||||
|
|
@ -6086,6 +6099,15 @@ export interface ConfigConnection {
|
|||
* <USERNAME> <PASSWORD>`
|
||||
*/
|
||||
userKey?: string;
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*/
|
||||
apiVersion?: APIVersion;
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*/
|
||||
authConfig?: AuthenticationConfiguration;
|
||||
/**
|
||||
* Regex exclude pipelines.
|
||||
*/
|
||||
|
|
@ -6097,6 +6119,92 @@ export interface ConfigConnection {
|
|||
supportsViewLineageExtraction?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*
|
||||
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
|
||||
* the version automatically.
|
||||
*/
|
||||
export enum APIVersion {
|
||||
Auto = "auto",
|
||||
V1 = "v1",
|
||||
V2 = "v2",
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*
|
||||
* Username and password for Airflow API authentication.
|
||||
*
|
||||
* Static access token for Airflow API authentication.
|
||||
*
|
||||
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
|
||||
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
|
||||
* runtime.
|
||||
*
|
||||
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
|
||||
*/
|
||||
export interface AuthenticationConfiguration {
|
||||
/**
|
||||
* Password for basic authentication to the Airflow API.
|
||||
*/
|
||||
password?: string;
|
||||
/**
|
||||
* Username for basic authentication to the Airflow API.
|
||||
*/
|
||||
username?: string;
|
||||
/**
|
||||
* Static access token for Airflow API authentication.
|
||||
*/
|
||||
token?: string;
|
||||
/**
|
||||
* GCP credentials configuration.
|
||||
*/
|
||||
credentials?: GcpConfigClass;
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
mwaaConfig?: MWAAConfiguration;
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP credentials configs.
|
||||
*
|
||||
* GCP Credentials
|
||||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
export interface GcpConfigClass {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
export interface MWAAConfiguration {
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*/
|
||||
awsConfig: AWSCredentials;
|
||||
/**
|
||||
* The name of your MWAA environment.
|
||||
*/
|
||||
mwaaEnvironmentName: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*
|
||||
|
|
@ -6162,6 +6270,8 @@ export interface DataStorageConfig {
|
|||
*
|
||||
* AWS credentials required to access the S3 file.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface AwsCredentials {
|
||||
|
|
@ -6290,6 +6400,7 @@ export enum ConnectionType {
|
|||
MatillionETL = "MatillionETL",
|
||||
Mysql = "Mysql",
|
||||
Postgres = "Postgres",
|
||||
RESTAPI = "RestAPI",
|
||||
S3 = "S3",
|
||||
SQLite = "SQLite",
|
||||
}
|
||||
|
|
@ -6314,6 +6425,8 @@ export enum VerifySSL {
|
|||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*
|
||||
* Azure Cloud Credentials
|
||||
|
|
@ -6322,7 +6435,7 @@ export enum VerifySSL {
|
|||
*
|
||||
* Azure Credentials
|
||||
*/
|
||||
export interface CredentialsClass {
|
||||
export interface PurpleGCPCredentials {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
|
|
@ -6491,27 +6604,6 @@ export enum FHIRVersion {
|
|||
Stu3 = "STU3",
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP credentials configs.
|
||||
*
|
||||
* GCP Credentials
|
||||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
export interface GcpConfigClass {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* Do not set any credentials. Note that credentials are required to extract .lkml views and
|
||||
* their lineage.
|
||||
|
|
|
|||
|
|
@ -412,7 +412,7 @@ export interface ConfigObject {
|
|||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
credentials?: CredentialsClass;
|
||||
credentials?: PurpleGCPCredentials;
|
||||
/**
|
||||
* Regex to only include/exclude databases that matches the pattern.
|
||||
*
|
||||
|
|
@ -1079,9 +1079,8 @@ export interface ConfigObject {
|
|||
*
|
||||
* Choose between API or database connection fetch metadata from superset.
|
||||
*
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Matillion Auth Configuration
|
||||
*
|
||||
|
|
@ -2389,6 +2388,8 @@ export enum AuthProvider {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*
|
||||
* Authentication type to connect to Apache Ranger.
|
||||
|
|
@ -2571,6 +2572,8 @@ export interface AuthenticationType {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface AWSCredentials {
|
||||
|
|
@ -3019,6 +3022,8 @@ export interface IcebergFileSystem {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*
|
||||
* Azure Cloud Credentials
|
||||
|
|
@ -3292,12 +3297,16 @@ export interface ConfigSourceConnection {
|
|||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*
|
||||
* AWS credentials required to access the S3 file.
|
||||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface Credentials {
|
||||
|
|
@ -3507,9 +3516,10 @@ export interface GCPImpersonateServiceAccountValues {
|
|||
*
|
||||
* Mysql Database Connection Config
|
||||
*
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Airflow REST API Connection Config for connecting via REST API.
|
||||
*
|
||||
* Lineage Backend Connection Config
|
||||
*
|
||||
|
|
@ -3620,7 +3630,10 @@ export interface ConfigConnection {
|
|||
* SSL Configuration details.
|
||||
*/
|
||||
sslConfig?: ConnectionSSLConfig;
|
||||
verifySSL?: VerifySSL;
|
||||
/**
|
||||
* Whether to verify SSL certificates when connecting to the Airflow API.
|
||||
*/
|
||||
verifySSL?: boolean | VerifySSL;
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*/
|
||||
|
|
@ -3679,6 +3692,15 @@ export interface ConfigConnection {
|
|||
* Use slow logs to extract lineage.
|
||||
*/
|
||||
useSlowLogs?: boolean;
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*/
|
||||
apiVersion?: APIVersion;
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*/
|
||||
authConfig?: AuthenticationConfiguration;
|
||||
/**
|
||||
* Regex exclude pipelines.
|
||||
*/
|
||||
|
|
@ -3690,6 +3712,92 @@ export interface ConfigConnection {
|
|||
supportsViewLineageExtraction?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*
|
||||
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
|
||||
* the version automatically.
|
||||
*/
|
||||
export enum APIVersion {
|
||||
Auto = "auto",
|
||||
V1 = "v1",
|
||||
V2 = "v2",
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*
|
||||
* Username and password for Airflow API authentication.
|
||||
*
|
||||
* Static access token for Airflow API authentication.
|
||||
*
|
||||
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
|
||||
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
|
||||
* runtime.
|
||||
*
|
||||
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
|
||||
*/
|
||||
export interface AuthenticationConfiguration {
|
||||
/**
|
||||
* Password for basic authentication to the Airflow API.
|
||||
*/
|
||||
password?: string;
|
||||
/**
|
||||
* Username for basic authentication to the Airflow API.
|
||||
*/
|
||||
username?: string;
|
||||
/**
|
||||
* Static access token for Airflow API authentication.
|
||||
*/
|
||||
token?: string;
|
||||
/**
|
||||
* GCP credentials configuration.
|
||||
*/
|
||||
credentials?: GcpConfigClass;
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
mwaaConfig?: MWAAConfiguration;
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP Credentials
|
||||
*
|
||||
* GCP credentials configs.
|
||||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
export interface GcpConfigClass {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
export interface MWAAConfiguration {
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*/
|
||||
awsConfig: AWSCredentials;
|
||||
/**
|
||||
* The name of your MWAA environment.
|
||||
*/
|
||||
mwaaEnvironmentName: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*
|
||||
|
|
@ -3755,6 +3863,8 @@ export interface DataStorageConfig {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface AwsCredentials {
|
||||
|
|
@ -3883,6 +3993,7 @@ export enum ConnectionType {
|
|||
MatillionETL = "MatillionETL",
|
||||
Mysql = "Mysql",
|
||||
Postgres = "Postgres",
|
||||
RESTAPI = "RestAPI",
|
||||
S3 = "S3",
|
||||
SQLite = "SQLite",
|
||||
}
|
||||
|
|
@ -3907,6 +4018,8 @@ export enum VerifySSL {
|
|||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*
|
||||
* Azure Cloud Credentials
|
||||
|
|
@ -3915,7 +4028,7 @@ export enum VerifySSL {
|
|||
*
|
||||
* Azure Credentials
|
||||
*/
|
||||
export interface CredentialsClass {
|
||||
export interface PurpleGCPCredentials {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
|
|
@ -4084,27 +4197,6 @@ export enum FHIRVersion {
|
|||
Stu3 = "STU3",
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP Credentials
|
||||
*
|
||||
* GCP credentials configs.
|
||||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
export interface GcpConfigClass {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* Do not set any credentials. Note that credentials are required to extract .lkml views and
|
||||
* their lineage.
|
||||
|
|
|
|||
|
|
@ -1065,7 +1065,7 @@ export interface ConfigObject {
|
|||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
credentials?: CredentialsClass;
|
||||
credentials?: PurpleGCPCredentials;
|
||||
/**
|
||||
* Regex to only include/exclude databases that matches the pattern.
|
||||
*
|
||||
|
|
@ -1732,9 +1732,8 @@ export interface ConfigObject {
|
|||
*
|
||||
* Choose between API or database connection fetch metadata from superset.
|
||||
*
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Matillion Auth Configuration
|
||||
*
|
||||
|
|
@ -2940,6 +2939,8 @@ export enum AuthMechanismEnum {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*
|
||||
* Authentication type to connect to Apache Ranger.
|
||||
|
|
@ -3122,6 +3123,8 @@ export interface AuthenticationType {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface AWSCredentials {
|
||||
|
|
@ -3531,6 +3534,8 @@ export interface IcebergFileSystem {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*
|
||||
* Azure Cloud Credentials
|
||||
|
|
@ -3804,12 +3809,16 @@ export interface ConfigSourceConnection {
|
|||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*
|
||||
* AWS credentials required to access the S3 file.
|
||||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface Credentials {
|
||||
|
|
@ -4019,9 +4028,10 @@ export interface GCPImpersonateServiceAccountValues {
|
|||
*
|
||||
* Mysql Database Connection Config
|
||||
*
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Airflow REST API Connection Config for connecting via REST API.
|
||||
*
|
||||
* Lineage Backend Connection Config
|
||||
*
|
||||
|
|
@ -4132,7 +4142,10 @@ export interface ConfigConnection {
|
|||
* SSL Configuration details.
|
||||
*/
|
||||
sslConfig?: ConnectionSSLConfig;
|
||||
verifySSL?: VerifySSL;
|
||||
/**
|
||||
* Whether to verify SSL certificates when connecting to the Airflow API.
|
||||
*/
|
||||
verifySSL?: boolean | VerifySSL;
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*/
|
||||
|
|
@ -4191,6 +4204,15 @@ export interface ConfigConnection {
|
|||
* Use slow logs to extract lineage.
|
||||
*/
|
||||
useSlowLogs?: boolean;
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*/
|
||||
apiVersion?: APIVersion;
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*/
|
||||
authConfig?: AuthenticationConfiguration;
|
||||
/**
|
||||
* Regex exclude pipelines.
|
||||
*/
|
||||
|
|
@ -4202,6 +4224,92 @@ export interface ConfigConnection {
|
|||
supportsViewLineageExtraction?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*
|
||||
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
|
||||
* the version automatically.
|
||||
*/
|
||||
export enum APIVersion {
|
||||
Auto = "auto",
|
||||
V1 = "v1",
|
||||
V2 = "v2",
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*
|
||||
* Username and password for Airflow API authentication.
|
||||
*
|
||||
* Static access token for Airflow API authentication.
|
||||
*
|
||||
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
|
||||
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
|
||||
* runtime.
|
||||
*
|
||||
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
|
||||
*/
|
||||
export interface AuthenticationConfiguration {
|
||||
/**
|
||||
* Password for basic authentication to the Airflow API.
|
||||
*/
|
||||
password?: string;
|
||||
/**
|
||||
* Username for basic authentication to the Airflow API.
|
||||
*/
|
||||
username?: string;
|
||||
/**
|
||||
* Static access token for Airflow API authentication.
|
||||
*/
|
||||
token?: string;
|
||||
/**
|
||||
* GCP credentials configuration.
|
||||
*/
|
||||
credentials?: GcpConfigClass;
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
mwaaConfig?: MWAAConfiguration;
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP Credentials
|
||||
*
|
||||
* GCP credentials configs.
|
||||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
export interface GcpConfigClass {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
export interface MWAAConfiguration {
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*/
|
||||
awsConfig: AWSCredentials;
|
||||
/**
|
||||
* The name of your MWAA environment.
|
||||
*/
|
||||
mwaaEnvironmentName: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*
|
||||
|
|
@ -4267,6 +4375,8 @@ export interface DataStorageConfig {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface AwsCredentials {
|
||||
|
|
@ -4395,6 +4505,7 @@ export enum ConnectionType {
|
|||
MatillionETL = "MatillionETL",
|
||||
Mysql = "Mysql",
|
||||
Postgres = "Postgres",
|
||||
RESTAPI = "RestAPI",
|
||||
S3 = "S3",
|
||||
SQLite = "SQLite",
|
||||
}
|
||||
|
|
@ -4406,6 +4517,8 @@ export enum ConnectionType {
|
|||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*
|
||||
* Azure Cloud Credentials
|
||||
|
|
@ -4414,7 +4527,7 @@ export enum ConnectionType {
|
|||
*
|
||||
* Azure Credentials
|
||||
*/
|
||||
export interface CredentialsClass {
|
||||
export interface PurpleGCPCredentials {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
|
|
@ -4583,27 +4696,6 @@ export enum FHIRVersion {
|
|||
Stu3 = "STU3",
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP Credentials
|
||||
*
|
||||
* GCP credentials configs.
|
||||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
export interface GcpConfigClass {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* Do not set any credentials. Note that credentials are required to extract .lkml views and
|
||||
* their lineage.
|
||||
|
|
|
|||
|
|
@ -15,11 +15,9 @@
|
|||
*/
|
||||
export interface AirflowConnection {
|
||||
/**
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from Airflow.
|
||||
*/
|
||||
connection: MetadataDatabaseConnection;
|
||||
connection: AirflowConnectionClass;
|
||||
/**
|
||||
* Pipeline Service Management/UI URI.
|
||||
*/
|
||||
|
|
@ -40,9 +38,10 @@ export interface AirflowConnection {
|
|||
}
|
||||
|
||||
/**
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Airflow REST API Connection Config for connecting via REST API.
|
||||
*
|
||||
* Lineage Backend Connection Config
|
||||
*
|
||||
|
|
@ -52,15 +51,28 @@ export interface AirflowConnection {
|
|||
*
|
||||
* SQLite Database Connection Config
|
||||
*/
|
||||
export interface MetadataDatabaseConnection {
|
||||
export interface AirflowConnectionClass {
|
||||
/**
|
||||
* Regex exclude pipelines.
|
||||
* Airflow REST API version.
|
||||
*/
|
||||
pipelineFilterPattern?: FilterPattern;
|
||||
apiVersion?: APIVersion;
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*/
|
||||
authConfig?: AuthenticationConfiguration;
|
||||
/**
|
||||
* Service Type
|
||||
*/
|
||||
type?: Type;
|
||||
/**
|
||||
* Whether to verify SSL certificates when connecting to the Airflow API.
|
||||
*/
|
||||
verifySSL?: boolean;
|
||||
/**
|
||||
* Regex exclude pipelines.
|
||||
*/
|
||||
pipelineFilterPattern?: FilterPattern;
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*/
|
||||
|
|
@ -168,24 +180,190 @@ export interface MetadataDatabaseConnection {
|
|||
}
|
||||
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
* Airflow REST API version.
|
||||
*
|
||||
* Common Database Connection Config
|
||||
*
|
||||
* IAM Auth Database Connection Config
|
||||
*
|
||||
* Azure Database Connection Config
|
||||
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
|
||||
* the version automatically.
|
||||
*/
|
||||
export interface AuthConfigurationType {
|
||||
/**
|
||||
* Password to connect to source.
|
||||
*/
|
||||
password?: string;
|
||||
awsConfig?: AWSCredentials;
|
||||
azureConfig?: AzureCredentials;
|
||||
export enum APIVersion {
|
||||
Auto = "auto",
|
||||
V1 = "v1",
|
||||
V2 = "v2",
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*
|
||||
* Username and password for Airflow API authentication.
|
||||
*
|
||||
* Static access token for Airflow API authentication.
|
||||
*
|
||||
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
|
||||
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
|
||||
* runtime.
|
||||
*
|
||||
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
|
||||
*/
|
||||
export interface AuthenticationConfiguration {
|
||||
/**
|
||||
* Password for basic authentication to the Airflow API.
|
||||
*/
|
||||
password?: string;
|
||||
/**
|
||||
* Username for basic authentication to the Airflow API.
|
||||
*/
|
||||
username?: string;
|
||||
/**
|
||||
* Static access token for Airflow API authentication.
|
||||
*/
|
||||
token?: string;
|
||||
/**
|
||||
* GCP credentials configuration.
|
||||
*/
|
||||
credentials?: GCPCredentials;
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
mwaaConfig?: MWAAConfiguration;
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP credentials configs.
|
||||
*/
|
||||
export interface GCPCredentials {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*
|
||||
* Pass the raw credential values provided by GCP
|
||||
*
|
||||
* Pass the path of file containing the GCP credentials info
|
||||
*
|
||||
* Use the application default credentials
|
||||
*/
|
||||
export interface GCPCredentialsConfiguration {
|
||||
/**
|
||||
* Google Cloud auth provider certificate.
|
||||
*/
|
||||
authProviderX509CertUrl?: string;
|
||||
/**
|
||||
* Google Cloud auth uri.
|
||||
*/
|
||||
authUri?: string;
|
||||
/**
|
||||
* Google Cloud email.
|
||||
*/
|
||||
clientEmail?: string;
|
||||
/**
|
||||
* Google Cloud Client ID.
|
||||
*/
|
||||
clientId?: string;
|
||||
/**
|
||||
* Google Cloud client certificate uri.
|
||||
*/
|
||||
clientX509CertUrl?: string;
|
||||
/**
|
||||
* Google Cloud private key.
|
||||
*/
|
||||
privateKey?: string;
|
||||
/**
|
||||
* Google Cloud private key id.
|
||||
*/
|
||||
privateKeyId?: string;
|
||||
/**
|
||||
* Project ID
|
||||
*
|
||||
* GCP Project ID to parse metadata from
|
||||
*/
|
||||
projectId?: string[] | string;
|
||||
/**
|
||||
* Google Cloud token uri.
|
||||
*/
|
||||
tokenUri?: string;
|
||||
/**
|
||||
* Google Cloud Platform account type.
|
||||
*
|
||||
* Google Cloud Platform ADC ( Application Default Credentials )
|
||||
*/
|
||||
type?: string;
|
||||
/**
|
||||
* Path of the file containing the GCP credentials info
|
||||
*/
|
||||
path?: string;
|
||||
/**
|
||||
* Google Security Token Service audience which contains the resource name for the workload
|
||||
* identity pool and the provider identifier in that pool.
|
||||
*/
|
||||
audience?: string;
|
||||
/**
|
||||
* This object defines the mechanism used to retrieve the external credential from the local
|
||||
* environment so that it can be exchanged for a GCP access token via the STS endpoint
|
||||
*/
|
||||
credentialSource?: { [key: string]: string };
|
||||
/**
|
||||
* Google Cloud Platform account type.
|
||||
*/
|
||||
externalType?: string;
|
||||
/**
|
||||
* Google Security Token Service subject token type based on the OAuth 2.0 token exchange
|
||||
* spec.
|
||||
*/
|
||||
subjectTokenType?: string;
|
||||
/**
|
||||
* Google Security Token Service token exchange endpoint.
|
||||
*/
|
||||
tokenURL?: string;
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*
|
||||
* Pass the values to impersonate a service account of Google Cloud
|
||||
*/
|
||||
export interface GCPImpersonateServiceAccountValues {
|
||||
/**
|
||||
* The impersonated service account email
|
||||
*/
|
||||
impersonateServiceAccount?: string;
|
||||
/**
|
||||
* Number of seconds the delegated credential should be valid
|
||||
*/
|
||||
lifetime?: number;
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
export interface MWAAConfiguration {
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*/
|
||||
awsConfig: AWSCredentials;
|
||||
/**
|
||||
* The name of your MWAA environment.
|
||||
*/
|
||||
mwaaEnvironmentName: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configs.
|
||||
*/
|
||||
export interface AWSCredentials {
|
||||
|
|
@ -237,6 +415,24 @@ export interface AWSCredentials {
|
|||
profileName?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*
|
||||
* Common Database Connection Config
|
||||
*
|
||||
* IAM Auth Database Connection Config
|
||||
*
|
||||
* Azure Database Connection Config
|
||||
*/
|
||||
export interface AuthConfigurationType {
|
||||
/**
|
||||
* Password to connect to source.
|
||||
*/
|
||||
password?: string;
|
||||
awsConfig?: AWSCredentials;
|
||||
azureConfig?: AzureCredentials;
|
||||
}
|
||||
|
||||
/**
|
||||
* Azure Cloud Credentials
|
||||
*/
|
||||
|
|
@ -324,6 +520,8 @@ export interface DataStorageConfig {
|
|||
}
|
||||
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configs.
|
||||
*/
|
||||
export interface AwsCredentials {
|
||||
|
|
@ -428,6 +626,7 @@ export enum Type {
|
|||
Backend = "Backend",
|
||||
Mysql = "Mysql",
|
||||
Postgres = "Postgres",
|
||||
RESTAPI = "RestAPI",
|
||||
SQLite = "SQLite",
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -837,9 +837,8 @@ export interface ConfigObject {
|
|||
*
|
||||
* Choose between mysql and postgres connection for alation database
|
||||
*
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Matillion Auth Configuration
|
||||
*/
|
||||
|
|
@ -1060,7 +1059,7 @@ export interface ConfigObject {
|
|||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
credentials?: CredentialsClass;
|
||||
credentials?: PurpleGCPCredentials;
|
||||
/**
|
||||
* Regex to only include/exclude databases that matches the pattern.
|
||||
*
|
||||
|
|
@ -2372,6 +2371,8 @@ export enum AuthProvider {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*
|
||||
* Authentication type to connect to Apache Ranger.
|
||||
|
|
@ -2554,6 +2555,8 @@ export interface AuthenticationTypeForTableau {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface AWSCredentials {
|
||||
|
|
@ -3002,6 +3005,8 @@ export interface IcebergFileSystem {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*
|
||||
* Azure Cloud Credentials
|
||||
|
|
@ -3275,12 +3280,16 @@ export interface ConfigSourceConnection {
|
|||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*
|
||||
* AWS credentials required to access the S3 file.
|
||||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface Credentials {
|
||||
|
|
@ -3492,9 +3501,10 @@ export interface GCPImpersonateServiceAccountValues {
|
|||
*
|
||||
* Choose between mysql and postgres connection for alation database
|
||||
*
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Airflow REST API Connection Config for connecting via REST API.
|
||||
*
|
||||
* Lineage Backend Connection Config
|
||||
*
|
||||
|
|
@ -3541,8 +3551,11 @@ export interface ConfigConnection {
|
|||
* Username to connect to the Matillion. This user should have privileges to read all the
|
||||
* metadata in Matillion.
|
||||
*/
|
||||
username?: string;
|
||||
verifySSL?: VerifySSL;
|
||||
username?: string;
|
||||
/**
|
||||
* Whether to verify SSL certificates when connecting to the Airflow API.
|
||||
*/
|
||||
verifySSL?: boolean | VerifySSL;
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*/
|
||||
|
|
@ -3662,6 +3675,15 @@ export interface ConfigConnection {
|
|||
* <USERNAME> <PASSWORD>`
|
||||
*/
|
||||
userKey?: string;
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*/
|
||||
apiVersion?: APIVersion;
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*/
|
||||
authConfig?: AuthenticationConfiguration;
|
||||
/**
|
||||
* Regex exclude pipelines.
|
||||
*/
|
||||
|
|
@ -3673,6 +3695,92 @@ export interface ConfigConnection {
|
|||
supportsViewLineageExtraction?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*
|
||||
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
|
||||
* the version automatically.
|
||||
*/
|
||||
export enum APIVersion {
|
||||
Auto = "auto",
|
||||
V1 = "v1",
|
||||
V2 = "v2",
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*
|
||||
* Username and password for Airflow API authentication.
|
||||
*
|
||||
* Static access token for Airflow API authentication.
|
||||
*
|
||||
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
|
||||
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
|
||||
* runtime.
|
||||
*
|
||||
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
|
||||
*/
|
||||
export interface AuthenticationConfiguration {
|
||||
/**
|
||||
* Password for basic authentication to the Airflow API.
|
||||
*/
|
||||
password?: string;
|
||||
/**
|
||||
* Username for basic authentication to the Airflow API.
|
||||
*/
|
||||
username?: string;
|
||||
/**
|
||||
* Static access token for Airflow API authentication.
|
||||
*/
|
||||
token?: string;
|
||||
/**
|
||||
* GCP credentials configuration.
|
||||
*/
|
||||
credentials?: GcpConfigClass;
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
mwaaConfig?: MWAAConfiguration;
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP credentials configs.
|
||||
*
|
||||
* GCP Credentials
|
||||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
export interface GcpConfigClass {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
export interface MWAAConfiguration {
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*/
|
||||
awsConfig: AWSCredentials;
|
||||
/**
|
||||
* The name of your MWAA environment.
|
||||
*/
|
||||
mwaaEnvironmentName: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*
|
||||
|
|
@ -3738,6 +3846,8 @@ export interface DataStorageConfig {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface AwsCredentials {
|
||||
|
|
@ -3866,6 +3976,7 @@ export enum ConnectionType {
|
|||
MatillionETL = "MatillionETL",
|
||||
Mysql = "Mysql",
|
||||
Postgres = "Postgres",
|
||||
RESTAPI = "RestAPI",
|
||||
S3 = "S3",
|
||||
SQLite = "SQLite",
|
||||
}
|
||||
|
|
@ -3890,6 +4001,8 @@ export enum VerifySSL {
|
|||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*
|
||||
* Azure Cloud Credentials
|
||||
|
|
@ -3898,7 +4011,7 @@ export enum VerifySSL {
|
|||
*
|
||||
* Azure Credentials
|
||||
*/
|
||||
export interface CredentialsClass {
|
||||
export interface PurpleGCPCredentials {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
|
|
@ -4067,27 +4180,6 @@ export enum FHIRVersion {
|
|||
Stu3 = "STU3",
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP credentials configs.
|
||||
*
|
||||
* GCP Credentials
|
||||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
export interface GcpConfigClass {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* Do not set any credentials. Note that credentials are required to extract .lkml views and
|
||||
* their lineage.
|
||||
|
|
|
|||
|
|
@ -2818,6 +2818,8 @@ export interface DBTPrefixConfig {
|
|||
*
|
||||
* AWS credentials required to access the S3 file.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*
|
||||
* Azure Cloud Credentials
|
||||
|
|
@ -2832,6 +2834,8 @@ export interface DBTPrefixConfig {
|
|||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
export interface Credentials {
|
||||
|
|
@ -4132,9 +4136,8 @@ export interface ConfigObject {
|
|||
*
|
||||
* Choose between mysql and postgres connection for alation database
|
||||
*
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Matillion Auth Configuration
|
||||
*/
|
||||
|
|
@ -4355,7 +4358,7 @@ export interface ConfigObject {
|
|||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
credentials?: CredentialsClass;
|
||||
credentials?: PurpleGCPCredentials;
|
||||
/**
|
||||
* Regex to only include/exclude databases that matches the pattern.
|
||||
*
|
||||
|
|
@ -5565,6 +5568,8 @@ export enum AuthMechanismEnum {
|
|||
*
|
||||
* AWS credentials required to access the S3 file.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*
|
||||
* Authentication type to connect to Apache Ranger.
|
||||
|
|
@ -5747,6 +5752,8 @@ export interface AuthenticationTypeForTableau {
|
|||
*
|
||||
* AWS credentials required to access the S3 file.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface AWSCredentials {
|
||||
|
|
@ -6156,6 +6163,8 @@ export interface IcebergFileSystem {
|
|||
*
|
||||
* AWS credentials required to access the S3 file.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*
|
||||
* Azure Cloud Credentials
|
||||
|
|
@ -6441,9 +6450,10 @@ export interface ConfigSourceConnection {
|
|||
*
|
||||
* Choose between mysql and postgres connection for alation database
|
||||
*
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Airflow REST API Connection Config for connecting via REST API.
|
||||
*
|
||||
* Lineage Backend Connection Config
|
||||
*
|
||||
|
|
@ -6490,8 +6500,11 @@ export interface ConfigConnection {
|
|||
* Username to connect to the Matillion. This user should have privileges to read all the
|
||||
* metadata in Matillion.
|
||||
*/
|
||||
username?: string;
|
||||
verifySSL?: VerifySSL;
|
||||
username?: string;
|
||||
/**
|
||||
* Whether to verify SSL certificates when connecting to the Airflow API.
|
||||
*/
|
||||
verifySSL?: boolean | VerifySSL;
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*/
|
||||
|
|
@ -6611,6 +6624,15 @@ export interface ConfigConnection {
|
|||
* <USERNAME> <PASSWORD>`
|
||||
*/
|
||||
userKey?: string;
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*/
|
||||
apiVersion?: APIVersion;
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*/
|
||||
authConfig?: AuthenticationConfiguration;
|
||||
/**
|
||||
* Regex exclude pipelines.
|
||||
*/
|
||||
|
|
@ -6622,6 +6644,92 @@ export interface ConfigConnection {
|
|||
supportsViewLineageExtraction?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*
|
||||
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
|
||||
* the version automatically.
|
||||
*/
|
||||
export enum APIVersion {
|
||||
Auto = "auto",
|
||||
V1 = "v1",
|
||||
V2 = "v2",
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*
|
||||
* Username and password for Airflow API authentication.
|
||||
*
|
||||
* Static access token for Airflow API authentication.
|
||||
*
|
||||
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
|
||||
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
|
||||
* runtime.
|
||||
*
|
||||
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
|
||||
*/
|
||||
export interface AuthenticationConfiguration {
|
||||
/**
|
||||
* Password for basic authentication to the Airflow API.
|
||||
*/
|
||||
password?: string;
|
||||
/**
|
||||
* Username for basic authentication to the Airflow API.
|
||||
*/
|
||||
username?: string;
|
||||
/**
|
||||
* Static access token for Airflow API authentication.
|
||||
*/
|
||||
token?: string;
|
||||
/**
|
||||
* GCP credentials configuration.
|
||||
*/
|
||||
credentials?: GcpConfigClass;
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
mwaaConfig?: MWAAConfiguration;
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP credentials configs.
|
||||
*
|
||||
* GCP Credentials
|
||||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
export interface GcpConfigClass {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
export interface MWAAConfiguration {
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*/
|
||||
awsConfig: AWSCredentials;
|
||||
/**
|
||||
* The name of your MWAA environment.
|
||||
*/
|
||||
mwaaEnvironmentName: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*
|
||||
|
|
@ -6687,6 +6795,8 @@ export interface DataStorageConfig {
|
|||
*
|
||||
* AWS credentials required to access the S3 file.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface AwsCredentials {
|
||||
|
|
@ -6815,6 +6925,7 @@ export enum ConnectionType {
|
|||
MatillionETL = "MatillionETL",
|
||||
Mysql = "Mysql",
|
||||
Postgres = "Postgres",
|
||||
RESTAPI = "RestAPI",
|
||||
S3 = "S3",
|
||||
SQLite = "SQLite",
|
||||
}
|
||||
|
|
@ -6826,6 +6937,8 @@ export enum ConnectionType {
|
|||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*
|
||||
* Azure Cloud Credentials
|
||||
|
|
@ -6834,7 +6947,7 @@ export enum ConnectionType {
|
|||
*
|
||||
* Azure Credentials
|
||||
*/
|
||||
export interface CredentialsClass {
|
||||
export interface PurpleGCPCredentials {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
|
|
@ -7003,27 +7116,6 @@ export enum FHIRVersion {
|
|||
Stu3 = "STU3",
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP credentials configs.
|
||||
*
|
||||
* GCP Credentials
|
||||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
export interface GcpConfigClass {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* Do not set any credentials. Note that credentials are required to extract .lkml views and
|
||||
* their lineage.
|
||||
|
|
|
|||
|
|
@ -235,13 +235,12 @@ export interface PipelineConnection {
|
|||
*/
|
||||
export interface ConfigObject {
|
||||
/**
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Matillion Auth Configuration
|
||||
*/
|
||||
connection?: MetadataDatabaseConnection;
|
||||
connection?: AirflowConnection;
|
||||
/**
|
||||
* Pipeline Service Management/UI URI.
|
||||
*
|
||||
|
|
@ -572,6 +571,8 @@ export interface FluffyAuthentication {
|
|||
}
|
||||
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
|
|
@ -807,9 +808,10 @@ export interface AzureCredentials {
|
|||
}
|
||||
|
||||
/**
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Airflow REST API Connection Config for connecting via REST API.
|
||||
*
|
||||
* Lineage Backend Connection Config
|
||||
*
|
||||
|
|
@ -823,15 +825,28 @@ export interface AzureCredentials {
|
|||
*
|
||||
* Matillion ETL Auth Config.
|
||||
*/
|
||||
export interface MetadataDatabaseConnection {
|
||||
export interface AirflowConnection {
|
||||
/**
|
||||
* Regex exclude pipelines.
|
||||
* Airflow REST API version.
|
||||
*/
|
||||
pipelineFilterPattern?: FilterPattern;
|
||||
apiVersion?: APIVersion;
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*/
|
||||
authConfig?: AuthenticationConfiguration;
|
||||
/**
|
||||
* Service Type
|
||||
*/
|
||||
type?: Type;
|
||||
/**
|
||||
* Whether to verify SSL certificates when connecting to the Airflow API.
|
||||
*/
|
||||
verifySSL?: boolean;
|
||||
/**
|
||||
* Regex exclude pipelines.
|
||||
*/
|
||||
pipelineFilterPattern?: FilterPattern;
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*/
|
||||
|
|
@ -945,6 +960,188 @@ export interface MetadataDatabaseConnection {
|
|||
supportsViewLineageExtraction?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*
|
||||
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
|
||||
* the version automatically.
|
||||
*/
|
||||
export enum APIVersion {
|
||||
Auto = "auto",
|
||||
V1 = "v1",
|
||||
V2 = "v2",
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*
|
||||
* Username and password for Airflow API authentication.
|
||||
*
|
||||
* Static access token for Airflow API authentication.
|
||||
*
|
||||
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
|
||||
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
|
||||
* runtime.
|
||||
*
|
||||
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
|
||||
*/
|
||||
export interface AuthenticationConfiguration {
|
||||
/**
|
||||
* Password for basic authentication to the Airflow API.
|
||||
*/
|
||||
password?: string;
|
||||
/**
|
||||
* Username for basic authentication to the Airflow API.
|
||||
*/
|
||||
username?: string;
|
||||
/**
|
||||
* Static access token for Airflow API authentication.
|
||||
*/
|
||||
token?: string;
|
||||
/**
|
||||
* GCP credentials configuration.
|
||||
*/
|
||||
credentials?: GCPCredentials;
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
mwaaConfig?: MWAAConfiguration;
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP credentials configs.
|
||||
*/
|
||||
export interface GCPCredentials {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*
|
||||
* Pass the raw credential values provided by GCP
|
||||
*
|
||||
* Pass the path of file containing the GCP credentials info
|
||||
*
|
||||
* Use the application default credentials
|
||||
*/
|
||||
export interface GCPCredentialsConfiguration {
|
||||
/**
|
||||
* Google Cloud auth provider certificate.
|
||||
*/
|
||||
authProviderX509CertUrl?: string;
|
||||
/**
|
||||
* Google Cloud auth uri.
|
||||
*/
|
||||
authUri?: string;
|
||||
/**
|
||||
* Google Cloud email.
|
||||
*/
|
||||
clientEmail?: string;
|
||||
/**
|
||||
* Google Cloud Client ID.
|
||||
*/
|
||||
clientId?: string;
|
||||
/**
|
||||
* Google Cloud client certificate uri.
|
||||
*/
|
||||
clientX509CertUrl?: string;
|
||||
/**
|
||||
* Google Cloud private key.
|
||||
*/
|
||||
privateKey?: string;
|
||||
/**
|
||||
* Google Cloud private key id.
|
||||
*/
|
||||
privateKeyId?: string;
|
||||
/**
|
||||
* Project ID
|
||||
*
|
||||
* GCP Project ID to parse metadata from
|
||||
*/
|
||||
projectId?: string[] | string;
|
||||
/**
|
||||
* Google Cloud token uri.
|
||||
*/
|
||||
tokenUri?: string;
|
||||
/**
|
||||
* Google Cloud Platform account type.
|
||||
*
|
||||
* Google Cloud Platform ADC ( Application Default Credentials )
|
||||
*/
|
||||
type?: string;
|
||||
/**
|
||||
* Path of the file containing the GCP credentials info
|
||||
*/
|
||||
path?: string;
|
||||
/**
|
||||
* Google Security Token Service audience which contains the resource name for the workload
|
||||
* identity pool and the provider identifier in that pool.
|
||||
*/
|
||||
audience?: string;
|
||||
/**
|
||||
* This object defines the mechanism used to retrieve the external credential from the local
|
||||
* environment so that it can be exchanged for a GCP access token via the STS endpoint
|
||||
*/
|
||||
credentialSource?: { [key: string]: string };
|
||||
/**
|
||||
* Google Cloud Platform account type.
|
||||
*/
|
||||
externalType?: string;
|
||||
/**
|
||||
* Google Security Token Service subject token type based on the OAuth 2.0 token exchange
|
||||
* spec.
|
||||
*/
|
||||
subjectTokenType?: string;
|
||||
/**
|
||||
* Google Security Token Service token exchange endpoint.
|
||||
*/
|
||||
tokenURL?: string;
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*
|
||||
* Pass the values to impersonate a service account of Google Cloud
|
||||
*/
|
||||
export interface GCPImpersonateServiceAccountValues {
|
||||
/**
|
||||
* The impersonated service account email
|
||||
*/
|
||||
impersonateServiceAccount?: string;
|
||||
/**
|
||||
* Number of seconds the delegated credential should be valid
|
||||
*/
|
||||
lifetime?: number;
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
export interface MWAAConfiguration {
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*/
|
||||
awsConfig: AWSCredentials;
|
||||
/**
|
||||
* The name of your MWAA environment.
|
||||
*/
|
||||
mwaaEnvironmentName: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*
|
||||
|
|
@ -1026,6 +1223,8 @@ export interface DataStorageConfig {
|
|||
}
|
||||
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
|
|
@ -1111,6 +1310,7 @@ export enum Type {
|
|||
MatillionETL = "MatillionETL",
|
||||
Mysql = "Mysql",
|
||||
Postgres = "Postgres",
|
||||
RESTAPI = "RestAPI",
|
||||
SQLite = "SQLite",
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,277 @@
|
|||
/*
|
||||
* Copyright 2026 Collate.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/**
|
||||
* Airflow REST API Connection Config for connecting via REST API.
|
||||
*/
|
||||
export interface AirflowRESTAPIConnection {
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*/
|
||||
apiVersion?: APIVersion;
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*/
|
||||
authConfig: AuthenticationConfiguration;
|
||||
/**
|
||||
* Service Type
|
||||
*/
|
||||
type?: ServiceType;
|
||||
/**
|
||||
* Whether to verify SSL certificates when connecting to the Airflow API.
|
||||
*/
|
||||
verifySSL?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*
|
||||
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
|
||||
* the version automatically.
|
||||
*/
|
||||
export enum APIVersion {
|
||||
Auto = "auto",
|
||||
V1 = "v1",
|
||||
V2 = "v2",
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*
|
||||
* Username and password for Airflow API authentication.
|
||||
*
|
||||
* Static access token for Airflow API authentication.
|
||||
*
|
||||
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
|
||||
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
|
||||
* runtime.
|
||||
*
|
||||
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
|
||||
*/
|
||||
export interface AuthenticationConfiguration {
|
||||
/**
|
||||
* Password for basic authentication to the Airflow API.
|
||||
*/
|
||||
password?: string;
|
||||
/**
|
||||
* Username for basic authentication to the Airflow API.
|
||||
*/
|
||||
username?: string;
|
||||
/**
|
||||
* Static access token for Airflow API authentication.
|
||||
*/
|
||||
token?: string;
|
||||
/**
|
||||
* GCP credentials configuration.
|
||||
*/
|
||||
credentials?: GCPCredentials;
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
mwaaConfig?: MWAAConfiguration;
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP credentials configs.
|
||||
*/
|
||||
export interface GCPCredentials {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*
|
||||
* Pass the raw credential values provided by GCP
|
||||
*
|
||||
* Pass the path of file containing the GCP credentials info
|
||||
*
|
||||
* Use the application default credentials
|
||||
*/
|
||||
export interface GCPCredentialsConfiguration {
|
||||
/**
|
||||
* Google Cloud auth provider certificate.
|
||||
*/
|
||||
authProviderX509CertUrl?: string;
|
||||
/**
|
||||
* Google Cloud auth uri.
|
||||
*/
|
||||
authUri?: string;
|
||||
/**
|
||||
* Google Cloud email.
|
||||
*/
|
||||
clientEmail?: string;
|
||||
/**
|
||||
* Google Cloud Client ID.
|
||||
*/
|
||||
clientId?: string;
|
||||
/**
|
||||
* Google Cloud client certificate uri.
|
||||
*/
|
||||
clientX509CertUrl?: string;
|
||||
/**
|
||||
* Google Cloud private key.
|
||||
*/
|
||||
privateKey?: string;
|
||||
/**
|
||||
* Google Cloud private key id.
|
||||
*/
|
||||
privateKeyId?: string;
|
||||
/**
|
||||
* Project ID
|
||||
*
|
||||
* GCP Project ID to parse metadata from
|
||||
*/
|
||||
projectId?: string[] | string;
|
||||
/**
|
||||
* Google Cloud token uri.
|
||||
*/
|
||||
tokenUri?: string;
|
||||
/**
|
||||
* Google Cloud Platform account type.
|
||||
*
|
||||
* Google Cloud Platform ADC ( Application Default Credentials )
|
||||
*/
|
||||
type?: string;
|
||||
/**
|
||||
* Path of the file containing the GCP credentials info
|
||||
*/
|
||||
path?: string;
|
||||
/**
|
||||
* Google Security Token Service audience which contains the resource name for the workload
|
||||
* identity pool and the provider identifier in that pool.
|
||||
*/
|
||||
audience?: string;
|
||||
/**
|
||||
* This object defines the mechanism used to retrieve the external credential from the local
|
||||
* environment so that it can be exchanged for a GCP access token via the STS endpoint
|
||||
*/
|
||||
credentialSource?: { [key: string]: string };
|
||||
/**
|
||||
* Google Cloud Platform account type.
|
||||
*/
|
||||
externalType?: string;
|
||||
/**
|
||||
* Google Security Token Service subject token type based on the OAuth 2.0 token exchange
|
||||
* spec.
|
||||
*/
|
||||
subjectTokenType?: string;
|
||||
/**
|
||||
* Google Security Token Service token exchange endpoint.
|
||||
*/
|
||||
tokenURL?: string;
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*
|
||||
* Pass the values to impersonate a service account of Google Cloud
|
||||
*/
|
||||
export interface GCPImpersonateServiceAccountValues {
|
||||
/**
|
||||
* The impersonated service account email
|
||||
*/
|
||||
impersonateServiceAccount?: string;
|
||||
/**
|
||||
* Number of seconds the delegated credential should be valid
|
||||
*/
|
||||
lifetime?: number;
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
export interface MWAAConfiguration {
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*/
|
||||
awsConfig: AWSCredentials;
|
||||
/**
|
||||
* The name of your MWAA environment.
|
||||
*/
|
||||
mwaaEnvironmentName: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configs.
|
||||
*/
|
||||
export interface AWSCredentials {
|
||||
/**
|
||||
* The Amazon Resource Name (ARN) of the role to assume. Required Field in case of Assume
|
||||
* Role
|
||||
*/
|
||||
assumeRoleArn?: string;
|
||||
/**
|
||||
* An identifier for the assumed role session. Use the role session name to uniquely
|
||||
* identify a session when the same role is assumed by different principals or for different
|
||||
* reasons. Required Field in case of Assume Role
|
||||
*/
|
||||
assumeRoleSessionName?: string;
|
||||
/**
|
||||
* The Amazon Resource Name (ARN) of the role to assume. Optional Field in case of Assume
|
||||
* Role
|
||||
*/
|
||||
assumeRoleSourceIdentity?: string;
|
||||
/**
|
||||
* AWS Access key ID.
|
||||
*/
|
||||
awsAccessKeyId?: string;
|
||||
/**
|
||||
* AWS Region
|
||||
*/
|
||||
awsRegion: string;
|
||||
/**
|
||||
* AWS Secret Access Key.
|
||||
*/
|
||||
awsSecretAccessKey?: string;
|
||||
/**
|
||||
* AWS Session Token.
|
||||
*/
|
||||
awsSessionToken?: string;
|
||||
/**
|
||||
* Enable AWS IAM authentication. When enabled, uses the default credential provider chain
|
||||
* (environment variables, instance profile, etc.). Defaults to false for backward
|
||||
* compatibility.
|
||||
*/
|
||||
enabled?: boolean;
|
||||
/**
|
||||
* EndPoint URL for the AWS
|
||||
*/
|
||||
endPointURL?: string;
|
||||
/**
|
||||
* The name of a profile to use with the boto session.
|
||||
*/
|
||||
profileName?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Service Type
|
||||
*/
|
||||
export enum ServiceType {
|
||||
RESTAPI = "RestAPI",
|
||||
}
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
/*
|
||||
* Copyright 2026 Collate.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/**
|
||||
* Static access token for Airflow API authentication.
|
||||
*/
|
||||
export interface AccessTokenConfig {
|
||||
/**
|
||||
* Static access token for Airflow API authentication.
|
||||
*/
|
||||
token: string;
|
||||
}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
* Copyright 2026 Collate.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/**
|
||||
* Username and password for Airflow API authentication.
|
||||
*/
|
||||
export interface BasicAuthConfig {
|
||||
/**
|
||||
* Password for basic authentication to the Airflow API.
|
||||
*/
|
||||
password: string;
|
||||
/**
|
||||
* Username for basic authentication to the Airflow API.
|
||||
*/
|
||||
username: string;
|
||||
}
|
||||
|
|
@ -0,0 +1,141 @@
|
|||
/*
|
||||
* Copyright 2026 Collate.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/**
|
||||
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
|
||||
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at runtime.
|
||||
*/
|
||||
export interface GcpCredentialsConfig {
|
||||
/**
|
||||
* GCP credentials configuration.
|
||||
*/
|
||||
credentials: GCPCredentials;
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP credentials configs.
|
||||
*/
|
||||
export interface GCPCredentials {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*
|
||||
* Pass the raw credential values provided by GCP
|
||||
*
|
||||
* Pass the path of file containing the GCP credentials info
|
||||
*
|
||||
* Use the application default credentials
|
||||
*/
|
||||
export interface GCPCredentialsConfiguration {
|
||||
/**
|
||||
* Google Cloud auth provider certificate.
|
||||
*/
|
||||
authProviderX509CertUrl?: string;
|
||||
/**
|
||||
* Google Cloud auth uri.
|
||||
*/
|
||||
authUri?: string;
|
||||
/**
|
||||
* Google Cloud email.
|
||||
*/
|
||||
clientEmail?: string;
|
||||
/**
|
||||
* Google Cloud Client ID.
|
||||
*/
|
||||
clientId?: string;
|
||||
/**
|
||||
* Google Cloud client certificate uri.
|
||||
*/
|
||||
clientX509CertUrl?: string;
|
||||
/**
|
||||
* Google Cloud private key.
|
||||
*/
|
||||
privateKey?: string;
|
||||
/**
|
||||
* Google Cloud private key id.
|
||||
*/
|
||||
privateKeyId?: string;
|
||||
/**
|
||||
* Project ID
|
||||
*
|
||||
* GCP Project ID to parse metadata from
|
||||
*/
|
||||
projectId?: string[] | string;
|
||||
/**
|
||||
* Google Cloud token uri.
|
||||
*/
|
||||
tokenUri?: string;
|
||||
/**
|
||||
* Google Cloud Platform account type.
|
||||
*
|
||||
* Google Cloud Platform ADC ( Application Default Credentials )
|
||||
*/
|
||||
type?: string;
|
||||
/**
|
||||
* Path of the file containing the GCP credentials info
|
||||
*/
|
||||
path?: string;
|
||||
/**
|
||||
* Google Security Token Service audience which contains the resource name for the workload
|
||||
* identity pool and the provider identifier in that pool.
|
||||
*/
|
||||
audience?: string;
|
||||
/**
|
||||
* This object defines the mechanism used to retrieve the external credential from the local
|
||||
* environment so that it can be exchanged for a GCP access token via the STS endpoint
|
||||
*/
|
||||
credentialSource?: { [key: string]: string };
|
||||
/**
|
||||
* Google Cloud Platform account type.
|
||||
*/
|
||||
externalType?: string;
|
||||
/**
|
||||
* Google Security Token Service subject token type based on the OAuth 2.0 token exchange
|
||||
* spec.
|
||||
*/
|
||||
subjectTokenType?: string;
|
||||
/**
|
||||
* Google Security Token Service token exchange endpoint.
|
||||
*/
|
||||
tokenURL?: string;
|
||||
[property: string]: any;
|
||||
}
|
||||
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*
|
||||
* Pass the values to impersonate a service account of Google Cloud
|
||||
*/
|
||||
export interface GCPImpersonateServiceAccountValues {
|
||||
/**
|
||||
* The impersonated service account email
|
||||
*/
|
||||
impersonateServiceAccount?: string;
|
||||
/**
|
||||
* Number of seconds the delegated credential should be valid
|
||||
*/
|
||||
lifetime?: number;
|
||||
[property: string]: any;
|
||||
}
|
||||
|
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
* Copyright 2026 Collate.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/**
|
||||
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
|
||||
*/
|
||||
export interface MwaaAuthConfig {
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
mwaaConfig: MWAAConfiguration;
|
||||
}
|
||||
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
export interface MWAAConfiguration {
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*/
|
||||
awsConfig: AWSCredentials;
|
||||
/**
|
||||
* The name of your MWAA environment.
|
||||
*/
|
||||
mwaaEnvironmentName: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configs.
|
||||
*/
|
||||
export interface AWSCredentials {
|
||||
/**
|
||||
* The Amazon Resource Name (ARN) of the role to assume. Required Field in case of Assume
|
||||
* Role
|
||||
*/
|
||||
assumeRoleArn?: string;
|
||||
/**
|
||||
* An identifier for the assumed role session. Use the role session name to uniquely
|
||||
* identify a session when the same role is assumed by different principals or for different
|
||||
* reasons. Required Field in case of Assume Role
|
||||
*/
|
||||
assumeRoleSessionName?: string;
|
||||
/**
|
||||
* The Amazon Resource Name (ARN) of the role to assume. Optional Field in case of Assume
|
||||
* Role
|
||||
*/
|
||||
assumeRoleSourceIdentity?: string;
|
||||
/**
|
||||
* AWS Access key ID.
|
||||
*/
|
||||
awsAccessKeyId?: string;
|
||||
/**
|
||||
* AWS Region
|
||||
*/
|
||||
awsRegion: string;
|
||||
/**
|
||||
* AWS Secret Access Key.
|
||||
*/
|
||||
awsSecretAccessKey?: string;
|
||||
/**
|
||||
* AWS Session Token.
|
||||
*/
|
||||
awsSessionToken?: string;
|
||||
/**
|
||||
* Enable AWS IAM authentication. When enabled, uses the default credential provider chain
|
||||
* (environment variables, instance profile, etc.). Defaults to false for backward
|
||||
* compatibility.
|
||||
*/
|
||||
enabled?: boolean;
|
||||
/**
|
||||
* EndPoint URL for the AWS
|
||||
*/
|
||||
endPointURL?: string;
|
||||
/**
|
||||
* The name of a profile to use with the boto session.
|
||||
*/
|
||||
profileName?: string;
|
||||
}
|
||||
|
|
@ -881,9 +881,8 @@ export interface ConfigObject {
|
|||
*
|
||||
* Choose between mysql and postgres connection for alation database
|
||||
*
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Matillion Auth Configuration
|
||||
*/
|
||||
|
|
@ -1104,7 +1103,7 @@ export interface ConfigObject {
|
|||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
credentials?: CredentialsClass;
|
||||
credentials?: PurpleGCPCredentials;
|
||||
/**
|
||||
* Regex to only include/exclude databases that matches the pattern.
|
||||
*
|
||||
|
|
@ -2416,6 +2415,8 @@ export enum AuthProvider {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*
|
||||
* Authentication type to connect to Apache Ranger.
|
||||
|
|
@ -2598,6 +2599,8 @@ export interface AuthenticationTypeForTableau {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface AWSCredentials {
|
||||
|
|
@ -3046,6 +3049,8 @@ export interface IcebergFileSystem {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*
|
||||
* Azure Cloud Credentials
|
||||
|
|
@ -3319,12 +3324,16 @@ export interface ConfigSourceConnection {
|
|||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*
|
||||
* AWS credentials required to access the S3 file.
|
||||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface Credentials {
|
||||
|
|
@ -3536,9 +3545,10 @@ export interface GCPImpersonateServiceAccountValues {
|
|||
*
|
||||
* Choose between mysql and postgres connection for alation database
|
||||
*
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Airflow REST API Connection Config for connecting via REST API.
|
||||
*
|
||||
* Lineage Backend Connection Config
|
||||
*
|
||||
|
|
@ -3585,8 +3595,11 @@ export interface ConfigConnection {
|
|||
* Username to connect to the Matillion. This user should have privileges to read all the
|
||||
* metadata in Matillion.
|
||||
*/
|
||||
username?: string;
|
||||
verifySSL?: VerifySSL;
|
||||
username?: string;
|
||||
/**
|
||||
* Whether to verify SSL certificates when connecting to the Airflow API.
|
||||
*/
|
||||
verifySSL?: boolean | VerifySSL;
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*/
|
||||
|
|
@ -3706,6 +3719,15 @@ export interface ConfigConnection {
|
|||
* <USERNAME> <PASSWORD>`
|
||||
*/
|
||||
userKey?: string;
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*/
|
||||
apiVersion?: APIVersion;
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*/
|
||||
authConfig?: AuthenticationConfiguration;
|
||||
/**
|
||||
* Regex exclude pipelines.
|
||||
*/
|
||||
|
|
@ -3717,6 +3739,92 @@ export interface ConfigConnection {
|
|||
supportsViewLineageExtraction?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*
|
||||
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
|
||||
* the version automatically.
|
||||
*/
|
||||
export enum APIVersion {
|
||||
Auto = "auto",
|
||||
V1 = "v1",
|
||||
V2 = "v2",
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*
|
||||
* Username and password for Airflow API authentication.
|
||||
*
|
||||
* Static access token for Airflow API authentication.
|
||||
*
|
||||
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
|
||||
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
|
||||
* runtime.
|
||||
*
|
||||
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
|
||||
*/
|
||||
export interface AuthenticationConfiguration {
|
||||
/**
|
||||
* Password for basic authentication to the Airflow API.
|
||||
*/
|
||||
password?: string;
|
||||
/**
|
||||
* Username for basic authentication to the Airflow API.
|
||||
*/
|
||||
username?: string;
|
||||
/**
|
||||
* Static access token for Airflow API authentication.
|
||||
*/
|
||||
token?: string;
|
||||
/**
|
||||
* GCP credentials configuration.
|
||||
*/
|
||||
credentials?: GcpConfigClass;
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
mwaaConfig?: MWAAConfiguration;
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP credentials configs.
|
||||
*
|
||||
* GCP Credentials
|
||||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
export interface GcpConfigClass {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
export interface MWAAConfiguration {
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*/
|
||||
awsConfig: AWSCredentials;
|
||||
/**
|
||||
* The name of your MWAA environment.
|
||||
*/
|
||||
mwaaEnvironmentName: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*
|
||||
|
|
@ -3782,6 +3890,8 @@ export interface DataStorageConfig {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface AwsCredentials {
|
||||
|
|
@ -3910,6 +4020,7 @@ export enum ConnectionType {
|
|||
MatillionETL = "MatillionETL",
|
||||
Mysql = "Mysql",
|
||||
Postgres = "Postgres",
|
||||
RESTAPI = "RestAPI",
|
||||
S3 = "S3",
|
||||
SQLite = "SQLite",
|
||||
}
|
||||
|
|
@ -3934,6 +4045,8 @@ export enum VerifySSL {
|
|||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*
|
||||
* Azure Cloud Credentials
|
||||
|
|
@ -3942,7 +4055,7 @@ export enum VerifySSL {
|
|||
*
|
||||
* Azure Credentials
|
||||
*/
|
||||
export interface CredentialsClass {
|
||||
export interface PurpleGCPCredentials {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
|
|
@ -4111,27 +4224,6 @@ export enum FHIRVersion {
|
|||
Stu3 = "STU3",
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP credentials configs.
|
||||
*
|
||||
* GCP Credentials
|
||||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
export interface GcpConfigClass {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* Do not set any credentials. Note that credentials are required to extract .lkml views and
|
||||
* their lineage.
|
||||
|
|
|
|||
|
|
@ -926,9 +926,8 @@ export interface ConfigObject {
|
|||
*
|
||||
* Choose between mysql and postgres connection for alation database
|
||||
*
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Matillion Auth Configuration
|
||||
*/
|
||||
|
|
@ -1149,7 +1148,7 @@ export interface ConfigObject {
|
|||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
credentials?: CredentialsClass;
|
||||
credentials?: PurpleGCPCredentials;
|
||||
/**
|
||||
* Regex to only include/exclude databases that matches the pattern.
|
||||
*
|
||||
|
|
@ -2474,6 +2473,8 @@ export enum AuthProvider {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*
|
||||
* Authentication type to connect to Apache Ranger.
|
||||
|
|
@ -2656,6 +2657,8 @@ export interface AuthenticationTypeForTableau {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface AWSCredentials {
|
||||
|
|
@ -3104,6 +3107,8 @@ export interface IcebergFileSystem {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*
|
||||
* Azure Cloud Credentials
|
||||
|
|
@ -3377,12 +3382,16 @@ export interface ConfigSourceConnection {
|
|||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*
|
||||
* AWS credentials required to access the S3 file.
|
||||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface Credentials {
|
||||
|
|
@ -3594,9 +3603,10 @@ export interface GCPImpersonateServiceAccountValues {
|
|||
*
|
||||
* Choose between mysql and postgres connection for alation database
|
||||
*
|
||||
* Underlying database connection. See
|
||||
* https://airflow.apache.org/docs/apache-airflow/stable/howto/set-up-database.html for
|
||||
* supported backends.
|
||||
* Choose between database connection or REST API connection to fetch metadata from
|
||||
* Airflow.
|
||||
*
|
||||
* Airflow REST API Connection Config for connecting via REST API.
|
||||
*
|
||||
* Lineage Backend Connection Config
|
||||
*
|
||||
|
|
@ -3643,8 +3653,11 @@ export interface ConfigConnection {
|
|||
* Username to connect to the Matillion. This user should have privileges to read all the
|
||||
* metadata in Matillion.
|
||||
*/
|
||||
username?: string;
|
||||
verifySSL?: VerifySSL;
|
||||
username?: string;
|
||||
/**
|
||||
* Whether to verify SSL certificates when connecting to the Airflow API.
|
||||
*/
|
||||
verifySSL?: boolean | VerifySSL;
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*/
|
||||
|
|
@ -3764,6 +3777,15 @@ export interface ConfigConnection {
|
|||
* <USERNAME> <PASSWORD>`
|
||||
*/
|
||||
userKey?: string;
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*/
|
||||
apiVersion?: APIVersion;
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*/
|
||||
authConfig?: AuthenticationConfiguration;
|
||||
/**
|
||||
* Regex exclude pipelines.
|
||||
*/
|
||||
|
|
@ -3775,6 +3797,92 @@ export interface ConfigConnection {
|
|||
supportsViewLineageExtraction?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Airflow REST API version.
|
||||
*
|
||||
* Airflow REST API version. Use v1 for Airflow 2.x and v2 for Airflow 3.x. Auto will detect
|
||||
* the version automatically.
|
||||
*/
|
||||
export enum APIVersion {
|
||||
Auto = "auto",
|
||||
V1 = "v1",
|
||||
V2 = "v2",
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose an authentication method: Basic Auth (username/password), Access Token, GCP
|
||||
* Service Account (for Cloud Composer), or AWS Credentials (for MWAA).
|
||||
*
|
||||
* Username and password for Airflow API authentication.
|
||||
*
|
||||
* Static access token for Airflow API authentication.
|
||||
*
|
||||
* GCP credentials for Google Cloud Composer. Supports service account values, credentials
|
||||
* path, workload identity (external account), and ADC. Tokens are auto-refreshed at
|
||||
* runtime.
|
||||
*
|
||||
* AWS MWAA (Managed Workflows for Apache Airflow) authentication configuration.
|
||||
*/
|
||||
export interface AuthenticationConfiguration {
|
||||
/**
|
||||
* Password for basic authentication to the Airflow API.
|
||||
*/
|
||||
password?: string;
|
||||
/**
|
||||
* Username for basic authentication to the Airflow API.
|
||||
*/
|
||||
username?: string;
|
||||
/**
|
||||
* Static access token for Airflow API authentication.
|
||||
*/
|
||||
token?: string;
|
||||
/**
|
||||
* GCP credentials configuration.
|
||||
*/
|
||||
credentials?: GcpConfigClass;
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
mwaaConfig?: MWAAConfiguration;
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP credentials configs.
|
||||
*
|
||||
* GCP Credentials
|
||||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
export interface GcpConfigClass {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* MWAA credentials and environment configuration.
|
||||
*/
|
||||
export interface MWAAConfiguration {
|
||||
/**
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*/
|
||||
awsConfig: AWSCredentials;
|
||||
/**
|
||||
* The name of your MWAA environment.
|
||||
*/
|
||||
mwaaEnvironmentName: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Choose Auth Config Type.
|
||||
*
|
||||
|
|
@ -3840,6 +3948,8 @@ export interface DataStorageConfig {
|
|||
*
|
||||
* AWS credentials configs.
|
||||
*
|
||||
* AWS credentials for generating MWAA CLI token.
|
||||
*
|
||||
* AWS credentials configuration.
|
||||
*/
|
||||
export interface AwsCredentials {
|
||||
|
|
@ -3968,6 +4078,7 @@ export enum ConnectionType {
|
|||
MatillionETL = "MatillionETL",
|
||||
Mysql = "Mysql",
|
||||
Postgres = "Postgres",
|
||||
RESTAPI = "RestAPI",
|
||||
S3 = "S3",
|
||||
SQLite = "SQLite",
|
||||
}
|
||||
|
|
@ -3992,6 +4103,8 @@ export enum VerifySSL {
|
|||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP credentials configuration.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*
|
||||
* Azure Cloud Credentials
|
||||
|
|
@ -4000,7 +4113,7 @@ export enum VerifySSL {
|
|||
*
|
||||
* Azure Credentials
|
||||
*/
|
||||
export interface CredentialsClass {
|
||||
export interface PurpleGCPCredentials {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
|
|
@ -4169,27 +4282,6 @@ export enum FHIRVersion {
|
|||
Stu3 = "STU3",
|
||||
}
|
||||
|
||||
/**
|
||||
* GCP credentials configs.
|
||||
*
|
||||
* GCP Credentials
|
||||
*
|
||||
* GCP credentials configuration for authenticating with Pub/Sub.
|
||||
*
|
||||
* GCP Credentials for Google Drive API
|
||||
*/
|
||||
export interface GcpConfigClass {
|
||||
/**
|
||||
* We support two ways of authenticating to GCP i.e via GCP Credentials Values or GCP
|
||||
* Credentials Path
|
||||
*/
|
||||
gcpConfig: GCPCredentialsConfiguration;
|
||||
/**
|
||||
* we enable the authenticated service account to impersonate another service account
|
||||
*/
|
||||
gcpImpersonateServiceAccount?: GCPImpersonateServiceAccountValues;
|
||||
}
|
||||
|
||||
/**
|
||||
* Do not set any credentials. Note that credentials are required to extract .lkml views and
|
||||
* their lineage.
|
||||
|
|
|
|||
Loading…
Reference in a new issue