# OpenMetadata/ingestion/tests/unit/test_parser_connection_module.py

#  Copyright 2025 Collate
#  Licensed under the Collate Community License, Version 1.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#  https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
"""
Unit tests for parser.get_connection_class()
Tests for Issue #22920 - Connection module import handling

ISSUE #22920 - Root Cause Analysis
===================================

The Schema Generation Pattern:
-------------------------------
Most service connection files use camelCase naming:
  - BigQuery    -> bigQueryConnection.py    (first char lower, rest same)
  - AzureSQL    -> azureSQLConnection.py    (first char lower, rest same)
  - DynamoDB    -> dynamoDBConnection.py    (first char lower, rest same)
  - MariaDB     -> mariaDBConnection.py     (first char lower, rest same)

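Three exceptions lowercase the entire service name (only the "Connection"
suffix keeps its capital C):
  - SAS         -> sasConnection.py         (service name all lowercase)
  - SQLite      -> sqliteConnection.py      (service name all lowercase)
  - SSAS        -> ssasConnection.py        (service name all lowercase)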

The Original Bug:
-----------------
The code only used: source_type[0].lower() + source_type[1:] + "Connection"
This worked for most services but FAILED for the 3 lowercase exceptions:
  - "SAS" produced "sASConnection" but file is "sasConnection.py"

On case-insensitive filesystems (macOS), this worked by accident.
On case-sensitive filesystems (Linux/Docker), imports failed:
  ModuleNotFoundError: No module named '...sASConnection'

The Solution:
-------------
The current implementation uses a try-except pattern:
1. Try standard camelCase: "BigQuery" -> "bigQueryConnection" (most services)
2. Fallback to lowercase: "SAS" -> "sasConnection" (3 exceptions)

This handles both naming patterns without hardcoded lists.

Performance Impact:
-------------------
- Standard services (44): Single import, ~5-10ms
- Exceptional services (3): First import fails + fallback, ~12-20ms
- Negligible impact: Only 3 out of 47 services use fallback
"""

import pytest

from metadata.generated.schema.entity.services.databaseService import (
    DatabaseConnection,
    DatabaseServiceType,
)
from metadata.ingestion.api.parser import get_connection_class


class TestGetConnectionClass:
    """
    Test suite for get_connection_class(), checking that it resolves the
    generated connection module and class for database service types.

    This covers the fix for Issue #22920: service names whose generated
    module file lowercases the whole name (SAS, SQLite, SSAS) must resolve
    just like the ones that only lowercase the first character
    (BigQuery, AzureSQL, Mysql, ...).

    Only the lookup call itself is guarded against import errors; assertions
    are never wrapped in a broad ``except`` so that pytest can report them
    with full introspection.
    """

    # Services whose module file keeps inner capitals (first char lowercased only)
    CAMELCASE_SERVICES = [  # noqa: RUF012
        "AzureSQL",
        "BigQuery",
        "BigTable",
        "DeltaLake",
        "DomoDatabase",
        "DynamoDB",
        "MariaDB",
        "MongoDB",
        "PinotDB",
        "SapErp",
        "SapHana",
        "ServiceNow",
        "SingleStore",
        "UnityCatalog",
    ]

    # Services whose module file lowercases the whole service name (exceptions)
    LOWERCASE_SERVICES = [  # noqa: RUF012
        "SAS",  # sasConnection.py
        "SQLite",  # sqliteConnection.py
        "SSAS",  # ssasConnection.py
    ]

    # Services with a single leading capital, where lowercasing the first
    # character and lowercasing the whole name give the same module name
    SIMPLE_CASE_SERVICES = [  # noqa: RUF012
        "Athena",
        "Cassandra",
        "Clickhouse",
        "Cockroach",
        "Couchbase",
        "Databricks",
        "Datalake",
        "Db2",
        "Doris",
        "Druid",
        "Epic",
        "Exasol",
        "Glue",
        "Greenplum",
        "Hive",
        "Impala",
        "Mssql",
        "Mysql",
        "Oracle",
        "Postgres",
        "Presto",
        "Redshift",
        "Salesforce",
        "Snowflake",
        "Synapse",
        "Teradata",
        "Timescale",
        "Trino",
        "Vertica",
    ]

    @staticmethod
    def _load_connection_class(service_name):
        """
        Resolve the connection class for ``service_name`` and run the checks
        shared by the parametrized tests.

        Args:
            service_name: Service type name passed to get_connection_class().

        Returns:
            The class returned by get_connection_class(), after asserting it
            is not None and is named ``<service_name>Connection``.
        """
        # Guard only the lookup, and only for the import error this suite is
        # about; anything else (including assertion failures) propagates
        # untouched so pytest shows the real traceback.
        try:
            connection_class = get_connection_class(service_name, DatabaseConnection)
        except ModuleNotFoundError as e:
            pytest.fail(f"Failed to import connection class for {service_name}: {e}")

        # Verify we got a valid class
        assert connection_class is not None, f"get_connection_class returned None for {service_name}"

        # Verify class name follows expected pattern
        expected_class_name = f"{service_name}Connection"
        assert connection_class.__name__ == expected_class_name, (
            f"Expected class name '{expected_class_name}', got '{connection_class.__name__}'"
        )
        return connection_class

    @staticmethod
    def _assert_module_contains(connection_class, expected_module_name):
        """Assert that ``expected_module_name`` occurs in the class's ``__module__`` path."""
        assert expected_module_name in connection_class.__module__, (
            f"Expected module to contain '{expected_module_name}', got '{connection_class.__module__}'"
        )

    @pytest.mark.parametrize("service_name", CAMELCASE_SERVICES)
    def test_camelcase_services(self, service_name):
        """
        Test services that use camelCase in their module file names.

        These services have capital letters beyond the first character:
        - BigQuery -> bigQueryConnection.py
        - AzureSQL -> azureSQLConnection.py
        - DynamoDB -> dynamoDBConnection.py
        """
        connection_class = self._load_connection_class(service_name)

        # Expected camelCase module name: first char lowercase, rest unchanged
        expected_module_name = service_name[0].lower() + service_name[1:] + "Connection"
        self._assert_module_contains(connection_class, expected_module_name)

    @pytest.mark.parametrize("service_name", LOWERCASE_SERVICES)
    def test_lowercase_services(self, service_name):
        """
        Test services whose module file lowercases the whole service name.

        These are the exceptions: SAS, SQLite, SSAS
        - SAS -> sasConnection.py (not sASConnection.py)
        - SQLite -> sqliteConnection.py (not sQLiteConnection.py)
        - SSAS -> ssasConnection.py (not sSASConnection.py)
        """
        connection_class = self._load_connection_class(service_name)

        # Expected module name: entire service name lowercased
        expected_module_name = service_name.lower() + "Connection"
        self._assert_module_contains(connection_class, expected_module_name)

    @pytest.mark.parametrize("service_name", SIMPLE_CASE_SERVICES)
    def test_simple_case_services(self, service_name):
        """
        Test services where simple first-char lowercase works.

        These services naturally work with: first char lowercase, rest same
        - Mysql -> mysqlConnection.py
        - Athena -> athenaConnection.py
        """
        connection_class = self._load_connection_class(service_name)

        # Expected simple-case module name: first char lowercase, rest unchanged
        expected_module_name = service_name[0].lower() + service_name[1:] + "Connection"
        self._assert_module_contains(connection_class, expected_module_name)

    def test_all_database_services(self):
        """
        Test that every database service type expected to have a connection
        class can successfully import it.

        CustomDatabase, QueryLog, and Dbt are skipped: this test assumes they
        are DatabaseServiceType members without a connection module.

        All failures are collected and reported together rather than stopping
        at the first one.
        """
        skipped_services = {"CustomDatabase", "QueryLog", "Dbt"}  # No connection modules

        # Derive the testable set by filtering the enum, so the totals stay
        # correct even if a skipped name is not (or no longer) an enum member.
        testable_services = [
            service.value for service in DatabaseServiceType if service.value not in skipped_services
        ]
        assert testable_services, "DatabaseServiceType has no testable services"

        failed_services = []
        for service_name in testable_services:
            try:
                connection_class = get_connection_class(service_name, DatabaseConnection)
            except Exception as e:
                # Deliberately broad: record any lookup failure and continue so
                # the final report lists every broken service.
                failed_services.append((service_name, str(e)))
                continue

            if connection_class is None:
                failed_services.append((service_name, "get_connection_class returned None"))

        if failed_services:
            total_testable = len(testable_services)
            success_count = total_testable - len(failed_services)
            failure_details = "\n".join(f"  - {name}: {error}" for name, error in failed_services)
            pytest.fail(
                f"Failed to import {len(failed_services)} out of "
                f"{total_testable} services:\n"
                f"{failure_details}\n\n"
                f"Successfully imported {success_count} services."
            )

    def test_sas_connection_specific(self):
        """
        Specific test for SAS connection (the original issue #22920).

        SAS is one of the exceptions whose module lowercases the whole name:
        - File: sasConnection.py
        - Uses fallback import path
        """
        try:
            connection_class = get_connection_class("SAS", DatabaseConnection)
        except ModuleNotFoundError as e:
            pytest.fail(
                f"SAS connection import failed with "
                f"ModuleNotFoundError: {e}\n"
                f"This is the exact bug reported in Issue #22920.\n"
                f"The fix should use fallback to lowercase."
            )

        # Verify class details
        assert connection_class.__name__ == "SASConnection"
        assert "sasConnection" in connection_class.__module__

        # Verify it exposes a field mapping under either attribute name
        assert hasattr(connection_class, "model_fields") or hasattr(connection_class, "__fields__")

    def test_bigquery_connection_specific(self):
        """
        Specific test for BigQuery connection.

        BigQuery uses camelCase in the module file:
        - File: bigQueryConnection.py (NOT bigqueryConnection.py)
        - Standard import path works
        """
        try:
            connection_class = get_connection_class("BigQuery", DatabaseConnection)
        except ModuleNotFoundError as e:
            pytest.fail(f"BigQuery connection import failed: {e}\nExpected module 'bigQueryConnection' (camelCase).")

        assert connection_class.__name__ == "BigQueryConnection"
        # Expect camelCase module name
        assert "bigQueryConnection" in connection_class.__module__

    def test_azuresql_connection_specific(self):
        """
        Specific test for AzureSQL connection.

        AzureSQL uses camelCase in the module file:
        - File: azureSQLConnection.py (NOT azuresqlConnection.py)
        - Standard import path works
        """
        try:
            connection_class = get_connection_class("AzureSQL", DatabaseConnection)
        except ModuleNotFoundError as e:
            pytest.fail(f"AzureSQL connection import failed: {e}\nExpected module 'azureSQLConnection' (camelCase).")

        assert connection_class.__name__ == "AzureSQLConnection"
        # Expect camelCase module name
        assert "azureSQLConnection" in connection_class.__module__

    def test_dynamodb_connection_specific(self):
        """
        Specific test for DynamoDB connection.

        DynamoDB uses camelCase in the module file:
        - File: dynamoDBConnection.py (NOT dynamodbConnection.py)
        - Standard import path works
        """
        try:
            connection_class = get_connection_class("DynamoDB", DatabaseConnection)
        except ModuleNotFoundError as e:
            pytest.fail(f"DynamoDB connection import failed: {e}\nExpected module 'dynamoDBConnection' (camelCase).")

        assert connection_class.__name__ == "DynamoDBConnection"
        # Expect camelCase module name
        assert "dynamoDBConnection" in connection_class.__module__

    def test_module_name_generation_formula(self):
        """
        Test the exact module file name resolved for representative services.

        This test documents the expected behavior:

        Most services use camelCase (first char lowercase, rest same):
        - BigQuery -> bigQueryConnection.py
        - AzureSQL -> azureSQLConnection.py
        - DynamoDB -> dynamoDBConnection.py

        Three exceptions lowercase the whole service name:
        - SAS -> sasConnection.py (not sASConnection.py)
        - SQLite -> sqliteConnection.py (not sQLiteConnection.py)
        - SSAS -> ssasConnection.py (not sSASConnection.py)

        Every case is evaluated and all mismatches are reported together.
        """
        test_cases = {
            # Whole-name-lowercase exceptions (use fallback)
            "SAS": "sasConnection",
            "SQLite": "sqliteConnection",
            "SSAS": "ssasConnection",
            # CamelCase services (standard path)
            "BigQuery": "bigQueryConnection",
            "AzureSQL": "azureSQLConnection",
            "DynamoDB": "dynamoDBConnection",
            # Simple lowercase services
            "Mysql": "mysqlConnection",
            "Glue": "glueConnection",
            "Db2": "db2Connection",
        }

        failures = []
        for service_name, expected_module_name in test_cases.items():
            try:
                connection_class = get_connection_class(service_name, DatabaseConnection)
            except Exception as e:
                # Deliberately broad: record the lookup failure and keep
                # checking the remaining cases.
                failures.append(f"Failed test for {service_name}: {e}")
                continue

            if connection_class is None:
                failures.append(f"Failed test for {service_name}: get_connection_class returned None")
                continue

            # Extract just the module filename (last dotted component)
            actual_module_name = connection_class.__module__.rsplit(".", 1)[-1]
            if actual_module_name != expected_module_name:
                failures.append(
                    f"For service '{service_name}': "
                    f"expected module '{expected_module_name}', "
                    f"got '{actual_module_name}'"
                )

        if failures:
            pytest.fail("\n".join(failures))