mirror of
https://github.com/open-metadata/OpenMetadata
synced 2026-05-24 09:39:11 +00:00
IBM's public CDN (public.dhe.ibm.com) has been unreliable, causing CI build failures with "Failed to connect to ... port 443". Switch all ingestion Dockerfiles to wget the .deb from cdn.getcollate.io with SHA256 verification.

Changes:
- ingestion/Dockerfile + Dockerfile.ci: replace the apt-list + apt-install pattern with a direct wget + dpkg, matching the operators' existing shape.
- ingestion/operators/docker/Dockerfile + Dockerfile.ci: bump the pinned version 1.1.0.13 (2022) -> 1.1.0.29 (matches the production ingestion-slim image) and add SHA256 verification.

The CDN-mirrored .deb is byte-identical to IBM's upstream (verified by SHA256). Production ingestion-slim:1.13.0-n103 already runs 1.1.0.29 (confirmed via dpkg -l inside the image). This decouples Docker builds from IBM's CDN availability — the recent CI failure mode (curl timeout to public.dhe.ibm.com) can no longer occur.
124 lines
5.1 KiB
Docker
124 lines
5.1 KiB
Docker
# Base image: Python 3.10 on Debian bookworm.
FROM python:3.10-bookworm
|
|
|
|
# Register Microsoft's apt signing key and package list so that msodbcsql18 can be
# installed by a later apt-get step.
# curl runs with --fail (-f) and the key is written to a file instead of being piped,
# so an HTTP error stops the build rather than handing an error page to apt-key or
# writing it into the sources list.
# NOTE(review): the package list targets Debian 11 while the base image tag is
# bookworm (Debian 12) — confirm this is intentional.
RUN curl -fsS https://packages.microsoft.com/keys/microsoft.asc -o /tmp/microsoft.asc \
    && apt-key add /tmp/microsoft.asc \
    && rm -f /tmp/microsoft.asc
RUN curl -fsS https://packages.microsoft.com/config/debian/11/prod.list \
      -o /etc/apt/sources.list.d/mssql-release.list
|
|
|
|
# Install OS-level dependencies in a single layer (package names sorted alphabetically).
# odbcinst, unixodbc and unixodbc-dev are pinned to the same 2.3.11-2+deb12u1 build
# to ensure compatibility between odbcinst and the unixodbc package.
# msodbcsql18 is installed in its own apt-get call with ACCEPT_EULA=Y, which accepts
# the MSSQL ODBC license.
RUN dpkg --configure -a \
    && apt-get -qq update \
    && apt-get -qq install -y --no-install-recommends \
        alien \
        build-essential \
        default-jdk \
        default-libmysqlclient-dev \
        freetds-bin \
        freetds-dev \
        gcc \
        git \
        gnupg \
        libaio1 \
        libevent-dev \
        libffi-dev \
        libkrb5-dev \
        libpq-dev \
        librdkafka-dev \
        libsasl2-2 \
        libsasl2-dev \
        libsasl2-modules \
        libsasl2-modules-gssapi-mit \
        libssl-dev \
        libxml2 \
        odbcinst=2.3.11-2+deb12u1 \
        openssl \
        postgresql \
        postgresql-contrib \
        tdsodbc \
        unixodbc=2.3.11-2+deb12u1 \
        unixodbc-dev=2.3.11-2+deb12u1 \
        unzip \
        wget \
    && ACCEPT_EULA=Y apt-get -qq install -y msodbcsql18 \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Add updated postgres/redshift dependencies based on libpq, taken from the
# PostgreSQL apt repository (bookworm-pgdg).
ENV DEBIAN_FRONTEND=noninteractive
# Fetch the repository signing key with --fail into a file before adding it, so a
# failed download cannot be passed on to apt-key unnoticed.
RUN curl -fsS https://www.postgresql.org/media/keys/ACCC4CF8.asc -o /tmp/pgdg.asc \
    && apt-key add /tmp/pgdg.asc \
    && rm -f /tmp/pgdg.asc
# Every step is chained with && so that a failing update/install aborts the build
# instead of being ignored; apt caches and lists are removed in the same layer.
RUN echo "deb https://apt.postgresql.org/pub/repos/apt/ bookworm-pgdg main" > /etc/apt/sources.list.d/pgdg.list \
    && apt-get -qq update \
    && apt-get -qq install --no-install-recommends -y \
        libpq-dev \
        postgresql \
        postgresql-client \
        postgresql-common \
        postgresql-contrib \
    && apt-get -qq autoremove -yqq --purge \
    && apt-get -qq clean \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Download the Oracle Instant Client archive that matches the build architecture and
# unpack it flat (unzip -j) into /instantclient.
# The architecture is selected with a case statement: arm64/aarch64 get the ARM build,
# everything else gets the x64 build. Combining both names in a single
# `[ ... || ... ]` test is not valid sh (the `||` splits the test into two broken
# commands), which makes the ARM branch unreachable.
RUN case "$(uname -m)" in \
      arm64|aarch64) \
        ORACLE_ZIP_URL="https://download.oracle.com/otn_software/linux/instantclient/191000/instantclient-basic-linux.arm64-19.10.0.0.0dbru.zip" ;; \
      *) \
        ORACLE_ZIP_URL="https://download.oracle.com/otn_software/linux/instantclient/1917000/instantclient-basic-linux.x64-19.17.0.0.0dbru.zip" ;; \
    esac \
    && wget -q "${ORACLE_ZIP_URL}" -O /oracle-instantclient.zip \
    && unzip -qq -d /instantclient -j /oracle-instantclient.zip \
    && rm -f /oracle-instantclient.zip

# Let the dynamic linker find the Instant Client libraries unpacked above.
ENV LD_LIBRARY_PATH=/instantclient
|
|
|
|
# Install the DB2 iAccess driver (only when building on x86_64).
# The .deb is mirrored on cdn.getcollate.io to decouple builds from IBM's CDN availability.
# The download is verified against a SHA256 pinned to v1.1.0.29, the version that
# production ingestion-slim images run.
# dpkg is called with --force-depends because the package declares old Debian package
# names (libodbc1, odbcinst1debian2) that don't exist in Debian 12, while the actual
# dependencies (unixodbc, odbcinst) are already installed.
RUN case "$(uname -m)" in \
      x86_64) \
        wget -q https://cdn.getcollate.io/deps/ingestion/ibm/ibm-iaccess-1.1.0.29-1.0.amd64.deb -O /tmp/ibm-iaccess.deb \
        && echo "e60e968d2cee96b2851964456f5b31ab990b1aa47d8f2399607809f7d4514f58 /tmp/ibm-iaccess.deb" | sha256sum -c - \
        && dpkg -i --force-depends /tmp/ibm-iaccess.deb \
        && apt-get install -f -y --no-install-recommends \
        && rm -f /tmp/ibm-iaccess.deb \
        ;; \
    esac
|
|
|
|
# Use an absolute working directory so its location does not depend on whatever
# directory the base image leaves current; later steps refer to it as /ingestion.
WORKDIR /ingestion

# Required for Airflow DockerOperator, as we need to run the workflows from a `python main.py` command in the container.
# The wildcard can match several files, so the destination is written as a directory (trailing slash).
COPY ingestion/operators/docker/*.py ./
|
|
|
|
# Run as a dedicated non-root account (UID/GID 1000) with a writable home directory.
# If the container is started with securityContext runAsUser: 1000 and /etc/passwd
# has no entry for that UID, Python disables user site-packages. Packages that tools
# such as spacy install through `pip --user` are then invisible to the import system
# and fail at runtime. Registering the user makes Python recognise UID 1000 and
# enables the standard ~/.local install path.
RUN groupadd --gid 1000 openmetadata \
    && useradd --create-home --uid 1000 --gid 1000 openmetadata \
    && chown -R openmetadata:openmetadata /ingestion
ENV HOME=/home/openmetadata \
    PATH="/home/openmetadata/.local/bin:${PATH}"
USER openmetadata
|
|
|
|
# pip settings:
#   PIP_NO_CACHE_DIR=1  disable the pip cache dir
#                       (https://pip.pypa.io/en/stable/topics/caching/#avoiding-caching)
#   PIP_QUIET=1         make pip silent
ENV PIP_NO_CACHE_DIR=1 \
    PIP_QUIET=1
|
|
|
|
# pkg_resources was removed in setuptools 81.0.0+, and cx-Oracle's setup.py still
# uses it, so setuptools is held below 81 while pip is upgraded.
RUN pip install --upgrade \
      pip \
      "setuptools<81"

# Pre-install cx-Oracle with build isolation turned off so that its build uses the
# pinned setuptools from the previous step.
RUN pip install --no-build-isolation \
      "cx_Oracle>=8.3.0,<9"
|
|
|
|
# Build arguments:
#   INGESTION_DEPENDENCY  extras group of openmetadata-ingestion to install (default "all")
#   RI_VERSION            version used in the compatible-release (~=) requirement
ARG INGESTION_DEPENDENCY="all"
ARG RI_VERSION="1.12.0.0.dev0"

# Install the airflow extra first, then the extras selected by INGESTION_DEPENDENCY.
RUN pip install \
      "openmetadata-ingestion[airflow]~=${RI_VERSION}"
RUN pip install \
      "openmetadata-ingestion[${INGESTION_DEPENDENCY}]~=${RI_VERSION}"
|
|
|
|
|
|
# Temporary workaround for https://github.com/open-metadata/OpenMetadata/issues/9593
# Install the db2 extra only on x86_64; other architectures just print a notice.
# An explicit if/else is used instead of `test && install || echo`, because with the
# latter a failed pip install on x86_64 would fall through to the echo and the build
# would succeed without the db2 extra.
RUN if [ "$(uname -m)" = "x86_64" ]; then \
      pip install "openmetadata-ingestion[db2]~=${RI_VERSION}"; \
    else \
      echo "DB2 not supported on ARM architectures."; \
    fi
|
|
|
|
# Replace psycopg2-binary with psycopg2: the binary wheel produces an
# architecture-specific error while authenticating the connection with Airflow,
# and psycopg2 solves this error. mysqlclient 2.1.1 is installed in the same step.
RUN pip uninstall -y psycopg2-binary \
    && pip install \
        psycopg2 \
        mysqlclient==2.1.1
|