mirror of
https://github.com/open-metadata/OpenMetadata
synced 2026-05-24 09:39:11 +00:00
* fix(rdf): reclaim Fuseki disk via compaction + upgrade Jena 4.10 → 5.6.0 PR #28117's SPARQL cleanup converged the logical RDF state but never freed disk: TDB2 deletes only mark blocks free and the journal grows monotonically until /$/compact runs. RdfIndexApp.clearRdfData() now calls a new RdfStorageInterface.compactStorage() between clearAll() and reloadOntologies() so each recreate run reclaims to a fresh dataset directory. JenaFusekiStorage posts to /$/compact/{dataset}?deleteOld=true and polls /$/tasks/{id} until finished, with failures logged and swallowed (disk hygiene, not correctness). Also unifies the Jena classpath: openmetadata-service was on 4.10.0 and openmetadata-integration-tests on 5.0.0. Both now pin to 5.6.0 via a single root pom property, dropping the apache-jena-libs BOM in favour of explicit jena-core/arq/rdfconnection deps (we're a remote-Fuseki client and never embed TDB; pulling jena-tdb1/2 triggers a Jena 5/6 static-init regression). Picks up CVE-2025-49656 and CVE-2025-50151 (admin-side fixes shipped in Jena 5.5.0). Jena 6.x parked: both 6.0.0 and 6.1.0 hit a recursive clinit bug where TypeMapper.reset reads RDF.dtLangString before RDF.<clinit> completes. Fuseki server bumped 4.10/5.0 → 5.6.0 across all in-repo Dockerfiles; the unmaintained stain/jena-fuseki:* image references in dev compose files switched to building from docker/rdf-store/Dockerfile, and Testcontainers moved to secoresearch/fuseki:5.5.0 (maintained, CVE-fixed; the dataset is created by JenaFusekiStorage.ensureDatasetExists() so the stain-only FUSEKI_DATASET_1 env var is no longer needed).
61 lines
No EOL
1.8 KiB
Text
61 lines
No EOL
1.8 KiB
Text
# Apache Jena Fuseki image, intended to build on multiple architectures.
# Base: Eclipse Temurin 17 JRE on Ubuntu Jammy.
FROM eclipse-temurin:17-jre-jammy

# Install wget, curl and CA certificates.
# The first `apt-get update` is allowed to fail (workaround for GPG signature
# errors on the package indexes). If the install then fails, the package lists
# are wiped, refreshed with --fix-missing, and the install is retried once; a
# failed retry fails the build. The apt lists are removed at the end of the
# same layer.
RUN apt-get update || true; \
    if ! apt-get install -y --no-install-recommends ca-certificates curl wget; then \
        rm -rf /var/lib/apt/lists/* && \
        apt-get clean && \
        apt-get update --fix-missing && \
        apt-get install -y --no-install-recommends wget curl ca-certificates || exit 1; \
    fi; \
    rm -rf /var/lib/apt/lists/*

# Fuseki release to install and the directory it is installed into.
ENV FUSEKI_VERSION=5.6.0 \
    FUSEKI_HOME=/fuseki

# Fetch the Fuseki release tarball from the Apache archive into /tmp, unpack it
# there, move the unpacked contents into FUSEKI_HOME, and delete the tarball and
# the emptied directory in the same layer.
RUN release="apache-jena-fuseki-${FUSEKI_VERSION}" && \
    mkdir -p "${FUSEKI_HOME}" && \
    wget -q -P /tmp "https://archive.apache.org/dist/jena/binaries/${release}.tar.gz" && \
    tar -xzf "/tmp/${release}.tar.gz" -C /tmp && \
    mv "/tmp/${release}"/* "${FUSEKI_HOME}/" && \
    rm -rf "/tmp/${release}.tar.gz" "/tmp/${release}"

# Pre-create the run/ and databases/ directories under FUSEKI_HOME.
RUN mkdir -p "${FUSEKI_HOME}/run" "${FUSEKI_HOME}/databases"

# Run from the Fuseki installation directory.
WORKDIR ${FUSEKI_HOME}

# Port the Fuseki server is expected to listen on (documentation only).
EXPOSE 3030

# JVM heap settings: 2 GiB initial, 4 GiB maximum.
# NOTE(review): presumably read by the fuseki-server launcher through JVM_ARGS - confirm.
ENV JVM_ARGS="-Xmx4g -Xms2g"

# Generate /entrypoint.sh and make it executable.
#
# The script is written with `printf '%s\n'`, one quoted argument per script
# line, instead of `echo '...\n...'`:
#   - it does not depend on the shell's echo interpreting backslash escapes;
#   - no continued line starts with '#', so the Dockerfile parser cannot drop
#     the script's own comment lines as Dockerfile comments.
# The arguments are single-quoted, so $ADMIN_PASSWORD and ${FUSEKI_HOME} are
# written literally and expanded when the container starts, not at build time.
#
# At container start the script:
#   1. if ADMIN_PASSWORD is non-empty, invokes fuseki-server with --passwd and a
#      here-document containing "admin=<password>";
#      NOTE(review): confirm that this invocation is a supported way to set the
#      admin password for this Fuseki version and that it returns instead of
#      staying in the foreground.
#   2. creates the databases/openmetadata directory if it is missing;
#   3. replaces itself (exec) with fuseki-server serving /openmetadata with
#      updates enabled from that directory.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'if [ -n "$ADMIN_PASSWORD" ]; then' \
        '    echo "Setting admin password..."' \
        '    "${FUSEKI_HOME}/fuseki-server" --passwd --update /fuseki/run/shiro.ini <<EOF' \
        'admin=$ADMIN_PASSWORD' \
        'EOF' \
        'fi' \
        '' \
        '# Create openmetadata database if it does not exist' \
        'mkdir -p "${FUSEKI_HOME}/databases/openmetadata"' \
        '' \
        '# Start Fuseki' \
        'exec "${FUSEKI_HOME}/fuseki-server" --update --loc="${FUSEKI_HOME}/databases/openmetadata" /openmetadata' \
        > /entrypoint.sh && \
    chmod +x /entrypoint.sh

# Declare the database and run directories as volumes for persistent data.
VOLUME ["${FUSEKI_HOME}/databases", "${FUSEKI_HOME}/run"]

# Start the container through the generated launcher script (exec form).
ENTRYPOINT ["/entrypoint.sh"]