mirror of
https://github.com/open-metadata/OpenMetadata
synced 2026-05-24 09:39:11 +00:00
* fix(rdf): reclaim Fuseki disk via compaction + upgrade Jena 4.10 → 5.6.0 PR #28117's SPARQL cleanup converged the logical RDF state but never freed disk: TDB2 deletes only mark blocks free and the journal grows monotonically until /$/compact runs. RdfIndexApp.clearRdfData() now calls a new RdfStorageInterface.compactStorage() between clearAll() and reloadOntologies() so each recreate run reclaims to a fresh dataset directory. JenaFusekiStorage posts to /$/compact/{dataset}?deleteOld=true and polls /$/tasks/{id} until finished, with failures logged and swallowed (disk hygiene, not correctness). Also unifies the Jena classpath: openmetadata-service was on 4.10.0 and openmetadata-integration-tests on 5.0.0. Both now pin to 5.6.0 via a single root pom property, dropping the apache-jena-libs BOM in favour of explicit jena-core/arq/rdfconnection deps (we're a remote-Fuseki client and never embed TDB; pulling jena-tdb1/2 triggers a Jena 5/6 static-init regression). Picks up CVE-2025-49656 and CVE-2025-50151 (admin-side fixes shipped in Jena 5.5.0). Jena 6.x parked: both 6.0.0 and 6.1.0 hit a recursive clinit bug where TypeMapper.reset reads RDF.dtLangString before RDF.<clinit> completes. Fuseki server bumped 4.10/5.0 → 5.6.0 across all in-repo Dockerfiles; the unmaintained stain/jena-fuseki:* image references in dev compose files switched to building from docker/rdf-store/Dockerfile, and Testcontainers moved to secoresearch/fuseki:5.5.0 (maintained, CVE-fixed; the dataset is created by JenaFusekiStorage.ensureDatasetExists() so the stain-only FUSEKI_DATASET_1 env var is no longer needed).
96 lines
3.7 KiB
YAML
96 lines
3.7 KiB
YAML
---
version: "3.9"

# Compose override for RDF-enabled local stacks.
# Use together with docker-compose.yml or docker-compose-postgres.yml.
#
# It adds a `fuseki` service and sets the RDF_* variables on
# `execute-migrate-all` and `openmetadata-server`, both of which wait for
# Fuseki to report healthy before they start.

services:
  execute-migrate-all:
    environment:
      # Every value can be overridden from the shell or an .env file; the text
      # after `:-` is the local-dev default. Values are quoted so an empty or
      # boolean-looking expansion is still handed over as a string.
      RDF_ENABLED: "${RDF_ENABLED:-true}"
      RDF_STORAGE_TYPE: "${RDF_STORAGE_TYPE:-FUSEKI}"
      # Default host `fuseki` is the hostname of the service defined below.
      RDF_ENDPOINT: "${RDF_ENDPOINT:-http://fuseki:3030/openmetadata}"
      # NOTE(review): these default to admin/admin, the same default as
      # FUSEKI_ADMIN_PASSWORD on the `fuseki` service. Presumably they must be
      # overridden together -- confirm before changing either one.
      RDF_REMOTE_USERNAME: "${RDF_REMOTE_USERNAME:-admin}"
      RDF_REMOTE_PASSWORD: "${RDF_REMOTE_PASSWORD:-admin}"
      RDF_BASE_URI: "${RDF_BASE_URI:-https://open-metadata.org/}"
      RDF_JSONLD_ENABLED: "${RDF_JSONLD_ENABLED:-true}"
      RDF_SPARQL_ENABLED: "${RDF_SPARQL_ENABLED:-true}"
      RDF_DATASET: "${RDF_DATASET:-openmetadata}"
    depends_on:
      fuseki:
        condition: service_healthy

  openmetadata-server:
    environment:
      # Same RDF settings and defaults as `execute-migrate-all`; keep the two
      # lists in sync when adding or renaming a variable.
      RDF_ENABLED: "${RDF_ENABLED:-true}"
      RDF_STORAGE_TYPE: "${RDF_STORAGE_TYPE:-FUSEKI}"
      RDF_ENDPOINT: "${RDF_ENDPOINT:-http://fuseki:3030/openmetadata}"
      RDF_REMOTE_USERNAME: "${RDF_REMOTE_USERNAME:-admin}"
      RDF_REMOTE_PASSWORD: "${RDF_REMOTE_PASSWORD:-admin}"
      RDF_BASE_URI: "${RDF_BASE_URI:-https://open-metadata.org/}"
      RDF_JSONLD_ENABLED: "${RDF_JSONLD_ENABLED:-true}"
      RDF_SPARQL_ENABLED: "${RDF_SPARQL_ENABLED:-true}"
      RDF_DATASET: "${RDF_DATASET:-openmetadata}"
    depends_on:
      fuseki:
        condition: service_healthy

  fuseki:
    # Build from the in-repo Dockerfile (Fuseki 5.6.0) instead of the
    # unmaintained `stain/jena-fuseki` Docker Hub image, which capped at 5.1.0
    # and never picked up the 2025 admin-side Fuseki CVE fixes (CVE-2025-49656,
    # CVE-2025-50151 — both fixed in Jena 5.5.0). The `image:` tag below names
    # the locally-built image so subsequent `docker compose up` runs reuse the
    # cached build instead of rebuilding from scratch each time.
    build:
      context: ../rdf-store
      dockerfile: Dockerfile
    image: openmetadata-fuseki:5.6.0
    container_name: openmetadata-fuseki
    hostname: fuseki
    ports:
      - "3030:3030"
    networks:
      - local_app_net
    environment:
      # Default for local dev only — production deployments MUST override
      # via the FUSEKI_ADMIN_PASSWORD env var (and FUSEKI_OPENMETADATA_PASSWORD)
      # before bringing this stack up. The entrypoint envsubsts these into
      # shiro.ini at container start so the override actually takes effect.
      FUSEKI_ADMIN_PASSWORD: "${FUSEKI_ADMIN_PASSWORD:-admin}"
      FUSEKI_OPENMETADATA_PASSWORD: "${FUSEKI_OPENMETADATA_PASSWORD:-openmetadata-secret}"
      # The default heap ceiling (-Xmx1500m) sits below the 2G container
      # memory limit set under `deploy` further down.
      JVM_ARGS: "${FUSEKI_JVM_ARGS:--Xmx1500m -Xms256m}"
    volumes:
      # New volume name (was `fuseki-data` mounted at `/fuseki`). The in-repo
      # Dockerfile stores TDB2 at `/fuseki-data` and the data layout differs
      # from the old stain/jena-fuseki image — re-using the previous volume
      # name would mount stale state at a path Fuseki no longer reads from,
      # silently looking like an empty database. Using a fresh volume name
      # forces operators to consciously migrate (or accept a re-index). The
      # orphaned `fuseki-data` volume can be removed manually with
      # `docker volume rm fuseki-data` after confirming the new stack is
      # healthy.
      - fuseki-tdb2-data:/fuseki-data
    deploy:
      resources:
        limits:
          memory: 2G
        reservations:
          memory: 256m
    restart: "on-failure:3"
    healthcheck:
      # The ping URL contains a literal `$`. `$$` is Compose's documented
      # escape for a literal dollar sign, and the single quotes stop the shell
      # that runs the CMD-SHELL check from treating `$/ping` as an expansion.
      test:
        - CMD-SHELL
        - "curl -s -f 'http://localhost:3030/$$/ping' > /dev/null || exit 1"
      interval: 15s
      timeout: 10s
      retries: 20
      start_period: 60s

networks:
  # NOTE(review): presumably mirrors the network declared by the base compose
  # file this override is layered on — verify name and subnet stay in sync.
  local_app_net:
    name: ometa_network
    ipam:
      driver: default
      config:
        - subnet: "172.16.239.0/24"

volumes:
  fuseki-tdb2-data:
    driver: local