mirror of
https://github.com/open-metadata/OpenMetadata
synced 2026-05-24 09:39:11 +00:00
* fix(rdf): reclaim Fuseki disk via compaction + upgrade Jena 4.10 → 5.6.0 PR #28117's SPARQL cleanup converged the logical RDF state but never freed disk: TDB2 deletes only mark blocks free and the journal grows monotonically until /$/compact runs. RdfIndexApp.clearRdfData() now calls a new RdfStorageInterface.compactStorage() between clearAll() and reloadOntologies() so each recreate run reclaims to a fresh dataset directory. JenaFusekiStorage posts to /$/compact/{dataset}?deleteOld=true and polls /$/tasks/{id} until finished, with failures logged and swallowed (disk hygiene, not correctness). Also unifies the Jena classpath: openmetadata-service was on 4.10.0 and openmetadata-integration-tests on 5.0.0. Both now pin to 5.6.0 via a single root pom property, dropping the apache-jena-libs BOM in favour of explicit jena-core/arq/rdfconnection deps (we're a remote-Fuseki client and never embed TDB; pulling jena-tdb1/2 triggers a Jena 5/6 static-init regression). Picks up CVE-2025-49656 and CVE-2025-50151 (admin-side fixes shipped in Jena 5.5.0). Jena 6.x parked: both 6.0.0 and 6.1.0 hit a recursive clinit bug where TypeMapper.reset reads RDF.dtLangString before RDF.<clinit> completes. Fuseki server bumped 4.10/5.0 → 5.6.0 across all in-repo Dockerfiles; the unmaintained stain/jena-fuseki:* image references in dev compose files switched to building from docker/rdf-store/Dockerfile, and Testcontainers moved to secoresearch/fuseki:5.5.0 (maintained, CVE-fixed; the dataset is created by JenaFusekiStorage.ensureDatasetExists() so the stain-only FUSEKI_DATASET_1 env var is no longer needed).
44 lines
No EOL
1.1 KiB
YAML
44 lines
No EOL
1.1 KiB
YAML
# Compose override file: increases disk storage for persistent volumes.
# Place this file in the same directory as docker-compose-rdf.yml.
#
# NOTE(review): under the Compose Specification the top-level `version`
# key is informational only; presumably it is kept so that older
# docker-compose releases accept this file alongside the base file -
# confirm before removing.
version: "3.9"

# Named volumes backed by host directories. Each one uses the `local`
# driver with `type: none` + `o: bind`, i.e. a bind mount of `device`.
# NOTE(review): `device` is a relative path - confirm that your Compose
# version resolves it against this file's directory, and that the
# directory exists before the stack is started.
volumes:
  # Increase Elasticsearch data volume
  es-data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: ./docker-volume/elasticsearch-data

  # Fuseki data volume. Renamed from `fuseki-data` (and host path changed
  # from `./docker-volume/fuseki-data` to `./docker-volume/fuseki-tdb2-data`)
  # to match docker-compose-rdf.yml — see ../development/docker-compose-fuseki.yml
  # for the migration rationale (new on-disk layout vs the old stain image).
  fuseki-tdb2-data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: ./docker-volume/fuseki-tdb2-data

# Per-service resource-limit overrides; these keys are merged into the
# service definitions of the same name in the base Compose file.
services:
  # Additional Elasticsearch optimizations
  elasticsearch:
    ulimits:
      # -1 removes the locked-memory limit (soft and hard).
      memlock:
        soft: -1
        hard: -1
      # Raise the open-file-descriptor limit.
      nofile:
        soft: 65536
        hard: 65536
    # NOTE(review): vm.max_map_count does not appear to be a namespaced
    # sysctl, so the container runtime may reject it here; if so, set it
    # on the host instead (sysctl -w vm.max_map_count=262144) - confirm.
    sysctls:
      - vm.max_map_count=262144

  # Additional Fuseki optimizations
  fuseki:
    ulimits:
      # Raise the open-file-descriptor limit.
      nofile:
        soft: 65536
        hard: 65536