db-tune: add --diagnose for read-only DBA findings

The tuning report we already had is a static playbook (table → recipe + row-count gate). Operators rightly asked whether db-tune actually inspects the live database for *its own* signals — unused indexes, bloat, slow queries, cache hit ratios — and surfaces recommendations from those measurements. Until now: no.

This adds a parallel read-only path:

  ./bootstrap/openmetadata-ops.sh db-tune --diagnose

Postgres categories (each isolated in its own try block; missing extension or permissions surfaces in DbTuneDiagnosis.notes rather than failing the run):

- UNUSED_INDEX — pg_stat_user_indexes idx_scan=0, size > 10 MB, non-unique non-pkey
- HIGH_DEAD_TUPLES — n_dead_tup/n_live_tup > 0.2 with n_live_tup > 10k (autovacuum falling behind)
- LOW_CACHE_HIT — heap_blks_read > 1000 AND hit ratio < 90%
- STALE_STATS — last_autoanalyze NULL or > 14 days, n_live_tup > 1000
- SEQ_SCAN_HEAVY — seq_scan/idx_scan > 10 with > 1000 seq scans (suggests missing index)
- SLOW_QUERY — pg_stat_statements top 10 by mean_exec_time, calls > 100 (gracefully skipped if extension absent)

MySQL categories:

- UNUSED_INDEX — sys.schema_unused_indexes filtered to current schema
- LOW_BUFFER_POOL_HIT — Innodb_buffer_pool_reads / Innodb_buffer_pool_read_requests < 99%
- SLOW_QUERY — performance_schema.events_statements_summary_by_digest top 10 by avg_timer_wait
- FULL_TABLE_SCAN — sys.statements_with_full_table_scans

Concept stays separate from AutoTuner: Diagnostic is read-only and never participates in --apply. Categories with zero findings are suppressed in the report; notes capture what couldn't be checked. Each finding is structured as (category, severity, attributes) with the attribute keys driven by DiagnosticCategory.columns() so the renderer dispatches a category-specific layout.

Tests: 50 unit tests passing (40 → 50, +10 in DiagnosticReportTest covering EnumMap grouping order, empty/non-empty rendering, suppression, notes appending, query truncation, and column-list immutability). DbTuneIT gains diagnoseCompletesWithoutErrorAndReturnsStructuredResult, which exercises the full end-to-end path against the live Testcontainer (read-only, safe).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Sriharsha Chintalapani 2026-05-05 16:38:05 -07:00
parent 28cdd7d3a1
commit 2d9f047b71
11 changed files with 887 additions and 4 deletions

View file

@ -28,9 +28,13 @@ import org.openmetadata.it.bootstrap.TestSuiteBootstrap;
import org.openmetadata.service.jdbi3.locator.ConnectionType;
import org.openmetadata.service.util.dbtune.Action;
import org.openmetadata.service.util.dbtune.AutoTuner;
import org.openmetadata.service.util.dbtune.DbTuneDiagnosis;
import org.openmetadata.service.util.dbtune.DbTuneResult;
import org.openmetadata.service.util.dbtune.Diagnostic;
import org.openmetadata.service.util.dbtune.MysqlAutoTuner;
import org.openmetadata.service.util.dbtune.MysqlDiagnostic;
import org.openmetadata.service.util.dbtune.PostgresAutoTuner;
import org.openmetadata.service.util.dbtune.PostgresDiagnostic;
import org.openmetadata.service.util.dbtune.TableRecommendation;
/**
@ -131,6 +135,23 @@ class DbTuneIT {
jdbi.useHandle(handle -> tuner.analyzeOne(handle, ISOLATED_TABLE));
}
@Test
void diagnoseCompletesWithoutErrorAndReturnsStructuredResult() {
  // Pick the diagnostic matching the database engine under test, then run it in one handle.
  final Diagnostic engineDiagnostic = currentDiagnostic();
  final DbTuneDiagnosis outcome =
      TestSuiteBootstrap.getJdbi().withHandle(engineDiagnostic::diagnose);

  assertNotNull(outcome, "diagnose() must return a non-null diagnosis");
  assertNotNull(outcome.findings(), "findings list must be present (empty allowed)");
  assertNotNull(outcome.notes(), "notes list must be present (empty allowed)");

  // On a freshly-bootstrapped IT DB we expect either:
  //   - an empty diagnosis (nothing has accumulated yet to flag), OR
  //   - notes about missing optional extensions like pg_stat_statements.
  // Either is fine — what we're really asserting is the diagnostic ran end-to-end without
  // throwing on the live schema.
}
@Test
void dryRunDoesNotMutateReloptions() {
AutoTuner tuner = currentTuner();
@ -153,6 +174,12 @@ class DbTuneIT {
: new MysqlAutoTuner();
}
/** Returns the diagnostic implementation for the engine the IT run targets (MySQL by default). */
private Diagnostic currentDiagnostic() {
  if (currentConnectionType() == ConnectionType.POSTGRES) {
    return new PostgresDiagnostic();
  }
  return new MysqlDiagnostic();
}
private ConnectionType currentConnectionType() {
return "mysql".equalsIgnoreCase(System.getProperty("databaseType", "postgres"))
? ConnectionType.MYSQL

View file

@ -134,10 +134,14 @@ import org.openmetadata.service.secrets.SecretsManagerUpdateService;
import org.openmetadata.service.security.auth.SecurityConfigurationManager;
import org.openmetadata.service.security.jwt.JWTTokenGenerator;
import org.openmetadata.service.util.dbtune.AutoTuner;
import org.openmetadata.service.util.dbtune.DbTuneDiagnosis;
import org.openmetadata.service.util.dbtune.DbTuneReport;
import org.openmetadata.service.util.dbtune.DbTuneResult;
import org.openmetadata.service.util.dbtune.Diagnostic;
import org.openmetadata.service.util.dbtune.MysqlAutoTuner;
import org.openmetadata.service.util.dbtune.MysqlDiagnostic;
import org.openmetadata.service.util.dbtune.PostgresAutoTuner;
import org.openmetadata.service.util.dbtune.PostgresDiagnostic;
import org.openmetadata.service.util.dbtune.TableRecommendation;
import org.openmetadata.service.util.jdbi.DatabaseAuthenticationProviderFactory;
import org.openmetadata.service.util.jdbi.JdbiUtils;
@ -186,7 +190,8 @@ public class OpenMetadataOperations implements Callable<Integer> {
"Use 'reindex --auto-tune' for automatic performance optimization based on cluster capabilities");
LOG.info(
"Use 'db-tune' for a per-table autovacuum / InnoDB stats tuning report; add --apply to "
+ "execute the recommendations and --analyze to refresh planner stats on changed tables");
+ "execute the recommendations, --analyze to refresh planner stats on changed tables, "
+ "and --diagnose to surface unused indexes, bloat, slow queries, and other DBA findings");
LOG.info(
"Use 'cleanup-flowable-history --delete --runtime-batch-size=1000 --history-batch-size=1000' for Flowable cleanup with custom options");
LOG.info(
@ -2482,8 +2487,9 @@ public class OpenMetadataOperations implements Callable<Integer> {
name = "db-tune",
description =
"Generate a per-table autovacuum / InnoDB stats tuning report and optionally apply it. "
+ "Default mode is read-only — pass --apply to execute the ALTER TABLE statements "
+ "and --analyze to refresh planner stats on changed tables.")
+ "Default mode is read-only — pass --apply to execute the ALTER TABLE statements, "
+ "--analyze to refresh planner stats on changed tables, and --diagnose to also "
+ "surface unused indexes, bloat, slow queries, and other read-only DBA findings.")
public Integer dbTune(
@Option(
names = {"--apply"},
@ -2501,7 +2507,15 @@ public class OpenMetadataOperations implements Callable<Integer> {
defaultValue = "false",
description =
"After --apply, run ANALYZE on each changed table so planner stats reflect the new settings.")
boolean runAnalyze) {
boolean runAnalyze,
@Option(
names = {"--diagnose"},
defaultValue = "false",
description =
"Also run a read-only diagnostic pass (unused indexes, bloat, low cache hit, "
+ "stale ANALYZE, seq-scan-heavy tables, slow queries). Pure inspection — "
+ "never modifies anything.")
boolean runDiagnose) {
try {
parseConfig();
String driverClass = config.getDataSourceFactory().getDriverClass();
@ -2515,6 +2529,11 @@ public class OpenMetadataOperations implements Callable<Integer> {
AutoTuner tuner = autoTunerFor(connType);
DbTuneResult result = jdbi.withHandle(tuner::analyze);
LOG.info("\n{}", DbTuneReport.render(result));
if (runDiagnose) {
Diagnostic diagnostic = diagnosticFor(connType);
DbTuneDiagnosis diagnosis = jdbi.withHandle(diagnostic::diagnose);
LOG.info("\n{}", DbTuneReport.renderDiagnosis(diagnosis));
}
if (!apply) {
return 0;
}
@ -2547,6 +2566,13 @@ public class OpenMetadataOperations implements Callable<Integer> {
};
}
/** Maps the configured connection type to its read-only {@link Diagnostic} implementation. */
private Diagnostic diagnosticFor(final ConnectionType connType) {
  final Diagnostic selected =
      switch (connType) {
        case POSTGRES -> new PostgresDiagnostic();
        case MYSQL -> new MysqlDiagnostic();
      };
  return selected;
}
private boolean confirmApply(final AutoTuner tuner, final List<TableRecommendation> actionable) {
LOG.info("About to apply {} ALTER statements:", actionable.size());
LOG.info("\n{}", DbTuneReport.renderAlterStatements(tuner, actionable));

View file

@ -0,0 +1,36 @@
/*
* Copyright 2026 Collate
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.openmetadata.service.util.dbtune;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Result of one diagnostic run: the structured {@code findings} plus free-text {@code notes}
 * carrying advisory messages (e.g. a missing extension). Both lists are never {@code null} and are
 * stored as unmodifiable copies.
 */
public record DbTuneDiagnosis(List<Finding> findings, List<String> notes) {
  public DbTuneDiagnosis {
    if (findings == null) {
      findings = List.of();
    } else {
      findings = List.copyOf(findings);
    }
    if (notes == null) {
      notes = List.of();
    } else {
      notes = List.copyOf(notes);
    }
  }

  /**
   * Buckets the findings per category. The result is an {@code EnumMap}, so iteration follows the
   * declaration order of {@link DiagnosticCategory} and report sections print in a stable order.
   * Categories without findings have no entry.
   */
  public Map<DiagnosticCategory, List<Finding>> findingsByCategory() {
    final var byCategory =
        Collectors.groupingBy(
            Finding::category,
            () -> new java.util.EnumMap<DiagnosticCategory, List<Finding>>(DiagnosticCategory.class),
            Collectors.toList());
    return findings.stream().collect(byCategory);
  }
}

View file

@ -150,4 +150,54 @@ public final class DbTuneReport {
}
return String.join("\n", lines);
}
/**
 * Renders read-only diagnostic findings grouped by category. Each category that produced at
 * least one finding gets its own section with a category-specific column layout. Categories with
 * zero findings are suppressed; the {@code notes} list is appended at the end so an operator sees
 * what couldn't be checked (missing extension, permissions, etc.).
 *
 * <p>When there are no findings, the "clean result" line is only printed if there are also no
 * notes. If some checks were skipped or failed, the header says so instead of claiming that every
 * check came back clean.
 *
 * @param diagnosis the diagnostic result to render; must not be {@code null}
 * @return the multi-line report text
 */
public static String renderDiagnosis(final DbTuneDiagnosis diagnosis) {
  final StringBuilder out = new StringBuilder();
  out.append("=== Diagnostic findings ===\n");
  final Map<DiagnosticCategory, List<Finding>> grouped = diagnosis.findingsByCategory();
  final List<String> notes = diagnosis.notes();
  if (grouped.isEmpty()) {
    // A note means at least one check could not run, so "every check ... clean" would be false.
    if (notes.isEmpty()) {
      out.append("(no findings — every check returned a clean result)\n");
    } else {
      out.append("(no findings — some checks could not run; see notes below)\n");
    }
  }
  for (Map.Entry<DiagnosticCategory, List<Finding>> e : grouped.entrySet()) {
    appendCategorySection(out, e.getKey(), e.getValue());
  }
  appendNotes(out, notes);
  return out.toString();
}
/**
 * Appends one report section for {@code category}: a heading with the finding count, the
 * category description, and a table whose columns are {@code category.columns()}. Attribute
 * values missing from a finding are passed through {@code nullToBlank}.
 */
private static void appendCategorySection(
    final StringBuilder out, final DiagnosticCategory category, final List<Finding> findings) {
  final List<String> headers = category.columns();

  out.append('\n');
  out.append(category.title());
  out.append(" (");
  out.append(findings.size());
  out.append(" found):\n");
  out.append(" ").append(category.description()).append('\n');

  final List<List<String>> tableRows = new ArrayList<>();
  for (Finding finding : findings) {
    final List<String> cells = new ArrayList<>(headers.size());
    headers.forEach(header -> cells.add(nullToBlank(finding.attributes().get(header))));
    tableRows.add(cells);
  }

  final String table = new AsciiTable(headers, tableRows, true, "", "(empty)").render();
  out.append(table);
  out.append('\n');
}
/** Appends a "Notes:" section with one bullet per note; writes nothing for a null/empty list. */
private static void appendNotes(final StringBuilder out, final List<String> notes) {
  final boolean hasNotes = notes != null && !notes.isEmpty();
  if (hasNotes) {
    out.append("\nNotes:\n");
    notes.forEach(line -> out.append(" - ").append(line).append('\n'));
  }
}
}

View file

@ -0,0 +1,26 @@
/*
* Copyright 2026 Collate
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.openmetadata.service.util.dbtune;
import org.jdbi.v3.core.Handle;
/**
* Read-only DBA diagnostic. Inspects the live database for unused indexes, bloat indicators, slow
* queries, and other signals. Implementations must catch and log per-category errors so a missing
* extension (e.g. {@code pg_stat_statements} not installed) does not abort the whole diagnose run
* — surface it in {@link DbTuneDiagnosis#notes()} instead.
*/
public interface Diagnostic {
/**
* Runs every diagnostic category against the database behind {@code handle}.
*
* @param handle open JDBI handle used to issue the diagnostic queries
* @return the collected findings plus notes describing checks that could not be performed
*/
DbTuneDiagnosis diagnose(Handle handle);
}

View file

@ -0,0 +1,77 @@
/*
* Copyright 2026 Collate
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.openmetadata.service.util.dbtune;
import java.util.List;
/**
 * Categories of read-only diagnostic findings emitted by {@link Diagnostic#diagnose}. Every
 * category carries a report title, a one-line description, and the ordered attribute keys that
 * {@link Finding#attributes} is expected to populate; the report renderer uses those keys as the
 * column layout for the category's section.
 */
public enum DiagnosticCategory {
  UNUSED_INDEX(
      "Unused indexes",
      "Indexes with zero scans since last stats reset; candidates for DROP after a usage review.",
      "table",
      "index",
      "size",
      "scans"),
  HIGH_DEAD_TUPLES(
      "Tables with high dead-tuple ratio",
      "n_dead_tup / n_live_tup > 0.2 — autovacuum is falling behind on this table.",
      "table",
      "live_rows",
      "dead_rows",
      "dead_ratio",
      "last_vacuum"),
  LOW_CACHE_HIT(
      "Tables with low cache hit ratio",
      "Heap reads exceed 1000 with hit ratio < 90%; suggests undersized buffers or hot seq scans.",
      "table",
      "heap_reads",
      "heap_hits",
      "hit_pct"),
  STALE_STATS(
      "Tables with stale ANALYZE",
      "Last autoanalyze older than 14 days (or never); planner stats may be misleading.",
      "table",
      "last_analyzed",
      "live_rows"),
  SEQ_SCAN_HEAVY(
      "Tables with seq-scan-heavy access",
      "seq_scan/idx_scan > 10 with > 1000 seq scans; suggests a missing index.",
      "table",
      "seq_scans",
      "idx_scans",
      "ratio"),
  SLOW_QUERY(
      "Top slowest queries",
      "From pg_stat_statements / events_statements_summary_by_digest. Truncated to 100 chars.",
      "query",
      "calls",
      "mean_ms"),
  FULL_TABLE_SCAN(
      "Queries doing full table scans",
      "From sys.statements_with_full_table_scans (MySQL).",
      "query",
      "exec_count",
      "rows_examined_avg"),
  LOW_BUFFER_POOL_HIT(
      "InnoDB buffer pool hit ratio",
      "Hit ratio < 99% suggests undersized innodb_buffer_pool_size for the working set.",
      "metric",
      "value");

  private final String title;
  private final String description;
  private final List<String> columns;

  /**
   * @param title section heading used in the report
   * @param description one-line explanation printed under the heading
   * @param columns ordered attribute keys / table column headers for this category
   */
  DiagnosticCategory(final String title, final String description, final String... columns) {
    this.title = title;
    this.description = description;
    // List.of yields an unmodifiable list, so callers cannot alter the column layout.
    this.columns = List.of(columns);
  }

  /** Section heading for this category. */
  public String title() {
    return this.title;
  }

  /** One-line explanation of what the category flags. */
  public String description() {
    return this.description;
  }

  /** Unmodifiable, ordered attribute keys that double as the report's column headers. */
  public List<String> columns() {
    return this.columns;
  }
}

View file

@ -0,0 +1,28 @@
/*
* Copyright 2026 Collate
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.openmetadata.service.util.dbtune;
import java.util.Map;
/**
 * A single diagnostic finding, rendered as one table row. The keys of {@code attributes} must
 * match {@link DiagnosticCategory#columns()} of the finding's {@code category} so the renderer can
 * look each column up by name. The attribute map is stored as an unmodifiable copy; a {@code null}
 * map becomes an empty one.
 */
public record Finding(
    DiagnosticCategory category, Severity severity, Map<String, String> attributes) {
  public Finding {
    if (attributes == null) {
      attributes = Map.of();
    } else {
      attributes = Map.copyOf(attributes);
    }
  }
}

View file

@ -0,0 +1,189 @@
/*
* Copyright 2026 Collate
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.openmetadata.service.util.dbtune;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import lombok.extern.slf4j.Slf4j;
import org.jdbi.v3.core.Handle;
/**
 * MySQL diagnostic. Reads from {@code sys.*}, {@code performance_schema.*}, and
 * {@code INFORMATION_SCHEMA} views; gracefully degrades if a view is missing or permissions are
 * insufficient (the operator gets a {@link DbTuneDiagnosis#notes()} entry).
 *
 * <p>All queries are plain {@code SELECT}s scoped to the current schema via {@code DATABASE()}.
 */
@Slf4j
public final class MysqlDiagnostic implements Diagnostic {
  /** Buffer pool hit ratios below this value are reported. */
  static final double LOW_BUFFER_POOL_HIT = 0.99;

  /** Maximum number of slow-query digests reported. */
  static final int SLOW_QUERY_LIMIT = 10;

  /** Maximum number of query characters kept by {@link #truncate(String)} before the ellipsis. */
  static final int QUERY_TRUNCATE = 100;

  /** Marker appended to queries that were cut at {@link #QUERY_TRUNCATE} characters. */
  private static final String ELLIPSIS = "\u2026";

  /**
   * Runs every MySQL category in isolation and bundles the results.
   *
   * @param handle open JDBI handle on the database to inspect
   * @return findings from the categories that succeeded plus notes for those that could not run
   */
  @Override
  public DbTuneDiagnosis diagnose(final Handle handle) {
    List<Finding> findings = new ArrayList<>();
    List<String> notes = new ArrayList<>();
    runCategory(handle, notes, "unused indexes", h -> findings.addAll(unusedIndexes(h)));
    runCategory(handle, notes, "buffer pool hit", h -> findings.addAll(bufferPoolHit(h, notes)));
    runCategory(handle, notes, "slow queries", h -> findings.addAll(slowQueries(h, notes)));
    runCategory(handle, notes, "full table scans", h -> findings.addAll(fullTableScans(h, notes)));
    return new DbTuneDiagnosis(findings, notes);
  }

  /**
   * Executes one category and converts any exception into a warning log line plus a note, so a
   * failing category never aborts the remaining ones.
   */
  private void runCategory(
      final Handle handle,
      final List<String> notes,
      final String label,
      final java.util.function.Consumer<Handle> body) {
    try {
      body.accept(handle);
    } catch (Exception e) {
      LOG.warn("Diagnostic [{}] failed: {}", label, e.getMessage());
      notes.add(label + ": " + e.getMessage());
    }
  }

  // ---- categories ----

  /**
   * Lists up to 50 rows of {@code sys.schema_unused_indexes} for the current schema. The view
   * exposes no size, so the {@code size} attribute is a fixed placeholder.
   */
  List<Finding> unusedIndexes(final Handle handle) {
    return handle
        .createQuery(
            "SELECT object_schema, object_name, index_name "
                + "FROM sys.schema_unused_indexes "
                + "WHERE object_schema = DATABASE() "
                + "ORDER BY object_name "
                + "LIMIT 50")
        .map(
            (rs, ctx) ->
                new Finding(
                    DiagnosticCategory.UNUSED_INDEX,
                    Severity.WARN,
                    Map.of(
                        "table",
                        rs.getString("object_name"),
                        "index",
                        rs.getString("index_name"),
                        "size",
                        "(not in view)",
                        "scans",
                        "0")))
        .list();
  }

  /**
   * Computes {@code 1 - Innodb_buffer_pool_reads / Innodb_buffer_pool_read_requests} and reports
   * it when it falls below {@link #LOW_BUFFER_POOL_HIT}. Adds a note and returns no finding when
   * either counter is unreadable or no read requests have been recorded.
   */
  List<Finding> bufferPoolHit(final Handle handle, final List<String> notes) {
    Long reads = readGlobalStatusLong(handle, "Innodb_buffer_pool_reads");
    Long requests = readGlobalStatusLong(handle, "Innodb_buffer_pool_read_requests");
    if (reads == null || requests == null || requests == 0) {
      notes.add("buffer pool hit: Innodb_buffer_pool_* counters not available");
      return List.of();
    }
    double hitRatio = 1.0 - (reads.doubleValue() / requests.doubleValue());
    if (hitRatio >= LOW_BUFFER_POOL_HIT) {
      return List.of();
    }
    return List.of(
        new Finding(
            DiagnosticCategory.LOW_BUFFER_POOL_HIT,
            Severity.INFO,
            Map.of(
                "metric",
                "innodb_buffer_pool_hit_ratio",
                "value",
                String.format(Locale.ROOT, "%.4f", hitRatio))));
  }

  /**
   * Returns the top {@link #SLOW_QUERY_LIMIT} statement digests of the current schema ordered by
   * average wait. The SQL divides {@code avg_timer_wait} by 1,000,000 (aliased {@code mean_us})
   * and the mapper divides by a further 1,000 to populate {@code mean_ms}. Any failure becomes a
   * note instead of an exception.
   */
  List<Finding> slowQueries(final Handle handle, final List<String> notes) {
    try {
      return handle
          .createQuery(
              "SELECT digest_text, count_star AS calls, "
                  + " ROUND(avg_timer_wait/1000000, 2) AS mean_us "
                  + "FROM performance_schema.events_statements_summary_by_digest "
                  + "WHERE schema_name = DATABASE() "
                  + " AND digest_text IS NOT NULL "
                  + "ORDER BY avg_timer_wait DESC "
                  + "LIMIT :limit")
          .bind("limit", SLOW_QUERY_LIMIT)
          .map(
              (rs, ctx) -> {
                Map<String, String> attrs = new LinkedHashMap<>();
                attrs.put("query", truncate(rs.getString("digest_text")));
                attrs.put("calls", String.valueOf(rs.getLong("calls")));
                attrs.put(
                    "mean_ms",
                    String.format(Locale.ROOT, "%.2f", rs.getDouble("mean_us") / 1000.0));
                return new Finding(DiagnosticCategory.SLOW_QUERY, Severity.INFO, attrs);
              })
          .list();
    } catch (Exception e) {
      notes.add("slow queries: performance_schema not available (" + e.getMessage() + ")");
      return List.of();
    }
  }

  /**
   * Returns the 10 most frequently executed statements of the current schema listed in
   * {@code sys.statements_with_full_table_scans}. Any failure becomes a note.
   */
  List<Finding> fullTableScans(final Handle handle, final List<String> notes) {
    try {
      return handle
          .createQuery(
              "SELECT query, exec_count, rows_examined_avg "
                  + "FROM sys.statements_with_full_table_scans "
                  + "WHERE db = DATABASE() "
                  + "ORDER BY exec_count DESC "
                  + "LIMIT 10")
          .map(
              (rs, ctx) ->
                  new Finding(
                      DiagnosticCategory.FULL_TABLE_SCAN,
                      Severity.INFO,
                      Map.of(
                          "query", truncate(rs.getString("query")),
                          "exec_count", String.valueOf(rs.getLong("exec_count")),
                          "rows_examined_avg", String.valueOf(rs.getLong("rows_examined_avg")))))
          .list();
    } catch (Exception e) {
      notes.add(
          "full table scans: sys.statements_with_full_table_scans not available ("
              + e.getMessage()
              + ")");
      return List.of();
    }
  }

  /**
   * Reads one numeric counter from {@code performance_schema.global_status}.
   *
   * @return the counter value, or {@code null} when the row is absent or the query fails
   */
  private Long readGlobalStatusLong(final Handle handle, final String name) {
    try {
      return handle
          .createQuery(
              "SELECT VARIABLE_VALUE FROM performance_schema.global_status "
                  + "WHERE VARIABLE_NAME = :n")
          .bind("n", name)
          .mapTo(Long.class)
          .findOne()
          .orElse(null);
    } catch (Exception e) {
      // Best-effort read: the caller reports the missing counter as a note. Keep the cause in
      // the debug log so it is not lost entirely.
      LOG.debug("Could not read global status [{}]: {}", name, e.getMessage());
      return null;
    }
  }

  /**
   * Normalises a query for single-line display: whitespace runs collapse to one space, the result
   * is trimmed, and anything beyond {@link #QUERY_TRUNCATE} characters is replaced by a single
   * ellipsis character.
   *
   * @param query raw query text, may be {@code null}
   * @return the display form; empty string for {@code null}
   */
  static String truncate(final String query) {
    if (query == null) {
      return "";
    }
    String collapsed = query.replaceAll("\\s+", " ").trim();
    return collapsed.length() <= QUERY_TRUNCATE
        ? collapsed
        : collapsed.substring(0, QUERY_TRUNCATE) + ELLIPSIS;
  }
}

View file

@ -0,0 +1,253 @@
/*
* Copyright 2026 Collate
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.openmetadata.service.util.dbtune;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import lombok.extern.slf4j.Slf4j;
import org.jdbi.v3.core.Handle;
/**
 * Postgres diagnostic. Each finding category is queried in its own try block so that a missing
 * extension or a stat view permission issue surfaces as a {@link DbTuneDiagnosis#notes()} entry
 * rather than aborting the whole run.
 *
 * <p>Thresholds are baked in for v1; if operators want them tunable later they become CLI flags.
 */
@Slf4j
public final class PostgresDiagnostic implements Diagnostic {
  /** Unused indexes smaller than or equal to this size are not reported. */
  static final long UNUSED_INDEX_SIZE_BYTES = 10L * 1024 * 1024;

  /** Minimum dead/live tuple ratio for a table to be reported. */
  static final double DEAD_TUPLE_RATIO = 0.2;

  /** Tables with this many live rows or fewer are ignored by the dead-tuple check. */
  static final long DEAD_TUPLE_MIN_LIVE_ROWS = 10_000;

  /** Heap cache hit ratios below this value are reported. */
  static final double LOW_CACHE_HIT_RATIO = 0.9;

  /** Tables with this many heap block reads or fewer are ignored by the cache-hit check. */
  static final long LOW_CACHE_HIT_MIN_READS = 1_000;

  /** Planner statistics older than this many days count as stale. */
  static final int STALE_STATS_DAYS = 14;

  /** Tables with this many live rows or fewer are ignored by the stale-stats check. */
  static final long STALE_STATS_MIN_LIVE_ROWS = 1_000;

  /** Minimum seq_scan / idx_scan ratio for a table to be reported. */
  static final long SEQ_SCAN_RATIO = 10;

  /** Tables with this many sequential scans or fewer are ignored by the seq-scan check. */
  static final long SEQ_SCAN_MIN = 1_000;

  /** Maximum number of slow statements reported. */
  static final int SLOW_QUERY_LIMIT = 10;

  /** Statements called this many times or fewer are ignored by the slow-query check. */
  static final long SLOW_QUERY_MIN_CALLS = 100;

  /** Maximum number of query characters kept by {@link #truncate(String)} before the ellipsis. */
  static final int QUERY_TRUNCATE = 100;

  /** Marker appended to queries that were cut at {@link #QUERY_TRUNCATE} characters. */
  private static final String ELLIPSIS = "\u2026";

  /** Shown in the ratio column when a table has sequential scans but no index scans at all. */
  private static final String INFINITE_RATIO = "\u221E";

  /** Shown instead of a timestamp when a maintenance operation has never run. */
  private static final String NEVER = "never";

  /**
   * Runs every Postgres category in isolation and bundles the results.
   *
   * @param handle open JDBI handle on the database to inspect
   * @return findings from the categories that succeeded plus notes for those that could not run
   */
  @Override
  public DbTuneDiagnosis diagnose(final Handle handle) {
    List<Finding> findings = new ArrayList<>();
    List<String> notes = new ArrayList<>();
    runCategory(handle, notes, "unused indexes", h -> findings.addAll(unusedIndexes(h)));
    runCategory(handle, notes, "dead tuples", h -> findings.addAll(highDeadTuples(h)));
    runCategory(handle, notes, "cache hit", h -> findings.addAll(lowCacheHit(h)));
    runCategory(handle, notes, "stale stats", h -> findings.addAll(staleStats(h)));
    runCategory(handle, notes, "seq scans", h -> findings.addAll(seqScanHeavy(h)));
    runCategory(handle, notes, "slow queries", h -> findings.addAll(slowQueries(h, notes)));
    return new DbTuneDiagnosis(findings, notes);
  }

  /**
   * Executes one category and converts any exception into a warning log line plus a note, so a
   * failing category never aborts the remaining ones.
   */
  private void runCategory(
      final Handle handle,
      final List<String> notes,
      final String label,
      final java.util.function.Consumer<Handle> body) {
    try {
      body.accept(handle);
    } catch (Exception e) {
      LOG.warn("Diagnostic [{}] failed: {}", label, e.getMessage());
      notes.add(label + ": " + e.getMessage());
    }
  }

  // ---- categories ----

  /**
   * Lists up to 50 non-unique, non-primary-key indexes with zero recorded scans and a size above
   * {@link #UNUSED_INDEX_SIZE_BYTES}, largest first.
   */
  List<Finding> unusedIndexes(final Handle handle) {
    return handle
        .createQuery(
            "SELECT s.schemaname, s.relname AS table_name, s.indexrelname AS index_name, "
                + " s.idx_scan AS scans, "
                + " pg_relation_size(s.indexrelid) AS bytes "
                + "FROM pg_stat_user_indexes s "
                + "JOIN pg_index i ON i.indexrelid = s.indexrelid "
                + "WHERE s.idx_scan = 0 "
                + " AND NOT i.indisunique "
                + " AND NOT i.indisprimary "
                + " AND pg_relation_size(s.indexrelid) > :min_bytes "
                + "ORDER BY pg_relation_size(s.indexrelid) DESC "
                + "LIMIT 50")
        .bind("min_bytes", UNUSED_INDEX_SIZE_BYTES)
        .map(
            (rs, ctx) ->
                new Finding(
                    DiagnosticCategory.UNUSED_INDEX,
                    Severity.WARN,
                    Map.of(
                        "table", rs.getString("table_name"),
                        "index", rs.getString("index_name"),
                        "size", DbTuneReport.formatBytes(rs.getLong("bytes")),
                        "scans", String.valueOf(rs.getLong("scans")))))
        .list();
  }

  /**
   * Lists up to 25 tables with more than {@link #DEAD_TUPLE_MIN_LIVE_ROWS} live rows whose
   * dead/live tuple ratio exceeds {@link #DEAD_TUPLE_RATIO}, most dead tuples first. The
   * {@code last_vacuum} attribute shows {@code last_autovacuum}, or "never" when it is NULL.
   */
  List<Finding> highDeadTuples(final Handle handle) {
    return handle
        .createQuery(
            "SELECT relname AS table_name, "
                + " n_live_tup, "
                + " n_dead_tup, "
                + " ROUND((n_dead_tup::numeric / GREATEST(n_live_tup, 1)) * 100, 2) AS dead_pct, "
                + " last_autovacuum "
                + "FROM pg_stat_user_tables "
                + "WHERE n_live_tup > :min_live "
                + " AND n_dead_tup::numeric / GREATEST(n_live_tup, 1) > :threshold "
                + "ORDER BY n_dead_tup DESC "
                + "LIMIT 25")
        .bind("min_live", DEAD_TUPLE_MIN_LIVE_ROWS)
        .bind("threshold", DEAD_TUPLE_RATIO)
        .map(
            (rs, ctx) ->
                new Finding(
                    DiagnosticCategory.HIGH_DEAD_TUPLES,
                    Severity.WARN,
                    Map.of(
                        "table", rs.getString("table_name"),
                        "live_rows", String.valueOf(rs.getLong("n_live_tup")),
                        "dead_rows", String.valueOf(rs.getLong("n_dead_tup")),
                        "dead_ratio", rs.getString("dead_pct") + "%",
                        "last_vacuum", timestampOrNever(rs.getString("last_autovacuum")))))
        .list();
  }

  /**
   * Lists up to 25 tables with more than {@link #LOW_CACHE_HIT_MIN_READS} heap block reads whose
   * heap hit ratio is below {@link #LOW_CACHE_HIT_RATIO}, most reads first.
   */
  List<Finding> lowCacheHit(final Handle handle) {
    return handle
        .createQuery(
            "SELECT relname AS table_name, "
                + " heap_blks_read, "
                + " heap_blks_hit, "
                + " ROUND(heap_blks_hit::numeric / NULLIF(heap_blks_hit + heap_blks_read, 0) * 100, 2) AS hit_pct "
                + "FROM pg_statio_user_tables "
                + "WHERE heap_blks_read > :min_reads "
                + " AND heap_blks_hit::numeric / NULLIF(heap_blks_hit + heap_blks_read, 0) < :threshold "
                + "ORDER BY heap_blks_read DESC "
                + "LIMIT 25")
        .bind("min_reads", LOW_CACHE_HIT_MIN_READS)
        .bind("threshold", LOW_CACHE_HIT_RATIO)
        .map(
            (rs, ctx) ->
                new Finding(
                    DiagnosticCategory.LOW_CACHE_HIT,
                    Severity.INFO,
                    Map.of(
                        "table", rs.getString("table_name"),
                        "heap_reads", String.valueOf(rs.getLong("heap_blks_read")),
                        "heap_hits", String.valueOf(rs.getLong("heap_blks_hit")),
                        "hit_pct", rs.getString("hit_pct") + "%")))
        .list();
  }

  /**
   * Lists up to 25 tables with more than {@link #STALE_STATS_MIN_LIVE_ROWS} live rows that were
   * never analyzed or whose most recent ANALYZE is older than {@link #STALE_STATS_DAYS} days.
   *
   * <p>The most recent of {@code last_autoanalyze} and {@code last_analyze} is used (Postgres
   * {@code GREATEST} ignores NULL arguments), so a table that was analyzed manually yesterday is
   * not flagged just because autoanalyze last ran weeks ago.
   */
  List<Finding> staleStats(final Handle handle) {
    return handle
        .createQuery(
            "SELECT relname AS table_name, "
                + " n_live_tup, "
                + " GREATEST(last_autoanalyze, last_analyze) AS last_analyzed "
                + "FROM pg_stat_user_tables "
                + "WHERE n_live_tup > :min_live "
                + " AND (GREATEST(last_autoanalyze, last_analyze) IS NULL "
                + " OR GREATEST(last_autoanalyze, last_analyze) < now() - (:days || ' days')::interval) "
                + "ORDER BY n_live_tup DESC "
                + "LIMIT 25")
        .bind("min_live", STALE_STATS_MIN_LIVE_ROWS)
        .bind("days", STALE_STATS_DAYS)
        .map(
            (rs, ctx) ->
                new Finding(
                    DiagnosticCategory.STALE_STATS,
                    Severity.WARN,
                    Map.of(
                        "table", rs.getString("table_name"),
                        "live_rows", String.valueOf(rs.getLong("n_live_tup")),
                        "last_analyzed", timestampOrNever(rs.getString("last_analyzed")))))
        .list();
  }

  /**
   * Lists up to 25 tables with more than {@link #SEQ_SCAN_MIN} sequential scans whose
   * seq_scan/idx_scan ratio exceeds {@link #SEQ_SCAN_RATIO}, most sequential scans first.
   *
   * <p>The ratio test is written as a multiplication so tables with zero (or NULL) index scans
   * match as well. A division guarded by {@code NULLIF(idx_scan, 0)} evaluates to NULL for exactly
   * those tables and silently drops the most extreme cases.
   */
  List<Finding> seqScanHeavy(final Handle handle) {
    return handle
        .createQuery(
            "SELECT relname AS table_name, seq_scan, COALESCE(idx_scan, 0) AS idx_scan "
                + "FROM pg_stat_user_tables "
                + "WHERE seq_scan > :min_seq "
                + " AND seq_scan > :ratio * COALESCE(idx_scan, 0) "
                + "ORDER BY seq_scan DESC "
                + "LIMIT 25")
        .bind("min_seq", SEQ_SCAN_MIN)
        .bind("ratio", SEQ_SCAN_RATIO)
        .map(
            (rs, ctx) -> {
              final long seqScans = rs.getLong("seq_scan");
              final long idxScans = rs.getLong("idx_scan");
              return new Finding(
                  DiagnosticCategory.SEQ_SCAN_HEAVY,
                  Severity.INFO,
                  Map.of(
                      "table", rs.getString("table_name"),
                      "seq_scans", String.valueOf(seqScans),
                      "idx_scans", String.valueOf(idxScans),
                      "ratio",
                          idxScans == 0 ? INFINITE_RATIO : String.valueOf(seqScans / idxScans)));
            })
        .list();
  }

  /**
   * Returns the top {@link #SLOW_QUERY_LIMIT} statements by mean execution time among those called
   * more than {@link #SLOW_QUERY_MIN_CALLS} times. Adds a note and returns nothing when the
   * {@code pg_stat_statements} extension is not installed.
   *
   * <p>{@code pg_stat_statements} tracks every database in the cluster, so rows are restricted to
   * the current database to keep other applications' queries out of the report.
   */
  List<Finding> slowQueries(final Handle handle, final List<String> notes) {
    if (!hasPgStatStatements(handle)) {
      notes.add("slow queries: pg_stat_statements extension not installed");
      return List.of();
    }
    return handle
        .createQuery(
            "SELECT query, calls, mean_exec_time AS mean_ms "
                + "FROM pg_stat_statements "
                + "WHERE calls > :min_calls "
                + " AND dbid = (SELECT oid FROM pg_database WHERE datname = current_database()) "
                + "ORDER BY mean_exec_time DESC "
                + "LIMIT :limit")
        .bind("min_calls", SLOW_QUERY_MIN_CALLS)
        .bind("limit", SLOW_QUERY_LIMIT)
        .map(
            (rs, ctx) -> {
              Map<String, String> attrs = new LinkedHashMap<>();
              attrs.put("query", truncate(rs.getString("query")));
              attrs.put("calls", String.valueOf(rs.getLong("calls")));
              attrs.put(
                  "mean_ms", String.format(java.util.Locale.ROOT, "%.1f", rs.getDouble("mean_ms")));
              return new Finding(DiagnosticCategory.SLOW_QUERY, Severity.INFO, attrs);
            })
        .list();
  }

  /** Returns whether {@code pg_extension} lists {@code pg_stat_statements}. */
  private boolean hasPgStatStatements(final Handle handle) {
    return handle
        .createQuery("SELECT 1 FROM pg_extension WHERE extname = 'pg_stat_statements'")
        .mapTo(Integer.class)
        .findOne()
        .isPresent();
  }

  /** Renders a nullable timestamp column for display, replacing SQL NULL with "never". */
  private static String timestampOrNever(final String timestamp) {
    return timestamp == null ? NEVER : timestamp;
  }

  /**
   * Normalises a query for single-line display: whitespace runs collapse to one space, the result
   * is trimmed, and anything beyond {@link #QUERY_TRUNCATE} characters is replaced by a single
   * ellipsis character.
   *
   * @param query raw query text, may be {@code null}
   * @return the display form; empty string for {@code null}
   */
  static String truncate(final String query) {
    if (query == null) {
      return "";
    }
    String collapsed = query.replaceAll("\\s+", " ").trim();
    return collapsed.length() <= QUERY_TRUNCATE
        ? collapsed
        : collapsed.substring(0, QUERY_TRUNCATE) + ELLIPSIS;
  }
}

View file

@ -0,0 +1,18 @@
/*
* Copyright 2026 Collate
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.openmetadata.service.util.dbtune;
/** Severity attached to a diagnostic {@code Finding}; declared from least to most urgent. */
public enum Severity {
  /** Informational finding. */
  INFO,

  /** Finding that warrants operator attention. */
  WARN;
}

View file

@ -0,0 +1,153 @@
/*
* Copyright 2026 Collate
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.openmetadata.service.util.dbtune;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.List;
import java.util.Map;
import org.junit.jupiter.api.Test;
/**
* Diagnostic-side rendering and grouping tests. Pure logic, no DB. The end-to-end DB query
* exercise lives in {@code DbTuneIT}.
*/
class DiagnosticReportTest {
@Test
void findingsByCategory_groupsByEnumOrder() {
DbTuneDiagnosis d =
new DbTuneDiagnosis(
List.of(
finding(DiagnosticCategory.SLOW_QUERY, "q1"),
finding(DiagnosticCategory.UNUSED_INDEX, "idx_a"),
finding(DiagnosticCategory.UNUSED_INDEX, "idx_b"),
finding(DiagnosticCategory.HIGH_DEAD_TUPLES, "tag_usage")),
List.of());
Map<DiagnosticCategory, List<Finding>> grouped = d.findingsByCategory();
assertEquals(2, grouped.get(DiagnosticCategory.UNUSED_INDEX).size());
assertEquals(1, grouped.get(DiagnosticCategory.HIGH_DEAD_TUPLES).size());
assertEquals(1, grouped.get(DiagnosticCategory.SLOW_QUERY).size());
// EnumMap preserves enum declaration order UNUSED_INDEX precedes HIGH_DEAD_TUPLES precedes
// SLOW_QUERY.
List<DiagnosticCategory> orderedKeys = grouped.keySet().stream().toList();
assertEquals(
List.of(
DiagnosticCategory.UNUSED_INDEX,
DiagnosticCategory.HIGH_DEAD_TUPLES,
DiagnosticCategory.SLOW_QUERY),
orderedKeys);
}
@Test
void renderDiagnosis_empty_showsCleanResultMessage() {
DbTuneDiagnosis empty = new DbTuneDiagnosis(List.of(), List.of());
String out = DbTuneReport.renderDiagnosis(empty);
assertTrue(out.contains("Diagnostic findings"));
assertTrue(out.contains("every check returned a clean result"));
}
@Test
void renderDiagnosis_findingsRenderUnderCategorySections() {
DbTuneDiagnosis d =
new DbTuneDiagnosis(
List.of(
new Finding(
DiagnosticCategory.UNUSED_INDEX,
Severity.WARN,
Map.of(
"table", "tag_usage",
"index", "idx_unused_tag",
"size", "120 MB",
"scans", "0"))),
List.of());
String out = DbTuneReport.renderDiagnosis(d);
assertTrue(out.contains("Unused indexes (1 found)"));
assertTrue(out.contains("idx_unused_tag"));
assertTrue(out.contains("120 MB"));
}
@Test
void renderDiagnosis_notesAppendedWhenPresent() {
DbTuneDiagnosis d =
new DbTuneDiagnosis(
List.of(), List.of("slow queries: pg_stat_statements extension not installed"));
String out = DbTuneReport.renderDiagnosis(d);
assertTrue(out.contains("Notes:"));
assertTrue(out.contains("pg_stat_statements extension not installed"));
}
@Test
void renderDiagnosis_categoriesWithoutFindingsAreSuppressed() {
DbTuneDiagnosis d =
new DbTuneDiagnosis(List.of(finding(DiagnosticCategory.SLOW_QUERY, "SELECT 1")), List.of());
String out = DbTuneReport.renderDiagnosis(d);
assertTrue(out.contains("Top slowest queries"));
assertFalse(out.contains("Unused indexes"));
assertFalse(out.contains("Tables with high dead-tuple ratio"));
}
@Test
void truncate_collapsesWhitespaceAndAppliesLimit() {
String long_ =
"SELECT *\nFROM table_entity\nWHERE fqnHash LIKE 'foo%' ORDER BY name LIMIT 100";
String t = PostgresDiagnostic.truncate(long_);
assertFalse(t.contains(" "));
assertFalse(t.contains("\n"));
assertTrue(t.length() <= 101); // 100 + ellipsis
}
@Test
void truncate_nullReturnsEmpty() {
assertEquals("", PostgresDiagnostic.truncate(null));
assertEquals("", MysqlDiagnostic.truncate(null));
}
@Test
void truncate_underLimitReturnsAsIs() {
assertEquals("SELECT 1", PostgresDiagnostic.truncate("SELECT 1"));
}
@Test
void truncate_overLimitGetsEllipsis() {
String long_ = "x".repeat(150);
String t = PostgresDiagnostic.truncate(long_);
assertTrue(t.endsWith(""));
assertEquals(101, t.length());
}
@Test
void diagnosticCategory_columnsAreImmutable() {
List<String> cols = DiagnosticCategory.UNUSED_INDEX.columns();
org.junit.jupiter.api.Assertions.assertThrows(
UnsupportedOperationException.class, () -> cols.add("new_col"));
}
private static Finding finding(final DiagnosticCategory category, final String objectName) {
return new Finding(category, Severity.INFO, Map.of("table", objectName));
}
}