OpenMetadata/scripts/jacoco_diff_coverage.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

282 lines
9.5 KiB
Python
Raw Normal View History

Add Unit Tests coverage (#26360) * Enable more service unit tests and fix uncovered regressions * Fix remaining broadened unit-suite regressions * Add meaningful Handlebars helper coverage * Add formatter decorator unit coverage * Improve formatter decorator coverage * Improve utility, validator, and formatter coverage * Expand OIDC validator coverage * Tighten shared OIDC validator coverage * Improve user and connection utility coverage * Cover subscription utility workflows * Cover entity field utility workflows * Expand lineage and helper utility coverage * Improve auth code flow handler coverage * Expand auth code flow handler coverage * Cover entity csv parsing flows * Deepen entity csv parser coverage * Fix search builder aggregation null handling * Expand entity utility core coverage * Cover search index utility workflows * Expand search utility coverage * Expand formatter message coverage * Harden notification markdown rendering coverage * Add notification card assembler coverage * Expand EntityCsv coverage and dry-run fixes * Expand K8s pipeline client coverage * Expand saml validator coverage * Expand rdf property mapper coverage * Expand subscription utility coverage * Fix schema field extractor coverage gaps * Expand auth refresh flow coverage * Add service unit test workflow * Enforce new-code coverage on service PRs * Add Unit Test Coverage * Expand k8s pipeline and auth flow coverage * Expand entity csv batch import coverage * Expand entity csv entity creation coverage * Expand entity csv user and flush coverage * Expand entity csv typed import coverage * Cover entity csv dependency validation paths * Expand airflow and csv utility coverage * Replace placeholder authorizer tests with real coverage * Cover PII masking security flows * Tighten async service retry and shutdown coverage * Expand security util claim coverage * Fix checkstyle * Strengthen user bootstrap utility coverage * Expand user activity tracker coverage * Expand ODCS converter 
coverage * Expand S3 log storage coverage * Expand search repository and lineage coverage * Expand search filter and index factory coverage * Expand reindex handler coverage * Expand inherited field search coverage * Expand search cluster metrics coverage * Expand search repository lifecycle coverage * Expand slack client coverage and stabilize tests * Expand search index executor control flow coverage * Cover search index utility helpers * Cover distributed indexing strategy flows * Strengthen distributed search executor coverage * Cover search reindex pipeline flows * Cover search index logging flows * Cover search index stats tracking * Cover quartz search index progress flows * Cover search index app coordination * Cover slack progress listener behavior * Cover polling job notifier behavior * Cover redis job notifier behavior * Expand Slack notifier coverage * Cover partition worker processing flows * Expand distributed participant coverage * Cover orphan job monitor behavior * Expand distributed stats aggregator coverage * Expand distributed partition coverage * Strengthen distributed coordinator coverage * Expand search index and repository coverage * Expand search executor control flow coverage * Expand search repository delegation coverage * Expand search index executor coverage * Expand search repository helper coverage * Expand search utility coverage * Expand search index executor coverage * Expand search repository coverage * Strengthen search index manager coverage * Strengthen distributed recovery and worker coverage * Strengthen distributed executor coverage * Fix index sink batching and stats coverage * Expand elastic bulk sink behavior coverage * Expand open search bulk sink behavior coverage * Fix dropped bulk processor failure accounting * Cover migration workflow discovery paths * fix java checkstyle * Fix permission debug effect normalization * Cover migration FQN repair workflows * Fix glossary workflow migration idempotency * Cover v1100 
migration utility flows * Cover v1104 migration extension flows * Fix and cover v160 migration policy flows * fix java checkstyle * Address PR review comments on vector search and csv docs * fix java checkstyle * Harden service unit test PR workflow * Cover migration utility repair flows * fix java checkstyle * Fix service unit test regressions * Split service new-code coverage check * fix java checkstyle * Fix service diff coverage regressions * fix java checkstyle * Clarify missing JaCoCo artifact failures * fix java checkstyle * Fix bulk sink lifecycle tests * simplify CI * Address PR review feedback after main merge * Fix merged service unit test expectations * Fix search repository bulk update tests * Apply spotless formatting * Use standard exception logging in search repository * Stabilize multi-domain search integration test * Apply spotless formatting * Isolate web analytic event integration timestamps --------- Co-authored-by: Pere Miquel Brull <peremiquelbrull@gmail.com>
2026-03-23 15:17:15 +00:00
#!/usr/bin/env python3
import argparse
import re
import subprocess
import sys
import xml.etree.ElementTree as ET
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path, PurePosixPath
# Matches a unified-diff hunk header such as "@@ -10,2 +12,3 @@".
# Group 1 captures the start line on the new ("+") side; group 2 captures the
# new-side line count when the header spells it out (it is optional).
HUNK_PATTERN = re.compile(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@")
@dataclass
class FileCoverage:
path: str
executable_lines: list[int]
covered_lines: list[int]
missed_lines: list[int]
non_executable_lines: list[int]
@property
def executable_count(self) -> int:
return len(self.executable_lines)
@property
def covered_count(self) -> int:
return len(self.covered_lines)
@property
def missed_count(self) -> int:
return len(self.missed_lines)
@property
def non_executable_count(self) -> int:
return len(self.non_executable_lines)
@property
def coverage_pct(self) -> float | None:
if not self.executable_count:
return None
return (self.covered_count / self.executable_count) * 100.0
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the coverage gate.

    Returns:
        The namespace produced by argparse for the process arguments, with
        ``report``, ``source_root``, ``base_ref``, ``head_ref``,
        ``minimum_coverage`` and ``markdown_output`` attributes.
    """
    cli = argparse.ArgumentParser(
        description="Compute JaCoCo coverage for changed production lines in a PR diff."
    )
    # Declared in the order the options should appear in --help.
    option_specs = (
        ("--report", {"required": True, "help": "Path to jacoco.xml report"}),
        (
            "--source-root",
            {
                "default": "openmetadata-service/src/main/java",
                "help": "Source root to evaluate for changed production code",
            },
        ),
        (
            "--base-ref",
            {
                "required": True,
                "help": "Git base ref/SHA. If --head-ref is omitted, compare working tree against this ref.",
            },
        ),
        (
            "--head-ref",
            {"help": "Git head ref/SHA. If set, diff is computed with base...head."},
        ),
        (
            "--minimum-coverage",
            {
                "type": float,
                "default": 90.0,
                "help": "Minimum required changed-line coverage percentage",
            },
        ),
        (
            "--markdown-output",
            {"required": True, "help": "File to write the Markdown summary to"},
        ),
    )
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
def run_git_diff(base_ref: str, head_ref: str | None, source_root: str) -> str:
    """Run ``git diff`` restricted to ``source_root`` and return its output.

    Args:
        base_ref: Git ref/SHA to diff against.
        head_ref: Optional head ref/SHA. When given, the revision argument is
            ``base_ref...head_ref``; otherwise it is ``base_ref`` alone.
        source_root: Pathspec passed after ``--`` to limit the diff.

    Returns:
        The captured standard output of the git command, as text.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    revision = f"{base_ref}...{head_ref}" if head_ref else base_ref
    # --unified=0 drops context lines; --no-color keeps the output free of escapes.
    command = ["git", "diff", "--unified=0", "--no-color", revision, "--", source_root]
    completed = subprocess.run(command, check=True, capture_output=True, text=True)
    return completed.stdout
# Unified-diff hunk header, e.g. "@@ -10,2 +12,3 @@". Unlike HUNK_PATTERN this
# also captures the old-side length, which is needed to know where a hunk ends.
# Groups: 1 = old-side length, 2 = new-side start line, 3 = new-side length.
# A length that the header omits means 1.
_HUNK_RANGES = re.compile(r"^@@ -\d+(?:,(\d+))? \+(\d+)(?:,(\d+))? @@")


def parse_changed_lines(diff_text: str) -> dict[str, set[int]]:
    """Collect the new-side line numbers of every added line in a unified diff.

    Hunk lengths from the ``@@`` headers are tracked so that, while a hunk is
    being consumed, each line is classified purely by its first character.
    This keeps added source lines whose own text starts with ``++`` (which
    show up as ``+++...`` in the diff) from being dropped or mistaken for a
    ``+++ `` file header.

    Args:
        diff_text: Unified diff text, for example the output of ``git diff``.

    Returns:
        A mapping from file path (with a leading ``b/`` removed) to the set of
        line numbers added in that file. Files whose new side is ``/dev/null``
        are not included.
    """
    changed_lines: dict[str, set[int]] = defaultdict(set)
    current_file: str | None = None
    current_line: int | None = None
    # Old-side and new-side lines the current hunk still owes us.
    old_left = 0
    new_left = 0

    for line in diff_text.splitlines():
        if current_line is not None and (old_left > 0 or new_left > 0):
            marker = line[:1]
            if marker == "+":
                if current_file is not None:
                    changed_lines[current_file].add(current_line)
                current_line += 1
                new_left -= 1
                continue
            if marker == "-":
                old_left -= 1
                continue
            if marker == " ":
                current_line += 1
                old_left -= 1
                new_left -= 1
                continue
            if marker == "\\":
                # "\ No newline at end of file" belongs to neither side.
                continue
            # Anything else means the hunk is shorter than its header claimed;
            # stop trusting the counts and treat the line as diff metadata.
            old_left = new_left = 0

        if line.startswith("+++ "):
            # git appends a TAB after paths that contain spaces (and other
            # diff tools append a TAB plus a timestamp); keep only the path.
            file_path = line[4:].split("\t", 1)[0]
            if file_path == "/dev/null":
                current_file = None
            elif file_path.startswith("b/"):
                current_file = file_path[2:]
            else:
                current_file = file_path
            current_line = None
            continue

        if line.startswith("@@ "):
            match = _HUNK_RANGES.match(line)
            if match:
                old_count, new_start, new_count = match.groups()
                current_line = int(new_start)
                old_left = 1 if old_count is None else int(old_count)
                new_left = 1 if new_count is None else int(new_count)
            else:
                current_line = None
                old_left = new_left = 0
            continue

        if current_file is None or current_line is None:
            continue

        # Lenient fallback for content that appears after the declared hunk
        # length has been used up (hand-edited or otherwise inexact diffs).
        if line.startswith("+"):
            changed_lines[current_file].add(current_line)
            current_line += 1
        elif line.startswith(" "):
            current_line += 1

    return changed_lines
def parse_jacoco_report(report_path: str, source_root: str) -> dict[str, dict[int, bool]]:
    """Read per-line coverage from a JaCoCo XML report.

    Only ``package`` elements directly under the report root are read, and
    within them only their direct ``sourcefile`` children.

    Args:
        report_path: Location of the XML report to parse.
        source_root: Directory prefix prepended to ``<package>/<sourcefile>``
            when building each file's key.

    Returns:
        A mapping from POSIX-style file path to ``{line number: covered}``,
        where a line counts as covered when its ``ci`` attribute is above zero.
    """
    report = ET.parse(report_path).getroot()
    base_dir = PurePosixPath(source_root)
    per_file: dict[str, dict[int, bool]] = {}

    for pkg in report.findall("package"):
        pkg_name = pkg.attrib.get("name", "")
        # A package without a name maps straight onto the source root.
        pkg_dir = base_dir / pkg_name if pkg_name else base_dir
        for source in pkg.findall("sourcefile"):
            key = (pkg_dir / source.attrib["name"]).as_posix()
            per_file[key] = {
                int(entry.attrib["nr"]): int(entry.attrib["ci"]) > 0
                for entry in source.findall("line")
            }

    return per_file
def build_file_coverage(
    changed_lines: dict[str, set[int]], jacoco_coverage: dict[str, dict[int, bool]]
) -> list[FileCoverage]:
    """Combine changed lines with coverage data into per-file summaries.

    Args:
        changed_lines: Changed line numbers keyed by file path.
        jacoco_coverage: ``{line number: covered}`` maps keyed by file path.
            A file missing from this mapping is treated as having no
            executable lines.

    Returns:
        One ``FileCoverage`` per changed file, ordered by ascending coverage
        percentage (files without executable lines last), then by descending
        missed-line count, then by path.
    """

    def worst_first(entry: FileCoverage) -> tuple[float, int, str]:
        pct = entry.coverage_pct
        # 101.0 sorts files with nothing to measure after every real percentage.
        return (101.0 if pct is None else pct, -entry.missed_count, entry.path)

    summaries: list[FileCoverage] = []
    for path in sorted(changed_lines):
        known = jacoco_coverage.get(path, {})
        touched = sorted(changed_lines[path])
        tracked = [number for number in touched if number in known]
        summaries.append(
            FileCoverage(
                path=path,
                executable_lines=tracked,
                covered_lines=[number for number in tracked if known[number]],
                missed_lines=[number for number in tracked if not known[number]],
                non_executable_lines=[number for number in touched if number not in known],
            )
        )

    summaries.sort(key=worst_first)
    return summaries
def format_line_list(lines: list[int], limit: int = 12) -> str:
    """Render line numbers as a comma-separated string, truncated to ``limit``.

    Args:
        lines: Line numbers to display, in display order.
        limit: Maximum number of line numbers to show. Values below zero are
            treated as zero, so nothing is listed and only the hidden count
            is reported.

    Returns:
        ``"-"`` for an empty list; otherwise the first ``limit`` numbers joined
        by ``", "``, followed by ``"+N more"`` when N numbers were left out.
    """
    if not lines:
        return "-"
    # Clamp so a zero or negative limit cannot slice from the end of the list
    # or leave a dangling leading comma in front of the "+N more" suffix.
    shown = lines[: max(limit, 0)]
    parts = [str(line) for line in shown]
    hidden = len(lines) - len(shown)
    if hidden:
        parts.append(f"+{hidden} more")
    return ", ".join(parts)
def render_markdown(
    files: list[FileCoverage], minimum_coverage: float, source_root: str
) -> tuple[str, bool]:
    """Render the changed-line coverage result as a Markdown summary.

    The gate fails when at least one changed line is executable and either the
    overall percentage or the percentage of any file with executable changed
    lines is below ``minimum_coverage``.

    Args:
        files: Per-file coverage entries, in the order they should be listed.
        minimum_coverage: Required coverage percentage, applied both overall
            and to each file that has executable changed lines.
        source_root: Source root named in the explanatory text.

    Returns:
        A ``(markdown, should_fail)`` tuple: the report text, ending with a
        newline, and whether the gate should fail.
    """
    changed_files = len(files)
    executable_total = sum(item.executable_count for item in files)
    covered_total = sum(item.covered_count for item in files)
    missed_total = sum(item.missed_count for item in files)
    non_executable_total = sum(item.non_executable_count for item in files)
    # With no executable changed lines there is nothing to miss: report 100%.
    overall_pct = (covered_total / executable_total * 100.0) if executable_total else 100.0

    failing_files = [
        item for item in files if item.coverage_pct is not None and item.coverage_pct < minimum_coverage
    ]
    should_fail = executable_total > 0 and (
        overall_pct < minimum_coverage or bool(failing_files)
    )

    status = "FAIL" if should_fail else "PASS"
    # The two outcomes need distinct markers; with identical (empty) values the
    # status lines would start with a stray space and carry no visual cue.
    # Written as escapes so the source stays ASCII: U+274C is a cross mark,
    # U+2705 is a check mark.
    status_icon = "\u274c" if should_fail else "\u2705"

    lines: list[str] = []
    lines.append("## OpenMetadata Service New-Code Coverage")
    lines.append("")

    if changed_files == 0:
        # Nothing under the source root changed, so the gate never fails here.
        lines.append(
            f"{status_icon} No changed production Java files under `{source_root}`. Coverage gate skipped."
        )
        return "\n".join(lines) + "\n", False

    lines.append(
        f"{status_icon} **{status}**. Required changed-line coverage: `{minimum_coverage:.2f}%` overall and per touched production file."
    )
    lines.append("")
    lines.append(
        f"- Overall executable changed lines: `{covered_total}/{executable_total}` covered (`{overall_pct:.2f}%`)"
    )
    lines.append(f"- Missed executable changed lines: `{missed_total}`")
    lines.append(f"- Non-executable changed lines ignored by JaCoCo: `{non_executable_total}`")
    lines.append(f"- Changed production files: `{changed_files}`")
    lines.append("")

    if executable_total == 0:
        lines.append(
            "All changed production lines are non-executable from JaCoCo's perspective. Gate passed."
        )
        lines.append("")
    elif failing_files:
        lines.append("Files below threshold:")
        for item in failing_files:
            lines.append(
                f"- `{item.path}`: `{item.covered_count}/{item.executable_count}` covered (`{item.coverage_pct:.2f}%`), uncovered lines `{format_line_list(item.missed_lines)}`"
            )
        lines.append("")

    lines.append("| File | Covered | Missed | Executable | Non-exec | Coverage | Uncovered lines |")
    lines.append("| --- | ---: | ---: | ---: | ---: | ---: | --- |")
    for item in files:
        # Files without executable changed lines have no percentage to show.
        coverage_display = (
            f"{item.coverage_pct:.2f}%"
            if item.coverage_pct is not None
            else "N/A"
        )
        lines.append(
            f"| `{item.path}` | {item.covered_count} | {item.missed_count} | {item.executable_count} | {item.non_executable_count} | {coverage_display} | {format_line_list(item.missed_lines)} |"
        )
    lines.append("")
    lines.append(
        f"Only changed executable lines under `{source_root}` are counted. Test files, comments, imports, and non-executable lines are excluded."
    )
    return "\n".join(lines) + "\n", should_fail
def main() -> int:
    """Run the coverage gate end to end.

    Computes the diff, reads the JaCoCo report, writes the Markdown summary to
    the requested file (creating its parent directory if needed) and prints
    the same summary to standard output.

    Returns:
        ``1`` when the gate fails, ``0`` otherwise.
    """
    args = parse_args()

    diff_text = run_git_diff(args.base_ref, args.head_ref, args.source_root)
    changed = parse_changed_lines(diff_text)
    report = parse_jacoco_report(args.report, args.source_root)
    summaries = build_file_coverage(changed, report)
    markdown, should_fail = render_markdown(summaries, args.minimum_coverage, args.source_root)

    destination = Path(args.markdown_output)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(markdown, encoding="utf-8")

    # The summary is printed whatever the outcome; only the status differs.
    print(markdown)
    return 1 if should_fail else 0
# When run as a script, use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())