OpenMetadata/.github/scripts/label_connector.py
IceS2 1fa0c79d27
chore(github): migrate issue templates to structured forms (#27710)
* chore(github): migrate issue templates to structured forms

- Convert bug_report, feature_request, doc_update to GitHub issue forms (YAML)
- Add connector_bug form with free-text Connector field
- Drop epic and feature_task templates (stale since 2022, no usage evidence)
- Add auto-label workflow that maps the Connector field to a namespaced
  connector:<name> label, falling back to connector:other on 0 or 2+ matches
- Labels are applied exclusively and auto-created with a grey "Connector"
  description when missing

* chore(github): drop redundant pipeline type field from connector_bug form

Feature area already covers metadata/lineage/profiler/usage distinction.

* fix(github): address PR review feedback

- bug_report.yml: add labels: ["bug"] for pattern consistency
- label-connector.yml: add contents: read permission (needed by checkout)
- label_connector.py: raise on unexpected HTTP status; accept 404 for
  idempotent GET-label and DELETE-label-from-issue; stop echoing the
  raw Connector field value into workflow logs
2026-04-24 14:08:20 +02:00

98 lines
3.3 KiB
Python

"""Auto-label connector bugs.
Reads the "Connector" field from the issue body and applies one connector:* label.
- Exactly one rule matches → that label.
- Zero or multiple matches → connector:other.
Any other connector:* label this script manages is removed.
To add a connector: append one row to RULES.
"""
import json
import os
import re
from urllib.error import HTTPError
from urllib.parse import quote
from urllib.request import Request, urlopen
# Ordered (label, regex) rows. classify() searches each pattern in the
# lower-cased, separator-normalised Connector text, so patterns are written
# in lower case with optional single spaces between words.
# To support a new connector, append one row here.
RULES = [
    ("connector:mssql", r"\b(mssql|ms ?sql|sql ?server)\b"),
    ("connector:mysql", r"\bmysql\b"),
    ("connector:s3", r"\b(aws )?s3\b"),
    ("connector:bigquery", r"\b(big ?query|gcp bigquery)\b"),
    ("connector:snowflake", r"\bsnowflake\b"),
    ("connector:redshift", r"\b(aws )?redshift\b"),
    ("connector:unity-catalog", r"\bunity ?catalog\b"),
    ("connector:powerbi", r"\bpower ?bi\b"),
    ("connector:postgres", r"\bpostgres(ql)?\b"),
    ("connector:athena", r"\b(aws )?athena\b"),
    ("connector:tableau", r"\btableau\b"),
    ("connector:looker", r"\blooker\b"),
    ("connector:airflow", r"\b(apache )?airflow\b"),
    ("connector:dbt", r"\bdbt( ?cloud| ?core)?\b"),
    ("connector:databricks", r"\bdatabricks\b"),
    ("connector:fabric", r"\b(microsoft |ms )?fabric\b"),
]

# Fallback label used when zero or more than one rule matches.
OTHER = "connector:other"

# Every label this script may add to or remove from an issue.
MANAGED = {OTHER, *(name for name, _ in RULES)}
# Both values are read at import time; a missing variable raises KeyError
# immediately, before any API call is attempted.
# TOKEN is sent as the Bearer credential on every request made by gh().
TOKEN = os.environ["GITHUB_TOKEN"]
# REPO is interpolated into the API URL as /repos/{REPO}
# (presumably "owner/name" as provided by GitHub Actions — confirm).
REPO = os.environ["GITHUB_REPOSITORY"]
def gh(method, path, body=None, ok=(200, 201, 204), timeout=30):
    """Call the GitHub REST API for the current repository.

    Args:
        method: HTTP verb, e.g. "GET", "POST" or "DELETE".
        path: Path appended to ``https://api.github.com/repos/{REPO}``.
            Callers must URL-quote any dynamic segment themselves.
        body: Optional JSON-serialisable payload; ``None`` sends no body.
        ok: Status codes the caller accepts as success.
        timeout: Socket timeout in seconds passed to ``urlopen`` so a
            stalled connection cannot hang the workflow indefinitely.

    Returns:
        The HTTP status code, which is always a member of ``ok``.

    Raises:
        RuntimeError: If the response status is not in ``ok``.
        Connection failures and timeouts raised by ``urlopen`` are not
        caught here and propagate unchanged.
    """
    # Compare against None so an intentionally empty payload is still sent.
    data = json.dumps(body).encode() if body is not None else None
    req = Request(
        f"https://api.github.com/repos/{REPO}{path}",
        data=data,
        method=method,
        headers={
            "Authorization": f"Bearer {TOKEN}",
            "Accept": "application/vnd.github+json",
            "Content-Type": "application/json",
        },
    )
    try:
        with urlopen(req, timeout=timeout) as r:
            status = r.status
    except HTTPError as e:
        # urllib raises on 4xx/5xx; convert to a plain status so the caller's
        # `ok` tuple decides what is acceptable (e.g. 404 on an idempotent GET).
        status = e.code
        # HTTPError doubles as the response object; release its connection.
        e.close()
    if status not in ok:
        raise RuntimeError(f"{method} {path} returned HTTP {status}")
    return status
def classify(field_value):
    """Resolve a free-text connector name to a single connector label.

    The text is lower-cased, the separators ``_ - / . , ( )`` are turned into
    spaces, and whitespace runs are collapsed before each RULES pattern is
    searched in it.

    Returns:
        The matching label when exactly one rule matches; otherwise OTHER
        (zero matches, or an ambiguous value that matches several rules).
    """
    lowered = field_value.lower()
    # Separators become spaces so "big-query" and "big_query" read as "big query".
    spaced = re.sub(r"[_\-/.,()]", " ", lowered)
    cleaned = re.sub(r"\s+", " ", spaced).strip()

    matched = []
    for label, pattern in RULES:
        if re.search(pattern, cleaned):
            matched.append(label)

    if len(matched) != 1:
        return OTHER
    return matched[0]
def main():
    """Label the issue from the triggering event with one connector:* label.

    Reads the event payload at ``GITHUB_EVENT_PATH``, extracts the line that
    follows the "### Connector" heading in the issue body, classifies it, and
    then makes the issue carry exactly that managed label: the label is
    created in the repository if missing, other managed labels are removed
    from the issue, and the target label is added if not already present.

    Returns without calling the API when the Connector field is absent,
    empty, or the literal "_No response_".

    Raises:
        KeyError: If ``GITHUB_EVENT_PATH`` is unset or the payload has no
            "issue" key.
        RuntimeError: Propagated from gh() on an unexpected HTTP status.
    """
    # Decode explicitly as UTF-8 (the JSON interchange encoding) so parsing
    # does not depend on the runner's locale default.
    with open(os.environ["GITHUB_EVENT_PATH"], encoding="utf-8") as f:
        issue = json.load(f)["issue"]

    # Capture the first non-empty line after the "### Connector" heading.
    match = re.search(r"### Connector\s*\n+\s*([^\n]+)", issue.get("body") or "")
    field = match.group(1).strip() if match else ""
    # "_No response_" is presumably the issue-form placeholder for an
    # unanswered field — treated the same as an empty value.
    if not field or field == "_No response_":
        print("No Connector field — skipping.")
        return

    target = classify(field)
    current = {label["name"] for label in issue.get("labels", [])}
    # Only the resolved label is logged, never the raw user-supplied text.
    print(f'Resolved to "{target}"')

    # Create the label in the repository on first use.
    if gh("GET", f"/labels/{quote(target, safe='')}", ok=(200, 404)) == 404:
        gh("POST", "/labels", {"name": target, "color": "aaaaaa", "description": "Connector"})

    # Drop every other managed label; 404 is accepted so a label that is
    # already gone does not fail the run.
    for label in current & (MANAGED - {target}):
        gh("DELETE", f"/issues/{issue['number']}/labels/{quote(label, safe='')}", ok=(200, 404))
        print(f'Removed "{label}"')

    if target not in current:
        gh("POST", f"/issues/{issue['number']}/labels", {"labels": [target]})
        print(f'Added "{target}"')
# Run the labeler only when executed as a script, not when imported.
if __name__ == "__main__":
    main()