ngclient: Fix the lockfile handling in Windows

There does not seem to be a way around a ugly loop over open()... Signed-off-by: Jussi Kukkonen <jkukkonen@google.com>
2026-05-24 10:08:28 +00:00 · 2025-08-22 19:18:27 +03:00 · 2025-08-22 19:18:27 +03:00 · ba0842ff72
commit ba0842ff72
parent cbe34d956a
5 changed files with 81 additions and 50 deletions
--- a/tests/refresh_script.py
+++ b/tests/refresh_script.py
@ -1,4 +1,5 @@
 import sys
+import time

 from tuf.ngclient import Updater

@ -6,11 +7,17 @@
 print(f"  metadata dir: {sys.argv[2]}")
 print(f"  metadata url: {sys.argv[3]}")

+start = time.time()

 for i in range(int(sys.argv[1])):
    try:
+        refresh_start = time.time()
        u = Updater(metadata_dir=sys.argv[2], metadata_base_url=sys.argv[3])
        # file3.txt is delegated so we end up exercising all metadata load paths
        u.get_targetinfo("file3.txt")
    except OSError as e:
-        sys.exit(f"Failed on iteration {i}: {e}")
+        print(
+            f"Failed on iteration {i}, "
+            f"{time.time() - refresh_start} secs elapsed ({time.time() - start} total)"
+        )
+        raise e
--- a/tests/test_updater_ng.py
+++ b/tests/test_updater_ng.py
@ -356,16 +356,14 @@ def test_user_agent(self) -> None:

        self.assertEqual(ua[:23], "MyApp/1.2.3 python-tuf/")

-
-class TestParallelUpdater(TestUpdater):
    def test_parallel_updaters(self) -> None:
-        # Refresh two updaters in parallel many times, using the same local metadata cache.
+        # Refresh many updaters in parallel many times, using the same local metadata cache.
        # This should reveal race conditions.

-        iterations = 100
+        iterations = 50
+        process_count = 10

-        # The project root is the parent of the tests directory
-        project_root = os.path.dirname(utils.TESTS_DIR)
+        project_root_dir = os.path.dirname(utils.TESTS_DIR)

        command = [
            sys.executable,
@ -376,29 +374,26 @@ def test_parallel_updaters(self) -> None:
            self.metadata_url,
        ]

-        p1 = subprocess.Popen(
-            command,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            cwd=project_root,
-        )
-        p2 = subprocess.Popen(
-            command,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            cwd=project_root,
-        )
+        procs = [
+            subprocess.Popen(
+                command,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                cwd=project_root_dir,
+            )
+            for _ in range(process_count)
+        ]

-        stdout1, stderr1 = p1.communicate()
-        stdout2, stderr2 = p2.communicate()
-
-        if p1.returncode != 0 or p2.returncode != 0:
+        errout = ""
+        for proc in procs:
+            stdout, stderr = proc.communicate()
+            if proc.returncode != 0:
+                errout += "Parallel Refresh script failed:"
+                errout += f"\nprocess stdout: \n{stdout.decode()}"
+                errout += f"\nprocess stderr: \n{stderr.decode()}"
+        if errout:
            self.fail(
-                "Parallel refresh failed"
-                f"\nprocess 1 stdout: \n{stdout1.decode()}"
-                f"\nprocess 1 stderr: \n{stderr1.decode()}"
-                f"\nprocess 2 stdout: \n{stdout2.decode()}"
-                f"\nprocess 2 stderr: \n{stderr2.decode()}"
+                f"One or more scripts failed parallel refresh test:\n{errout}"
            )


--- a/tests/test_updater_validation.py
+++ b/tests/test_updater_validation.py
@ -53,9 +53,9 @@ def test_local_target_storage_fail(self) -> None:

    def test_non_existing_metadata_dir(self) -> None:
        with self.assertRaises(FileNotFoundError):
-            # Initialize Updater with non-existing metadata_dir
+            # Initialize Updater with non-existing metadata_dir and no bootstrap root
            Updater(
-                "non_existing_metadata_dir",
+                f"{self.temp_dir.name}/non_existing_metadata_dir",
                "https://example.com/metadata/",
                fetcher=self.sim,
            )
--- a/tox.ini
+++ b/tox.ini
@ -11,8 +11,8 @@ skipsdist = true
 [testenv]
 commands =
    python3 --version
-    python3 -m coverage run -m unittest
-    python3 -m coverage report -m --fail-under 97
+    python3 -m coverage run -m unittest -v
+    python3 -m coverage report -m --fail-under 96

 deps =
    -r{toxinidir}/requirements/test.txt
--- a/tuf/ngclient/updater.py
+++ b/tuf/ngclient/updater.py
@ -58,6 +58,7 @@
 import os
 import shutil
 import tempfile
+import time
 from pathlib import Path
 from typing import IO, TYPE_CHECKING, cast
 from urllib import parse
@ -79,21 +80,50 @@
    # advisory file locking for posix
    import fcntl

-    def _lock_file(f: IO) -> None:
-        if f.writable():
+    @contextlib.contextmanager
+    def _lock_file(path: str) -> Iterator[IO]:
+        with open(path, "wb") as f:
            fcntl.lockf(f, fcntl.LOCK_EX)
+            yield f

 except ModuleNotFoundError:
-    # Windows file locking
+    # Windows file locking, in belt-and-suspenders-from-Temu style:
+    # Use a loop that tries to open the lockfile for 30 secs, but also
+    # use msvcrt.locking().
+    # * since open() usually just fails when another process has the file open
+    #   msvcrt.locking() almost never gets called when there is a lock. open()
+    #   sometimes succeeds for multiple processes though
+    # * msvcrt.locking() does not even block until file is available: it just
+    #   tries once per second in a non-blocking manner for 10 seconds. So if
+    #   another process keeps opening the file it's unlikely that we actually
+    #   get the lock
    import msvcrt

-    def _lock_file(f: IO) -> None:
-        # On Windows we lock a byte range and file must not be empty
-        f.write(b"\0")
-        f.flush()
-        f.seek(0)
+    @contextlib.contextmanager
+    def _lock_file(path: str) -> Iterator[IO]:
+        err = None
+        locked = False
+        for _ in range(100):
+            try:
+                with open(path, "wb") as f:
+                    msvcrt.locking(f.fileno(), msvcrt.LK_LOCK, 1)
+                    locked = True
+                    yield f
+                    return
+            except FileNotFoundError:
+                # could be from yield or from open() -- either way we bail
+                raise
+            except OSError as e:
+                if locked:
+                    # yield has raised, let's not continue loop
+                    raise e
+                err = e
+                logger.warning("Unsuccessful lock attempt for %s: %s", path, e)
+                time.sleep(0.3)

-        msvcrt.locking(f.fileno(), msvcrt.LK_LOCK, 1)
+        # raise the last failure if we never got a lock
+        if err is not None:
+            raise err


 class Updater:
@ -153,6 +183,10 @@ def __init__(
                f"got '{self.config.envelope_type}'"
            )

+        # Ensure the whole metadata directory structure exists
+        rootdir = Path(self._dir, "root_history")
+        rootdir.mkdir(exist_ok=True, parents=True)
+
        with self._lock_metadata():
            if not bootstrap:
                # if no root was provided, use the cached non-versioned root
@ -168,15 +202,11 @@ def __init__(
    @contextlib.contextmanager
    def _lock_metadata(self) -> Iterator[None]:
        """Context manager for locking the metadata directory."""
-        # Ensure the whole metadata directory structure exists
-        rootdir = Path(self._dir, "root_history")
-        rootdir.mkdir(exist_ok=True, parents=True)

-        with open(os.path.join(self._dir, ".lock"), "wb") as f:
-            logger.debug("Getting metadata lock...")
-            _lock_file(f)
+        logger.debug("Getting metadata lock...")
+        with _lock_file(os.path.join(self._dir, ".lock")):
            yield
-            logger.debug("Releasing metadata lock")
+        logger.debug("Released metadata lock")

    def refresh(self) -> None:
        """Refresh top-level metadata.
@ -337,8 +367,7 @@ def download_target(
            targetinfo.verify_length_and_hashes(target_file)

            target_file.seek(0)
-            with open(filepath, "wb") as destination_file:
-                _lock_file(destination_file)
+            with _lock_file(filepath) as destination_file:
                shutil.copyfileobj(target_file, destination_file)

        logger.debug("Downloaded target %s", targetinfo.path)