fix(encoding): add encoding="utf-8" to bare open() / read_text / write_text in examples and scripts (#4118)

Cleanup follow-up to #3797. The check-open-encoding hook was originally scoped with exclude: ^(tests/|examples/|scripts/) because those directories had ~45 pre-existing bare open() calls, and addressing them was out of scope for the core Windows bug fix.

This commit:

* adds encoding="utf-8" to 45 read/write call sites under examples/ and scripts/ — JSON benchmark results, config-doc generators, workflow status pages, and the datetime-timezone pre-commit hook
* narrows the hook exclude to ^tests/ only, so future regressions in examples/ and scripts/ are blocked at commit time

Windows users running the benchmark scripts and config-doc generator would previously hit silent failures or UnicodeDecodeErrors on non-ASCII content under cp1252. The package itself was already protected by #3797.
2026-06-15 19:46:56 +03:00 · 2026-05-18 21:45:04 +02:00
parent b9cdfa6db7
commit 653707a556
21 changed files with 94 additions and 46 deletions
--- a/scripts/generate_config_docs.py
+++ b/scripts/generate_config_docs.py
@@ -172,7 +172,7 @@ def get_env_only_settings(
        category = _category_from_filename(filepath.name)

        try:
-            content = filepath.read_text()
+            content = filepath.read_text(encoding="utf-8")
            tree = ast.parse(content)
        except Exception as e:
            print(f"Warning: Could not parse {filepath}: {e}")
@@ -221,7 +221,7 @@ def generate_docs_content(root_dir: Optional[Path] = None) -> str:
    # Recursively find all JSON files
    for json_file in sorted(defaults_dir.rglob("*.json")):
        try:
-            with open(json_file, "r") as f:
+            with open(json_file, "r", encoding="utf-8") as f:
                data = json.load(f)
                settings.update(data)
        except Exception as e:
@@ -344,7 +344,7 @@ def generate_docs(
                "Run 'python scripts/generate_config_docs.py' to generate it."
            )
            return 1
-        existing = output_file.read_text()
+        existing = output_file.read_text(encoding="utf-8")
        if existing == new_content:
            print("OK: Configuration docs are up to date.")
            return 0
@@ -355,7 +355,7 @@ def generate_docs(
        return 1

    output_file.parent.mkdir(parents=True, exist_ok=True)
-    output_file.write_text(new_content)
+    output_file.write_text(new_content, encoding="utf-8")
    print(f"Wrote {output_file}")
    return 0

--- a/scripts/generate_workflow_status.py
+++ b/scripts/generate_workflow_status.py
@@ -172,7 +172,7 @@ def parse_workflow(path: Path) -> dict[str, Any]:
          ],
        }
    """
-    text = path.read_text()
+    text = path.read_text(encoding="utf-8")
    # Use yaml.safe_load on the file but with FullLoader behavior: the
    # `on:` key gets coerced to True (boolean) by safe_load when YAML 1.1
    # legacy boolean parsing kicks in. The simpler workaround is to load
@@ -874,7 +874,7 @@ def merge_with_existing(new_full: str, existing_path: Path) -> str:
    (including the timestamp line) is fully owned by the generator."""
    if not existing_path.exists():
        return new_full
-    existing = existing_path.read_text()
+    existing = existing_path.read_text(encoding="utf-8")
    if BEGIN_MARKER not in existing or END_MARKER not in existing:
        return new_full
    if existing.index(BEGIN_MARKER) > existing.index(END_MARKER):
@@ -1023,7 +1023,7 @@ def cmd_generate(output: Path, verbose: bool) -> int:

    output.parent.mkdir(parents=True, exist_ok=True)
    final = merge_with_existing(full, output)
-    output.write_text(final)
+    output.write_text(final, encoding="utf-8")

    counts = {k: 0 for k, _ in GROUP_ORDER}
    for wf in workflows.values():
@@ -1045,7 +1045,7 @@ def cmd_check_structure(output: Path) -> int:
            file=sys.stderr,
        )
        return 1
-    text = output.read_text()
+    text = output.read_text(encoding="utf-8")
    missing = []
    for p in sorted(WORKFLOWS_DIR.glob("*.yml")):
        if f"`{p.name}`" not in text:
--- a/scripts/pre_commit/check_datetime_timezone.py
+++ b/scripts/pre_commit/check_datetime_timezone.py
@@ -61,7 +61,7 @@ def check_datetime_columns(file_path: Path) -> List[Tuple[int, str, str]]:
    violations = []

    try:
-        with open(file_path, "r") as f:
+        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
            lines = content.split("\n")
    except Exception as e: