fix(encoding): add encoding="utf-8" to bare open() / read_text / write_text in examples and scripts (#4118)

Cleanup follow-up to #3797. The check-open-encoding hook was originally scoped
with exclude: ^(tests/|examples/|scripts/) because those directories had ~45
pre-existing bare open() calls and addressing them was out of scope for the
core Windows bug fix.

This commit:
  * adds encoding="utf-8" to 45 read/write call sites under examples/ and
    scripts/ — JSON benchmark results, config-doc generators, workflow
    status pages, and the datetime-timezone pre-commit hook
  * narrows the hook exclude to ^tests/ only, so future regressions in
    examples/ and scripts/ are blocked at commit time

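Without an explicit encoding, open() / read_text / write_text fall back to the
locale encoding, which is typically cp1252 on Windows. Windows users running
the benchmark scripts and config-doc generator would therefore previously hit
silent failures (e.g. mis-decoded text) or UnicodeDecodeErrors on non-ASCII
content. The package itself was already protected by #3797.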
This commit is contained in:
LearningCircuit
2026-05-18 21:45:04 +02:00
committed by GitHub
parent b9cdfa6db7
commit 653707a556
21 changed files with 94 additions and 46 deletions

View File

@@ -172,7 +172,7 @@ def get_env_only_settings(
category = _category_from_filename(filepath.name)
try:
content = filepath.read_text()
content = filepath.read_text(encoding="utf-8")
tree = ast.parse(content)
except Exception as e:
print(f"Warning: Could not parse {filepath}: {e}")
@@ -221,7 +221,7 @@ def generate_docs_content(root_dir: Optional[Path] = None) -> str:
# Recursively find all JSON files
for json_file in sorted(defaults_dir.rglob("*.json")):
try:
with open(json_file, "r") as f:
with open(json_file, "r", encoding="utf-8") as f:
data = json.load(f)
settings.update(data)
except Exception as e:
@@ -344,7 +344,7 @@ def generate_docs(
"Run 'python scripts/generate_config_docs.py' to generate it."
)
return 1
existing = output_file.read_text()
existing = output_file.read_text(encoding="utf-8")
if existing == new_content:
print("OK: Configuration docs are up to date.")
return 0
@@ -355,7 +355,7 @@ def generate_docs(
return 1
output_file.parent.mkdir(parents=True, exist_ok=True)
output_file.write_text(new_content)
output_file.write_text(new_content, encoding="utf-8")
print(f"Wrote {output_file}")
return 0

View File

@@ -172,7 +172,7 @@ def parse_workflow(path: Path) -> dict[str, Any]:
],
}
"""
text = path.read_text()
text = path.read_text(encoding="utf-8")
# Use yaml.safe_load on the file but with FullLoader behavior: the
# `on:` key gets coerced to True (boolean) by safe_load when YAML 1.1
# legacy boolean parsing kicks in. The simpler workaround is to load
@@ -874,7 +874,7 @@ def merge_with_existing(new_full: str, existing_path: Path) -> str:
(including the timestamp line) is fully owned by the generator."""
if not existing_path.exists():
return new_full
existing = existing_path.read_text()
existing = existing_path.read_text(encoding="utf-8")
if BEGIN_MARKER not in existing or END_MARKER not in existing:
return new_full
if existing.index(BEGIN_MARKER) > existing.index(END_MARKER):
@@ -1023,7 +1023,7 @@ def cmd_generate(output: Path, verbose: bool) -> int:
output.parent.mkdir(parents=True, exist_ok=True)
final = merge_with_existing(full, output)
output.write_text(final)
output.write_text(final, encoding="utf-8")
counts = {k: 0 for k, _ in GROUP_ORDER}
for wf in workflows.values():
@@ -1045,7 +1045,7 @@ def cmd_check_structure(output: Path) -> int:
file=sys.stderr,
)
return 1
text = output.read_text()
text = output.read_text(encoding="utf-8")
missing = []
for p in sorted(WORKFLOWS_DIR.glob("*.yml")):
if f"`{p.name}`" not in text:

View File

@@ -61,7 +61,7 @@ def check_datetime_columns(file_path: Path) -> List[Tuple[int, str, str]]:
violations = []
try:
with open(file_path, "r") as f:
with open(file_path, "r", encoding="utf-8") as f:
content = f.read()
lines = content.split("\n")
except Exception as e: