mirror of
https://github.com/LearningCircuit/local-deep-research.git
synced 2026-06-15 19:46:56 +03:00
fix(encoding): add encoding="utf-8" to bare open() / read_text / write_text in examples and scripts (#4118)
Cleanup follow-up to #3797. The check-open-encoding hook was originally scoped with `exclude: ^(tests/|examples/|scripts/)` because those directories had ~45 pre-existing bare open() calls, and addressing them was out of scope for the core Windows bug fix.

This commit:

* adds encoding="utf-8" to 45 read/write call sites under examples/ and scripts/ — JSON benchmark results, config-doc generators, workflow status pages, and the datetime-timezone pre-commit hook
* narrows the hook exclude to `^tests/` only, so future regressions in examples/ and scripts/ are blocked at commit time

Windows users running the benchmark scripts and config-doc generator would previously hit silent failures or UnicodeDecodeErrors on non-ASCII content under cp1252. The package itself was already protected by #3797.
This commit is contained in:
@@ -172,7 +172,7 @@ def get_env_only_settings(
|
||||
category = _category_from_filename(filepath.name)
|
||||
|
||||
try:
|
||||
content = filepath.read_text()
|
||||
content = filepath.read_text(encoding="utf-8")
|
||||
tree = ast.parse(content)
|
||||
except Exception as e:
|
||||
print(f"Warning: Could not parse {filepath}: {e}")
|
||||
@@ -221,7 +221,7 @@ def generate_docs_content(root_dir: Optional[Path] = None) -> str:
|
||||
# Recursively find all JSON files
|
||||
for json_file in sorted(defaults_dir.rglob("*.json")):
|
||||
try:
|
||||
with open(json_file, "r") as f:
|
||||
with open(json_file, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
settings.update(data)
|
||||
except Exception as e:
|
||||
@@ -344,7 +344,7 @@ def generate_docs(
|
||||
"Run 'python scripts/generate_config_docs.py' to generate it."
|
||||
)
|
||||
return 1
|
||||
existing = output_file.read_text()
|
||||
existing = output_file.read_text(encoding="utf-8")
|
||||
if existing == new_content:
|
||||
print("OK: Configuration docs are up to date.")
|
||||
return 0
|
||||
@@ -355,7 +355,7 @@ def generate_docs(
|
||||
return 1
|
||||
|
||||
output_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
output_file.write_text(new_content)
|
||||
output_file.write_text(new_content, encoding="utf-8")
|
||||
print(f"Wrote {output_file}")
|
||||
return 0
|
||||
|
||||
|
||||
@@ -172,7 +172,7 @@ def parse_workflow(path: Path) -> dict[str, Any]:
|
||||
],
|
||||
}
|
||||
"""
|
||||
text = path.read_text()
|
||||
text = path.read_text(encoding="utf-8")
|
||||
# Use yaml.safe_load on the file but with FullLoader behavior: the
|
||||
# `on:` key gets coerced to True (boolean) by safe_load when YAML 1.1
|
||||
# legacy boolean parsing kicks in. The simpler workaround is to load
|
||||
@@ -874,7 +874,7 @@ def merge_with_existing(new_full: str, existing_path: Path) -> str:
|
||||
(including the timestamp line) is fully owned by the generator."""
|
||||
if not existing_path.exists():
|
||||
return new_full
|
||||
existing = existing_path.read_text()
|
||||
existing = existing_path.read_text(encoding="utf-8")
|
||||
if BEGIN_MARKER not in existing or END_MARKER not in existing:
|
||||
return new_full
|
||||
if existing.index(BEGIN_MARKER) > existing.index(END_MARKER):
|
||||
@@ -1023,7 +1023,7 @@ def cmd_generate(output: Path, verbose: bool) -> int:
|
||||
|
||||
output.parent.mkdir(parents=True, exist_ok=True)
|
||||
final = merge_with_existing(full, output)
|
||||
output.write_text(final)
|
||||
output.write_text(final, encoding="utf-8")
|
||||
|
||||
counts = {k: 0 for k, _ in GROUP_ORDER}
|
||||
for wf in workflows.values():
|
||||
@@ -1045,7 +1045,7 @@ def cmd_check_structure(output: Path) -> int:
|
||||
file=sys.stderr,
|
||||
)
|
||||
return 1
|
||||
text = output.read_text()
|
||||
text = output.read_text(encoding="utf-8")
|
||||
missing = []
|
||||
for p in sorted(WORKFLOWS_DIR.glob("*.yml")):
|
||||
if f"`{p.name}`" not in text:
|
||||
|
||||
@@ -61,7 +61,7 @@ def check_datetime_columns(file_path: Path) -> List[Tuple[int, str, str]]:
|
||||
violations = []
|
||||
|
||||
try:
|
||||
with open(file_path, "r") as f:
|
||||
with open(file_path, "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
lines = content.split("\n")
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user