diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f80ca82da..2ded2f670 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -208,7 +208,7 @@ repos: entry: .pre-commit-hooks/check-open-encoding.py language: script files: \.py$ - exclude: ^(tests/|examples/|scripts/) + exclude: ^tests/ description: "Enforce explicit encoding= on text-mode open() calls (issue #3743)" - id: check-image-pinning name: Check Docker Image Pinning diff --git a/examples/benchmarks/browsecomp/run_browsecomp_fixed_v2.py b/examples/benchmarks/browsecomp/run_browsecomp_fixed_v2.py index f150b7734..7c70a64a9 100755 --- a/examples/benchmarks/browsecomp/run_browsecomp_fixed_v2.py +++ b/examples/benchmarks/browsecomp/run_browsecomp_fixed_v2.py @@ -204,7 +204,7 @@ def run_browsecomp_evaluation( } # Write incrementally to output file - with open(output_path, "a") as f: + with open(output_path, "a", encoding="utf-8") as f: f.write(json.dumps(result) + "\n") results.append(result) @@ -228,7 +228,7 @@ def run_browsecomp_evaluation( "confidence": "0", "is_correct": False, } - with open(output_path, "a") as f: + with open(output_path, "a", encoding="utf-8") as f: f.write(json.dumps(result) + "\n") results.append(result) @@ -247,7 +247,7 @@ def run_browsecomp_evaluation( } report_path = str(Path(output_dir) / "browsecomp_summary.json") - with open(report_path, "w") as f: + with open(report_path, "w", encoding="utf-8") as f: json.dump(report, f, indent=2) print("\nEvaluation complete.") diff --git a/examples/benchmarks/claude_grading/benchmark.py b/examples/benchmarks/claude_grading/benchmark.py index 7afecafde..128691800 100755 --- a/examples/benchmarks/claude_grading/benchmark.py +++ b/examples/benchmarks/claude_grading/benchmark.py @@ -187,7 +187,9 @@ def run_benchmark(strategy="source_based", iterations=1, examples=5): # Save results import json - with open(Path(output_dir) / "simpleqa_results.json", "w") as f: + with open( + Path(output_dir) / "simpleqa_results.json", "w", encoding="utf-8" + ) as f: json.dump(simpleqa_results, f, indent=2) except Exception as e: print(f"Error during SimpleQA evaluation: {e}") @@ -221,7 +223,9 @@ def run_benchmark(strategy="source_based", iterations=1, examples=5): print(f"BrowseComp metrics: {browsecomp_results.get('metrics', {})}") # Save results - with open(Path(output_dir) / "browsecomp_results.json", "w") as f: + with open( + Path(output_dir) / "browsecomp_results.json", "w", encoding="utf-8" + ) as f: json.dump(browsecomp_results, f, indent=2) except Exception as e: print(f"Error during BrowseComp evaluation: {e}") @@ -254,7 +258,9 @@ def run_benchmark(strategy="source_based", iterations=1, examples=5): print(f"Composite score: {composite_results.get('score', 0):.4f}") # Save results - with open(Path(output_dir) / "composite_results.json", "w") as f: + with open( + Path(output_dir) / "composite_results.json", "w", encoding="utf-8" + ) as f: json.dump(composite_results, f, indent=2) except Exception as e: print(f"Error during composite evaluation: {e}") diff --git a/examples/benchmarks/run_browsecomp.py b/examples/benchmarks/run_browsecomp.py index adc70fb79..6c069ff18 100644 --- a/examples/benchmarks/run_browsecomp.py +++ b/examples/benchmarks/run_browsecomp.py @@ -170,7 +170,7 @@ def run_browsecomp_with_canary( results.append(result) # Write result to file - with open(results_file, "a") as f: + with open(results_file, "a", encoding="utf-8") as f: f.write(json.dumps(result) + "\n") except Exception as e: @@ -199,7 +199,7 @@ def run_browsecomp_with_canary( results.append(error_result) # Write error result to file - with open(results_file, "a") as f: + with open(results_file, "a", encoding="utf-8") as f: f.write(json.dumps(error_result) + "\n") logger.info(f"Completed processing {total_examples} examples") diff --git a/examples/benchmarks/run_resumable_parallel_benchmark.py b/examples/benchmarks/run_resumable_parallel_benchmark.py index 963bbbf8d..93030e3a1 100644 --- a/examples/benchmarks/run_resumable_parallel_benchmark.py +++ b/examples/benchmarks/run_resumable_parallel_benchmark.py @@ -49,7 +49,7 @@ def load_existing_results(results_file: str) -> Dict[str, Dict]: results = {} if Path(results_file).exists(): logger.info(f"Loading existing results from: {results_file}") - with open(results_file, "r") as f: + with open(results_file, "r", encoding="utf-8") as f: for line in f: if line.strip(): try: @@ -164,7 +164,7 @@ def run_resumable_benchmark( reused_results_count += 1 # Write to new results file - with open(results_file, "a") as f: + with open(results_file, "a", encoding="utf-8") as f: f.write(json.dumps(existing_result) + "\n") else: # Process new example @@ -215,7 +215,7 @@ def run_resumable_benchmark( new_results_count += 1 # Write to file immediately - with open(results_file, "a") as f: + with open(results_file, "a", encoding="utf-8") as f: f.write(json.dumps(result) + "\n") except Exception as e: @@ -235,7 +235,7 @@ def run_resumable_benchmark( new_results_count += 1 # Write error result - with open(results_file, "a") as f: + with open(results_file, "a", encoding="utf-8") as f: f.write(json.dumps(error_result) + "\n") logger.info( @@ -567,7 +567,9 @@ def main(): } with open( - Path(output_dir) / "parallel_benchmark_summary.json", "w" + Path(output_dir) / "parallel_benchmark_summary.json", + "w", + encoding="utf-8", ) as f: json.dump(summary, f, indent=2) diff --git a/examples/benchmarks/scripts/run_benchmark_with_claude_grading.py b/examples/benchmarks/scripts/run_benchmark_with_claude_grading.py index 0b2408166..4ea06a770 100755 --- a/examples/benchmarks/scripts/run_benchmark_with_claude_grading.py +++ b/examples/benchmarks/scripts/run_benchmark_with_claude_grading.py @@ -186,7 +186,9 @@ def run_benchmark(strategy="source_based", iterations=1, examples=5): # Save results import json - with open(Path(output_dir) / "simpleqa_results.json", "w") as f: + with open( + Path(output_dir) / "simpleqa_results.json", "w", encoding="utf-8" + ) as f: json.dump(simpleqa_results, f, indent=2) except Exception as e: print(f"Error during SimpleQA evaluation: {e}") @@ -220,7 +222,9 @@ def run_benchmark(strategy="source_based", iterations=1, examples=5): print(f"BrowseComp metrics: {browsecomp_results.get('metrics', {})}") # Save results - with open(Path(output_dir) / "browsecomp_results.json", "w") as f: + with open( + Path(output_dir) / "browsecomp_results.json", "w", encoding="utf-8" + ) as f: json.dump(browsecomp_results, f, indent=2) except Exception as e: print(f"Error during BrowseComp evaluation: {e}") @@ -253,7 +257,9 @@ def run_benchmark(strategy="source_based", iterations=1, examples=5): print(f"Composite score: {composite_results.get('score', 0):.4f}") # Save results - with open(Path(output_dir) / "composite_results.json", "w") as f: + with open( + Path(output_dir) / "composite_results.json", "w", encoding="utf-8" + ) as f: json.dump(composite_results, f, indent=2) except Exception as e: print(f"Error during composite evaluation: {e}") diff --git a/examples/benchmarks/scripts/run_focused_benchmark_fixed.py b/examples/benchmarks/scripts/run_focused_benchmark_fixed.py index 2c9fca8c8..3444e4856 100755 --- a/examples/benchmarks/scripts/run_focused_benchmark_fixed.py +++ b/examples/benchmarks/scripts/run_focused_benchmark_fixed.py @@ -181,7 +181,9 @@ def run_direct_evaluation(strategy="source_based", iterations=1, examples=5): # Save results import json - with open(Path(output_dir) / "simpleqa_results.json", "w") as f: + with open( + Path(output_dir) / "simpleqa_results.json", "w", encoding="utf-8" + ) as f: json.dump(simpleqa_results, f, indent=2) except Exception as e: print(f"Error during SimpleQA evaluation: {e}") @@ -215,7 +217,9 @@ def run_direct_evaluation(strategy="source_based", iterations=1, examples=5): print(f"BrowseComp metrics: {browsecomp_results.get('metrics', {})}") # Save results - with open(Path(output_dir) / "browsecomp_results.json", "w") as f: + with open( + Path(output_dir) / "browsecomp_results.json", "w", encoding="utf-8" + ) as f: json.dump(browsecomp_results, f, indent=2) except Exception as e: print(f"Error during BrowseComp evaluation: {e}") @@ -248,7 +252,9 @@ def run_direct_evaluation(strategy="source_based", iterations=1, examples=5): print(f"Composite score: {composite_results.get('score', 0):.4f}") # Save results - with open(Path(output_dir) / "composite_results.json", "w") as f: + with open( + Path(output_dir) / "composite_results.json", "w", encoding="utf-8" + ) as f: json.dump(composite_results, f, indent=2) except Exception as e: print(f"Error during composite evaluation: {e}") diff --git a/examples/benchmarks/scripts/run_grader_only.py b/examples/benchmarks/scripts/run_grader_only.py index 0ce745535..c325537b7 100644 --- a/examples/benchmarks/scripts/run_grader_only.py +++ b/examples/benchmarks/scripts/run_grader_only.py @@ -201,7 +201,7 @@ def generate_summary(evaluation_path, output_dir=None): # Load evaluation results evaluation_results = [] - with open(evaluation_path, "r") as f: + with open(evaluation_path, "r", encoding="utf-8") as f: for line in f: if line.strip(): evaluation_results.append(json.loads(line)) diff --git a/examples/optimization/browsecomp_optimization.py b/examples/optimization/browsecomp_optimization.py index 4ded57581..96a1a958b 100755 --- a/examples/optimization/browsecomp_optimization.py +++ b/examples/optimization/browsecomp_optimization.py @@ -92,7 +92,9 @@ def main(): } with open( - Path(output_dir) / "browsecomp_optimization_summary.json", "w" + Path(output_dir) / "browsecomp_optimization_summary.json", + "w", + encoding="utf-8", ) as f: json.dump(summary, f, indent=2) diff --git a/examples/optimization/example_optimization.py b/examples/optimization/example_optimization.py index de6781e1c..8217b890c 100644 --- a/examples/optimization/example_optimization.py +++ b/examples/optimization/example_optimization.py @@ -82,7 +82,9 @@ def main(): "demo": {"parameters": balanced_params, "score": balanced_score}, } - with open(Path(output_dir) / "optimization_summary.json", "w") as f: + with open( + Path(output_dir) / "optimization_summary.json", "w", encoding="utf-8" + ) as f: json.dump(summary, f, indent=2) print(f"\nDemo complete! Results saved to {output_dir}") diff --git a/examples/optimization/example_quick_optimization.py b/examples/optimization/example_quick_optimization.py index a80cb6d90..455ff5514 100644 --- a/examples/optimization/example_quick_optimization.py +++ b/examples/optimization/example_quick_optimization.py @@ -257,7 +257,9 @@ def main(): }, } - with open(Path(output_dir) / "optimization_summary.json", "w") as f: + with open( + Path(output_dir) / "optimization_summary.json", "w", encoding="utf-8" + ) as f: json.dump(summary, f, indent=2) print( diff --git a/examples/optimization/gemini_optimization.py b/examples/optimization/gemini_optimization.py index 3d85ffb5b..889b7443d 100644 --- a/examples/optimization/gemini_optimization.py +++ b/examples/optimization/gemini_optimization.py @@ -195,7 +195,9 @@ def main(): } with open( - Path(output_dir) / "gemini_optimization_summary.json", "w" + Path(output_dir) / "gemini_optimization_summary.json", + "w", + encoding="utf-8", ) as f: json.dump(summary, f, indent=2) diff --git a/examples/optimization/llm_multi_benchmark.py b/examples/optimization/llm_multi_benchmark.py index 8cdcc5c35..b59c0422b 100644 --- a/examples/optimization/llm_multi_benchmark.py +++ b/examples/optimization/llm_multi_benchmark.py @@ -216,7 +216,11 @@ def main(): # Save results to file import json - with open(Path(output_dir) / "multi_benchmark_results.json", "w") as f: + with open( + Path(output_dir) / "multi_benchmark_results.json", + "w", + encoding="utf-8", + ) as f: json.dump( { "timestamp": timestamp, diff --git a/examples/optimization/multi_benchmark_simulation.py b/examples/optimization/multi_benchmark_simulation.py index d692f95bf..494c4764a 100644 --- a/examples/optimization/multi_benchmark_simulation.py +++ b/examples/optimization/multi_benchmark_simulation.py @@ -388,7 +388,7 @@ def main(): } results_file = str(Path(output_dir) / "multi_benchmark_results.json") - with open(results_file, "w") as f: + with open(results_file, "w", encoding="utf-8") as f: # Convert all values to serializable types json.dump( results, diff --git a/examples/optimization/run_gemini_benchmark.py b/examples/optimization/run_gemini_benchmark.py index 2024058c9..1a6261583 100755 --- a/examples/optimization/run_gemini_benchmark.py +++ b/examples/optimization/run_gemini_benchmark.py @@ -217,7 +217,7 @@ def run_benchmarks( try: import json - with open(summary_file, "w") as f: + with open(summary_file, "w", encoding="utf-8") as f: json.dump( { "timestamp": timestamp, diff --git a/examples/optimization/run_optimization.py b/examples/optimization/run_optimization.py index c7dd87a00..3b8c69745 100644 --- a/examples/optimization/run_optimization.py +++ b/examples/optimization/run_optimization.py @@ -186,7 +186,9 @@ def main(): "custom_weights": custom_weights, } - with open(Path(output_dir) / "optimization_summary.json", "w") as f: + with open( + Path(output_dir) / "optimization_summary.json", "w", encoding="utf-8" + ) as f: json.dump(summary, f, indent=2) return 0 diff --git a/examples/optimization/run_parallel_benchmark.py b/examples/optimization/run_parallel_benchmark.py index df7e903e3..2335ec2a7 100755 --- a/examples/optimization/run_parallel_benchmark.py +++ b/examples/optimization/run_parallel_benchmark.py @@ -284,7 +284,9 @@ def main(): } with open( - Path(output_dir) / "parallel_benchmark_summary.json", "w" + Path(output_dir) / "parallel_benchmark_summary.json", + "w", + encoding="utf-8", ) as f: json.dump(summary, f, indent=2) diff --git a/examples/optimization/strategy_benchmark_plan.py b/examples/optimization/strategy_benchmark_plan.py index 332d43efa..db8f36b0a 100755 --- a/examples/optimization/strategy_benchmark_plan.py +++ b/examples/optimization/strategy_benchmark_plan.py @@ -161,7 +161,9 @@ def run_strategy_comparison(): logger.info(f"Best quality score: {best_quality_score}") logger.info(f"Duration: {quality_end - quality_start} seconds") - with open(Path(quality_output_dir) / "results.json", "w") as f: + with open( + Path(quality_output_dir) / "results.json", "w", encoding="utf-8" + ) as f: json.dump(quality_result, f, indent=2) # ====== EXPERIMENT 2: Speed-focused optimization ====== @@ -203,7 +205,9 @@ def run_strategy_comparison(): logger.info(f"Best speed score: {best_speed_score}") logger.info(f"Duration: {speed_end - speed_start} seconds") - with open(Path(speed_output_dir) / "results.json", "w") as f: + with open( + Path(speed_output_dir) / "results.json", "w", encoding="utf-8" + ) as f: json.dump(speed_result, f, indent=2) # ====== EXPERIMENT 3: Balanced optimization ====== @@ -245,7 +249,9 @@ def run_strategy_comparison(): logger.info(f"Best balanced score: {best_balanced_score}") logger.info(f"Duration: {balanced_end - balanced_start} seconds") - with open(Path(balanced_output_dir) / "results.json", "w") as f: + with open( + Path(balanced_output_dir) / "results.json", "w", encoding="utf-8" + ) as f: json.dump(balanced_result, f, indent=2) # ====== EXPERIMENT 4: Multi-Benchmark (SimpleQA + BrowseComp) ====== @@ -288,7 +294,9 @@ def run_strategy_comparison(): logger.info(f"Best multi-benchmark score: {best_multi_score}") logger.info(f"Duration: {multi_end - multi_start} seconds") - with open(Path(multi_output_dir) / "results.json", "w") as f: + with open( + Path(multi_output_dir) / "results.json", "w", encoding="utf-8" + ) as f: json.dump(multi_result, f, indent=2) # ====== Save summary of all executions ====== @@ -297,7 +305,9 @@ def run_strategy_comparison(): ) execution_stats["timestamp"] = timestamp - with open(Path(base_output_dir) / "summary.json", "w") as f: + with open( + Path(base_output_dir) / "summary.json", "w", encoding="utf-8" + ) as f: json.dump(execution_stats, f, indent=2) # Generate summary report @@ -358,7 +368,7 @@ significant sample size of 500 examples per experiment. """ # Write summary to file - with open(Path(base_dir) / "summary_report.md", "w") as f: + with open(Path(base_dir) / "summary_report.md", "w", encoding="utf-8") as f: f.write(summary_text) @@ -448,7 +458,9 @@ def run_strategy_simulation(num_examples=10): "best_score": best_score, } - with open(Path(sim_output_dir) / "simulation_results.json", "w") as f: + with open( + Path(sim_output_dir) / "simulation_results.json", "w", encoding="utf-8" + ) as f: json.dump(sim_result, f, indent=2) return sim_result diff --git a/scripts/generate_config_docs.py b/scripts/generate_config_docs.py index 5fe7c8f78..d2f588ba8 100755 --- a/scripts/generate_config_docs.py +++ b/scripts/generate_config_docs.py @@ -172,7 +172,7 @@ def get_env_only_settings( category = _category_from_filename(filepath.name) try: - content = filepath.read_text() + content = filepath.read_text(encoding="utf-8") tree = ast.parse(content) except Exception as e: print(f"Warning: Could not parse {filepath}: {e}") @@ -221,7 +221,7 @@ def generate_docs_content(root_dir: Optional[Path] = None) -> str: # Recursively find all JSON files for json_file in sorted(defaults_dir.rglob("*.json")): try: - with open(json_file, "r") as f: + with open(json_file, "r", encoding="utf-8") as f: data = json.load(f) settings.update(data) except Exception as e: @@ -344,7 +344,7 @@ def generate_docs( "Run 'python scripts/generate_config_docs.py' to generate it." ) return 1 - existing = output_file.read_text() + existing = output_file.read_text(encoding="utf-8") if existing == new_content: print("OK: Configuration docs are up to date.") return 0 @@ -355,7 +355,7 @@ def generate_docs( return 1 output_file.parent.mkdir(parents=True, exist_ok=True) - output_file.write_text(new_content) + output_file.write_text(new_content, encoding="utf-8") print(f"Wrote {output_file}") return 0 diff --git a/scripts/generate_workflow_status.py b/scripts/generate_workflow_status.py index 73cb56682..d93d36365 100644 --- a/scripts/generate_workflow_status.py +++ b/scripts/generate_workflow_status.py @@ -172,7 +172,7 @@ def parse_workflow(path: Path) -> dict[str, Any]: ], } """ - text = path.read_text() + text = path.read_text(encoding="utf-8") # Use yaml.safe_load on the file but with FullLoader behavior: the # `on:` key gets coerced to True (boolean) by safe_load when YAML 1.1 # legacy boolean parsing kicks in. The simpler workaround is to load @@ -874,7 +874,7 @@ def merge_with_existing(new_full: str, existing_path: Path) -> str: (including the timestamp line) is fully owned by the generator.""" if not existing_path.exists(): return new_full - existing = existing_path.read_text() + existing = existing_path.read_text(encoding="utf-8") if BEGIN_MARKER not in existing or END_MARKER not in existing: return new_full if existing.index(BEGIN_MARKER) > existing.index(END_MARKER): @@ -1023,7 +1023,7 @@ def cmd_generate(output: Path, verbose: bool) -> int: output.parent.mkdir(parents=True, exist_ok=True) final = merge_with_existing(full, output) - output.write_text(final) + output.write_text(final, encoding="utf-8") counts = {k: 0 for k, _ in GROUP_ORDER} for wf in workflows.values(): @@ -1045,7 +1045,7 @@ def cmd_check_structure(output: Path) -> int: file=sys.stderr, ) return 1 - text = output.read_text() + text = output.read_text(encoding="utf-8") missing = [] for p in sorted(WORKFLOWS_DIR.glob("*.yml")): if f"`{p.name}`" not in text: diff --git a/scripts/pre_commit/check_datetime_timezone.py b/scripts/pre_commit/check_datetime_timezone.py index dd9aa72ae..1cc20a4c6 100755 --- a/scripts/pre_commit/check_datetime_timezone.py +++ b/scripts/pre_commit/check_datetime_timezone.py @@ -61,7 +61,7 @@ def check_datetime_columns(file_path: Path) -> List[Tuple[int, str, str]]: violations = [] try: - with open(file_path, "r") as f: + with open(file_path, "r", encoding="utf-8") as f: content = f.read() lines = content.split("\n") except Exception as e: