fix(settings): persist Embeddings page changes and unblock OpenAI test (#4208) (#4212)

Two bugs reported in #4208 had the same shape: a value silently ended up somewhere readers don't look. 1. New `local_search_*` settings saved via PUT /settings/api/<key> were wrapped in `AppSetting`, whose Pydantic validator rewrites every non-`app.`-prefixed key as `app.<key>`. The PUT returned 201 success and the row was created — at `app.local_search_embedding_model` — while every reader queries `local_search_embedding_model` and falls back to the default. Same path affected all nine `local_search_*` keys on the Embeddings page. Fix: in `create_or_update_setting`, only use `AppSetting` for keys that already start with `app.`; route everything else through `BaseSetting` so the key is written verbatim. 2. The OpenAI "Test Embedding Model" button raised `NoSettingsContextError` for `embeddings.openai.dimensions`, which defaults to JSON null. `get_setting_from_snapshot` used None as both "key absent" and a legitimate stored value; with no thread-local context (Flask request thread) and default=None, it raised instead of returning None. The error was caught and surfaced as a misleading "try a dedicated embedding model" message. The same path is hit during real RAG indexing, not just the Test button. Fix: use a proper `_NOT_FOUND` sentinel so found-but-None values are returned. Users hit by bug 1 on prior builds carry an orphan `app.local_search_*` row that nothing reads — harmless, but their previously-chosen model will display as the default until they re-save it once. Reports of "the same model is being downloaded every time" are not addressed here: no LDR code calls Ollama's /api/pull, so the cause is either model-load-into-memory log lines being read as downloads, or a docker volume not persisting Ollama's model cache. Confirming with the reporter before any code change.
2026-06-15 19:46:56 +03:00 · 2026-05-22 21:23:40 +02:00
parent e07171c950
commit a6bc6c4f5e
5 changed files with 103 additions and 14 deletions
--- a/src/local_deep_research/config/thread_settings.py
+++ b/src/local_deep_research/config/thread_settings.py
@@ -20,6 +20,13 @@ class NoSettingsContextError(Exception):
 # Shared thread-local storage for settings context
 _thread_local = threading.local()

+# Sentinel distinguishing "key absent from snapshot" from "key present with
+# value None". Using None for both collapses legitimately-stored null values
+# (e.g. embeddings.openai.dimensions, which defaults to JSON null) into the
+# "not found" path, raising NoSettingsContextError in Flask request threads
+# that have no thread-local context. See #4208.
+_NOT_FOUND = object()
+

 def set_settings_context(settings_context):
    """Set a settings context for the current thread."""
@@ -81,18 +88,21 @@ def get_setting_from_snapshot(
    Raises:
        RuntimeError: If no settings context is available
    """
-    # First check if we have settings_snapshot passed directly
-    value = None
+    # First check if we have settings_snapshot passed directly.
+    # _NOT_FOUND (not None) is the absence sentinel so a key whose stored
+    # value is None is still treated as found.  See #4208.
+    value = _NOT_FOUND
    if settings_snapshot and key in settings_snapshot:
-        value = settings_snapshot[key]
+        raw = settings_snapshot[key]
        # Handle both full format {"value": x} and simplified format (just x)
-        if isinstance(value, dict) and "value" in value:
+        if isinstance(raw, dict) and "value" in raw:
            value = get_typed_setting_value(
                key,
-                value["value"],
-                value.get("ui_element", "text"),
+                raw["value"],
+                raw.get("ui_element", "text"),
            )
-        # else: value is already the raw value from simplified snapshot
+        else:
+            value = raw
    # Search for child keys.
    elif settings_snapshot:
        for k, v in settings_snapshot.items():
@@ -104,12 +114,12 @@ def get_setting_from_snapshot(
                        k, v["value"], v.get("ui_element", "text")
                    )
                # else: v is already the raw value from simplified snapshot
-                if value is None:
+                if value is _NOT_FOUND:
                    value = {k: v}
                else:
                    value[k] = v

-    if value is not None:
+    if value is not _NOT_FOUND:
        # Extract value from dict structure if needed
        return value

--- a/src/local_deep_research/settings/manager.py
+++ b/src/local_deep_research/settings/manager.py
@@ -913,8 +913,16 @@ class SettingsManager(ISettingsManager):
                    setting_obj = SearchSetting(**setting)
                elif key.startswith("report."):
                    setting_obj = ReportSetting(**setting)
-                else:
+                elif key.startswith("app."):
                    setting_obj = AppSetting(**setting)
+                else:
+                    # Keys outside the four buckets (e.g. local_search_*,
+                    # embeddings.*, rag.*) live in their own namespaces.
+                    # Use BaseSetting so the key is written verbatim —
+                    # AppSetting's validator would otherwise prepend
+                    # `app.` and silently relocate the row away from
+                    # where every reader looks it up.  See #4208.
+                    setting_obj = BaseSetting(type=SettingType.APP, **setting)
            else:
                # Use generic BaseSetting
                setting_obj = BaseSetting(**setting)
--- a/tests/config/test_thread_settings.py
+++ b/tests/config/test_thread_settings.py
@@ -142,6 +142,35 @@ class TestGetSettingFromSnapshot:
        result = get_setting_from_snapshot("test.key", default="my_default")
        assert result == "my_default"

+    def test_returns_none_when_snapshot_value_is_none(self, clean_thread_local):
+        """Key present in snapshot with value None should return None,
+        not raise NoSettingsContextError. Regression for #4208: the
+        OpenAI embeddings test endpoint blew up here because
+        embeddings.openai.dimensions defaults to JSON null."""
+        snapshot = {
+            "embeddings.openai.dimensions": {
+                "value": None,
+                "ui_element": "number",
+            }
+        }
+        result = get_setting_from_snapshot(
+            "embeddings.openai.dimensions",
+            default=None,
+            settings_snapshot=snapshot,
+        )
+        assert result is None
+
+    def test_returns_none_when_simplified_snapshot_value_is_none(
+        self, clean_thread_local
+    ):
+        """Simplified-format snapshot (raw value, not {value: ...} dict)
+        with explicit None should also return None, not raise."""
+        snapshot = {"some.key": None}
+        result = get_setting_from_snapshot(
+            "some.key", default=None, settings_snapshot=snapshot
+        )
+        assert result is None
+

 class TestClearSettingsContext:
    """Tests for clear_settings_context function."""
--- a/tests/config/test_thread_settings_high_value.py
+++ b/tests/config/test_thread_settings_high_value.py
@@ -120,14 +120,18 @@ class TestGetSettingFromSnapshot(unittest.TestCase):
        result = get_setting_from_snapshot("key", settings_snapshot=snapshot)
        assert result == "from_snapshot"

-    def test_none_value_in_snapshot_not_treated_as_found(self):
-        # None in snapshot means key exists but value is None
+    def test_none_value_in_snapshot_is_treated_as_found(self):
+        # Regression for #4208: a key present in the snapshot with an
+        # explicit None must be returned as None, not collapsed into
+        # "not found" and replaced with the default. The previous
+        # behavior broke the OpenAI embedding test path because
+        # embeddings.openai.dimensions defaults to JSON null, then
+        # raised NoSettingsContextError when no thread context was set.
        clear_settings_context()
-        # Should fall through to default since value is None -> not truthy for `if value is not None`
        result = get_setting_from_snapshot(
            "key", default="default_val", settings_snapshot={"key": None}
        )
-        assert result == "default_val"
+        assert result is None


 class TestGetBoolSettingFromSnapshot(unittest.TestCase):
--- a/tests/test_settings_manager.py
+++ b/tests/test_settings_manager.py
@@ -340,6 +340,44 @@ def test_get_all_settings_db_error(mock_db_session, mock_logger):
    # You can check the log message content if needed


+def test_create_or_update_setting_does_not_app_prefix_local_search_keys(
+    setup_database_for_all_tests,
+):
+    """Regression for #4208: keys outside the llm./search./report./app.
+    buckets (e.g. local_search_*) must be written verbatim. Before the
+    fix, create_or_update_setting wrapped the input dict in AppSetting,
+    whose Pydantic validator silently rewrote `local_search_embedding_model`
+    to `app.local_search_embedding_model` — readers never found the value,
+    breaking every save on the Embeddings page.
+    """
+    session_local_class = setup_database_for_all_tests
+    session = session_local_class()
+
+    with session:
+        settings_manager = SettingsManager(db_session=session)
+        setting_dict = {
+            "key": "local_search_embedding_model",
+            "value": "all-mpnet-base-v2",
+            "name": "Local Search Embedding Model",
+            "description": "Test",
+        }
+        result = settings_manager.create_or_update_setting(setting_dict)
+        assert result is not None
+        # The key must be persisted as-is, not relocated to app.<key>
+        assert result.key == "local_search_embedding_model"
+
+        roundtrip = settings_manager.get_setting("local_search_embedding_model")
+        assert roundtrip == "all-mpnet-base-v2"
+
+        # And the buggy `app.local_search_*` row must not exist.
+        bad_row = (
+            session.query(Setting)
+            .filter(Setting.key == "app.local_search_embedding_model")
+            .first()
+        )
+        assert bad_row is None
+
+
 def test_get_setting_with_substring_keys(setup_database_for_all_tests):
    """
    Tests that we can get the correct value for a setting, even when its key