diff --git a/README.md b/README.md index e109b6a0e..2eeff6f50 100644 --- a/README.md +++ b/README.md @@ -204,12 +204,18 @@ Set `DISABLE_TELEMETRY` in your server or docker .env settings to "true" to opt We will only track usage details that help us make product and roadmap decisions, specifically: - Type of your installation (Docker or Desktop) -- When a document is added or removed. No information _about_ the document. Just that the event occurred. This gives us an idea of use. -- Type of vector database in use. Let's us know which vector database provider is the most used to prioritize changes when updates arrive for that provider. -- Type of LLM in use. Let's us know the most popular choice and prioritize changes when updates arrive for that provider. -- Chat is sent. This is the most regular "event" and gives us an idea of the daily-activity of this project across all installations. Again, only the event is sent - we have no information on the nature or content of the chat itself. -You can verify these claims by finding all locations `Telemetry.sendTelemetry` is called. Additionally these events are written to the output log so you can also see the specific data which was sent - if enabled. No IP or other identifying information is collected. The Telemetry provider is [PostHog](https://posthog.com/) - an open-source telemetry collection service. +- When a document is added or removed. No information _about_ the document. Just that the event occurred. This gives us an idea of use. + +- Type of vector database in use. Lets us know which vector database provider is the most used to prioritize changes when updates arrive for that provider. + +- Type of LLM provider & model tag in use. Lets us know the most popular choice and prioritize changes when updates arrive for that provider or model, or combination thereof, e.g. reasoning vs regular, multi-modal models, etc. + +- When a chat is sent.
This is the most regular "event" and gives us an idea of the daily-activity of this project across all installations. Again, only the **event** is sent - we have no information on the nature or content of the chat itself. + +You can verify these claims by finding all locations `Telemetry.sendTelemetry` is called. Additionally these events are written to the output log so you can also see the specific data which was sent - if enabled. **No IP or other identifying information is collected**. The Telemetry provider is [PostHog](https://posthog.com/) - an open-source telemetry collection service. + +We take privacy very seriously, and we hope you can understand our need to get a glimpse into how our tool is used without asking you to fill out annoying popup surveys, so we can build something worth using. The anonymous data is _never_ shared with third parties, ever. [View all telemetry events in source code](https://github.com/search?q=repo%3AMintplex-Labs%2Fanything-llm%20.sendTelemetry\(&type=code) diff --git a/server/endpoints/admin.js b/server/endpoints/admin.js index 72a5b0796..211f50465 100644 --- a/server/endpoints/admin.js +++ b/server/endpoints/admin.js @@ -513,8 +513,6 @@ function adminEndpoints(app) { try { const user = await userFromSession(request, response); const { apiKey, error } = await ApiKey.create(user.id); - - await Telemetry.sendTelemetry("api_key_created"); await EventLogs.logEvent( "api_key_created", { createdBy: user?.username }, diff --git a/server/endpoints/api/openai/index.js b/server/endpoints/api/openai/index.js index e5bd1771b..e3c70171c 100644 --- a/server/endpoints/api/openai/index.js +++ b/server/endpoints/api/openai/index.js @@ -12,6 +12,7 @@ const { EventLogs } = require("../../../models/eventLogs"); const { OpenAICompatibleChat, } = require("../../../utils/chats/openaiCompatible"); +const { getModelTag } = require("../../utils"); function apiOpenAICompatibleEndpoints(app) { if (!app) return; @@ -181,6 +182,7 @@ function
apiOpenAICompatibleEndpoints(app) { Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", TTSSelection: process.env.TTS_PROVIDER || "native", + LLMModel: getModelTag(), }); await EventLogs.logEvent("api_sent_chat", { workspaceName: workspace?.name, diff --git a/server/endpoints/api/workspace/index.js b/server/endpoints/api/workspace/index.js index 82cc8b5a8..c8e435f6f 100644 --- a/server/endpoints/api/workspace/index.js +++ b/server/endpoints/api/workspace/index.js @@ -14,6 +14,7 @@ const { writeResponseChunk, } = require("../../../utils/helpers/chat/responses"); const { ApiChatHandler } = require("../../../utils/chats/apiChatHandler"); +const { getModelTag } = require("../../utils"); function apiWorkspaceEndpoints(app) { if (!app) return; @@ -87,6 +88,7 @@ function apiWorkspaceEndpoints(app) { Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", TTSSelection: process.env.TTS_PROVIDER || "native", + LLMModel: getModelTag(), }); await EventLogs.logEvent("api_workspace_created", { workspaceName: workspace?.name || "Unknown Workspace", diff --git a/server/endpoints/api/workspaceThread/index.js b/server/endpoints/api/workspaceThread/index.js index 3ce65df97..9a85b346a 100644 --- a/server/endpoints/api/workspaceThread/index.js +++ b/server/endpoints/api/workspaceThread/index.js @@ -13,6 +13,7 @@ const { const { WorkspaceChats } = require("../../../models/workspaceChats"); const { User } = require("../../../models/user"); const { ApiChatHandler } = require("../../../utils/chats/apiChatHandler"); +const { getModelTag } = require("../../utils"); function apiWorkspaceThreadEndpoints(app) { if (!app) return; @@ -436,6 +437,7 @@ function apiWorkspaceThreadEndpoints(app) { Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", TTSSelection: process.env.TTS_PROVIDER || "native", + LLMModel: getModelTag(), }); await 
EventLogs.logEvent("api_sent_chat", { workspaceName: workspace?.name, @@ -606,6 +608,7 @@ function apiWorkspaceThreadEndpoints(app) { Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", TTSSelection: process.env.TTS_PROVIDER || "native", + LLMModel: getModelTag(), }); await EventLogs.logEvent("api_sent_chat", { workspaceName: workspace?.name, diff --git a/server/endpoints/chat.js b/server/endpoints/chat.js index 7e8a72b61..c8770857e 100644 --- a/server/endpoints/chat.js +++ b/server/endpoints/chat.js @@ -16,6 +16,7 @@ const { writeResponseChunk } = require("../utils/helpers/chat/responses"); const { WorkspaceThread } = require("../models/workspaceThread"); const { User } = require("../models/user"); const truncate = require("truncate"); +const { getModelTag } = require("./utils"); function chatEndpoints(app) { if (!app) return; @@ -75,6 +76,7 @@ function chatEndpoints(app) { VectorDbSelection: process.env.VECTOR_DB || "lancedb", multiModal: Array.isArray(attachments) && attachments?.length !== 0, TTSSelection: process.env.TTS_PROVIDER || "native", + LLMModel: getModelTag(), }); await EventLogs.logEvent( @@ -179,6 +181,7 @@ function chatEndpoints(app) { VectorDbSelection: process.env.VECTOR_DB || "lancedb", multiModal: Array.isArray(attachments) && attachments?.length !== 0, TTSSelection: process.env.TTS_PROVIDER || "native", + LLMModel: getModelTag(), }); await EventLogs.logEvent( diff --git a/server/endpoints/system.js b/server/endpoints/system.js index 05c0cf7c9..4077844e7 100644 --- a/server/endpoints/system.js +++ b/server/endpoints/system.js @@ -908,7 +908,6 @@ function systemEndpoints(app) { } const { apiKey, error } = await ApiKey.create(); - await Telemetry.sendTelemetry("api_key_created"); await EventLogs.logEvent( "api_key_created", {}, diff --git a/server/endpoints/utils.js b/server/endpoints/utils.js index 8b08eb2be..399d8cf5f 100644 --- a/server/endpoints/utils.js +++ b/server/endpoints/utils.js 
/**
 * Returns the model tag based on the provider set in the environment.
 * This information is used to identify the parent model for the system
 * so that we can prioritize the correct model and types for future updates
 * as well as build features in AnythingLLM directly for a specific model or capabilities.
 *
 * The provider is read from `process.env.LLM_PROVIDER` and the tag from that
 * provider's model-preference environment variable. The placeholder "--" is
 * returned when the provider is unset or unrecognized, and also when the
 * provider's model variable is unset or empty, so callers always get a string.
 *
 * Disable with {@link https://github.com/Mintplex-Labs/anything-llm?tab=readme-ov-file#telemetry--privacy|Disable Telemetry}
 * @returns {string} The model tag, or "--" when it cannot be determined.
 */
function getModelTag() {
  const UNKNOWN_MODEL = "--";

  // Provider id -> name of the environment variable holding its model preference.
  const modelEnvKeyByProvider = {
    openai: "OPEN_MODEL_PREF",
    anthropic: "ANTHROPIC_MODEL_PREF",
    lmstudio: "LMSTUDIO_MODEL_PREF",
    ollama: "OLLAMA_MODEL_PREF",
    groq: "GROQ_MODEL_PREF",
    togetherai: "TOGETHER_AI_MODEL_PREF",
    // azure reads the same variable as openai.
    azure: "OPEN_MODEL_PREF",
    koboldcpp: "KOBOLD_CPP_MODEL_PREF",
    localai: "LOCAL_AI_MODEL_PREF",
    openrouter: "OPENROUTER_MODEL_PREF",
    mistral: "MISTRAL_MODEL_PREF",
    "generic-openai": "GENERIC_OPEN_AI_MODEL_PREF",
    perplexity: "PERPLEXITY_MODEL_PREF",
    bedrock: "AWS_BEDROCK_LLM_MODEL_PREFERENCE",
    fireworksai: "FIREWORKS_AI_LLM_MODEL_PREF",
    deepseek: "DEEPSEEK_MODEL_PREF",
    litellm: "LITE_LLM_MODEL_PREF",
    apipie: "APIPIE_LLM_MODEL_PREF",
    xai: "XAI_LLM_MODEL_PREF",
    novita: "NOVITA_LLM_MODEL_PREF",
    "nvidia-nim": "NVIDIA_NIM_LLM_MODEL_PREF",
    ppio: "PPIO_MODEL_PREF",
    gemini: "GEMINI_LLM_MODEL_PREF",
  };

  const provider = process.env.LLM_PROVIDER;

  // textgenwebui has no model preference variable; it reports a fixed tag.
  if (provider === "textgenwebui") return "textgenwebui-default";

  // Own-property check so values such as "constructor" or "toString" cannot
  // resolve to members inherited from Object.prototype.
  const isKnownProvider =
    typeof provider === "string" &&
    Object.prototype.hasOwnProperty.call(modelEnvKeyByProvider, provider);
  if (!isKnownProvider) return UNKNOWN_MODEL;

  // An unset (undefined) or blank ("") preference carries no information, so
  // report the placeholder rather than leaking a non-string value to callers.
  return process.env[modelEnvKeyByProvider[provider]] || UNKNOWN_MODEL;
}
"lancedb", TTSSelection: process.env.TTS_PROVIDER || "native", + LLMModel: getModelTag(), }, user?.id ); diff --git a/server/models/documents.js b/server/models/documents.js index e937eb2b1..a28331153 100644 --- a/server/models/documents.js +++ b/server/models/documents.js @@ -4,6 +4,7 @@ const prisma = require("../utils/prisma"); const { Telemetry } = require("./telemetry"); const { EventLogs } = require("./eventLogs"); const { safeJsonParse } = require("../utils/http"); +const { getModelTag } = require("../endpoints/utils"); const Document = { writable: ["pinned", "watched", "lastUpdatedAt"], @@ -130,6 +131,7 @@ const Document = { Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", TTSSelection: process.env.TTS_PROVIDER || "native", + LLMModel: getModelTag(), }); await EventLogs.logEvent( "workspace_documents_added", @@ -169,12 +171,6 @@ const Document = { } } - await Telemetry.sendTelemetry("documents_removed_in_workspace", { - LLMSelection: process.env.LLM_PROVIDER || "openai", - Embedder: process.env.EMBEDDING_ENGINE || "inherit", - VectorDbSelection: process.env.VECTOR_DB || "lancedb", - TTSSelection: process.env.TTS_PROVIDER || "native", - }); await EventLogs.logEvent( "workspace_documents_removed", { diff --git a/server/models/telemetry.js b/server/models/telemetry.js index 98a9b0fef..4c2e35769 100644 --- a/server/models/telemetry.js +++ b/server/models/telemetry.js @@ -21,6 +21,12 @@ const Telemetry = { agent_chat_sent: 1800, agent_chat_started: 1800, agent_tool_call: 1800, + + // Document mgmt events + document_uploaded: 30, + documents_embedded_in_workspace: 30, + link_uploaded: 30, + raw_document_uploaded: 30, }, id: async function () { diff --git a/server/utils/database/index.js b/server/utils/database/index.js index 75f5f7116..0fa2e1d02 100644 --- a/server/utils/database/index.js +++ b/server/utils/database/index.js @@ -83,7 +83,7 @@ async function validateTablePragmas(force = false) { // 
Telemetry is anonymized and your data is never read. This can be disabled by setting // DISABLE_TELEMETRY=true in the `.env` of however you setup. Telemetry helps us determine use // of how AnythingLLM is used and how to improve this product! -// You can see all Telemetry events by ctrl+f `Telemetry.sendEvent` calls to verify this claim. +// You can see all Telemetry events by ctrl+f `Telemetry.sendTelemetry` calls to verify this claim. async function setupTelemetry() { if (process.env.DISABLE_TELEMETRY === "true") { console.log( diff --git a/server/utils/telemetry/index.js b/server/utils/telemetry/index.js index 1945de3fe..feb7d5c43 100644 --- a/server/utils/telemetry/index.js +++ b/server/utils/telemetry/index.js @@ -4,7 +4,7 @@ const { Telemetry } = require("../../models/telemetry"); // Telemetry is anonymized and your data is never read. This can be disabled by setting // DISABLE_TELEMETRY=true in the `.env` of however you setup. Telemetry helps us determine use // of how AnythingLLM is used and how to improve this product! -// You can see all Telemetry events by ctrl+f `Telemetry.sendEvent` calls to verify this claim. +// You can see all Telemetry events by ctrl+f `Telemetry.sendTelemetry` calls to verify this claim. async function setupTelemetry() { if (process.env.DISABLE_TELEMETRY === "true") { console.log(