ProjectTech4DevAI · nishika26 · Feb 24, 2026 · Feb 23, 2026 · Feb 23, 2026 · Feb 24, 2026
diff --git a/backend/app/api/docs/collections/info.md b/backend/app/api/docs/collections/info.md
@@ -1,5 +1,13 @@
-Retrieve detailed information about a specific collection by its collection id. This endpoint returns the collection object including its project, organization,
-timestamps, and associated LLM service details (`llm_service_id` and `llm_service_name`).
+Retrieve detailed information about a specific collection by its collection id. This endpoint returns the collection object including its project, organization, timestamps, and service-specific details.
+
+**Response Fields:**
+
+**Note:** While the API schema shows both `llm_service_id`/`llm_service_name` AND `knowledge_base_id`/`knowledge_base_provider`, the actual response will only include the fields relevant to what was created:
+
+- **If an Assistant was created** (with model + instructions): Of these service fields, the response will include only `llm_service_id` and `llm_service_name`
+- **If only a Vector Store was created** (without model/instructions): Of these service fields, the response will include only `knowledge_base_id` and `knowledge_base_provider`
+
+**Including Documents:**
 
 If the `include_docs` flag in the request body is true then you will get a list of document IDs associated with a given collection as well. Note that, documents returned are not only stored by Kaapi, but also by Vector store provider.
 

diff --git a/backend/app/api/docs/collections/job_info.md b/backend/app/api/docs/collections/job_info.md
@@ -2,8 +2,13 @@ Retrieve information about a collection job by the collection job ID. This endpo
 
 * Fetching the collection job object, including the collection job ID, the current status, and the associated collection details.
 
-* If the job has finished, has been successful and it was a job of creation of collection then this endpoint will fetch the associated collection details from the collection table, including:
-  - `llm_service_id` and `llm_service_name`.
-  - Collection metadata such as ID, project, organization, and timestamps.
+* If the job has finished successfully and it was a collection-creation job, this endpoint also fetches the associated collection details.
 
-* If the delete-collection job succeeds, the status is set to “successful” and the `collection` key contains the ID of the collection that has been deleted.
+* If the delete-collection job succeeds, the status is set to "successful" and the `collection` key contains the ID of the collection that has been deleted.
+
+**Response Fields for Successful Creation Jobs:**
+
+**Note:** While the API schema shows both `llm_service_id`/`llm_service_name` AND `knowledge_base_id`/`knowledge_base_provider`, the actual collection object in the response will only include the fields relevant to what was created:
+
+- **If an Assistant was created** (with model + instructions): Of these service fields, the collection object will include only `llm_service_id` and `llm_service_name`
+- **If only a Vector Store was created** (without model/instructions): Of these service fields, the collection object will include only `knowledge_base_id` and `knowledge_base_provider`
diff --git a/backend/app/api/docs/collections/list.md b/backend/app/api/docs/collections/list.md
@@ -1,5 +1,8 @@
-List all _active_ collections that have been created and are not deleted
+List all _active_ collections that have been created and are not deleted.
 
-If a vector store was created - `llm_service_name` and `llm_service_id` in the response denotes the name of the vector store (eg. 'openai vector store') and its id respectively.
+**Response Fields:**
 
-[Deprecated] If an assistant was created, `llm_service_name` and `llm_service_id` in the response denotes the name of the model used in the assistant (eg. 'gpt-4o') and assistant id.
+**Note:** While the API schema shows both `llm_service_id`/`llm_service_name` AND `knowledge_base_id`/`knowledge_base_provider`, each collection in the response will only include the fields relevant to what was created:
+
+- **If an Assistant was created** (with model + instructions): Of these service fields, each collection will include only `llm_service_id` and `llm_service_name` (e.g., `llm_service_name: "gpt-4o"` and the assistant ID)
+- **If only a Vector Store was created** (without model/instructions): Of these service fields, each collection will include only `knowledge_base_id` and `knowledge_base_provider` (e.g., `knowledge_base_provider: "openai vector store"` and the vector store ID)
diff --git a/backend/app/api/routes/collection_job.py b/backend/app/api/routes/collection_job.py
@@ -19,7 +19,7 @@
 )
 from app.models.collection import CollectionPublic
 from app.utils import APIResponse, load_description
-from app.services.collections.helpers import extract_error_message
+from app.services.collections.helpers import extract_error_message, to_collection_public
 
 
 logger = logging.getLogger(__name__)
@@ -49,7 +49,7 @@ def collection_job_info(
         ):
             collection_crud = CollectionCrud(session, current_user.project_.id)
             collection = collection_crud.read_one(collection_job.collection_id)
-            job_out.collection = CollectionPublic.model_validate(collection)
+            job_out.collection = to_collection_public(collection)
 
         elif collection_job.action_type == CollectionActionType.DELETE:
             job_out.collection = CollectionIDPublic(id=collection_job.collection_id)

diff --git a/backend/app/api/routes/collections.py b/backend/app/api/routes/collections.py
@@ -28,7 +28,7 @@
     CollectionPublic,
 )
 from app.utils import APIResponse, load_description, validate_callback_url
-from app.services.collections.helpers import ensure_unique_name
+from app.services.collections.helpers import ensure_unique_name, to_collection_public
 from app.services.collections import (
     create_collection as create_service,
     delete_collection as delete_service,
@@ -71,7 +71,10 @@ def list_collections(
     collection_crud = CollectionCrud(session, current_user.project_.id)
     rows = collection_crud.read_all()
 
-    return APIResponse.success_response(rows)
+    # Convert each collection to CollectionPublic with correct field mapping
+    public_collections = [to_collection_public(collection) for collection in rows]
+
+    return APIResponse.success_response(public_collections)
 
 
 @router.post(
@@ -190,7 +193,7 @@ def collection_info(
         description="If true, include documents linked to this collection",
     ),
     include_url: bool = Query(
-        True, description="Include a signed URL to access the document"
+        False, description="Include a signed URL to access the document"
     ),
     limit: int
     | None = Query(
@@ -203,7 +206,9 @@ def collection_info(
     collection_crud = CollectionCrud(session, current_user.project_.id)
     collection = collection_crud.read_one(collection_id)
 
-    collection_with_docs = CollectionWithDocsPublic.model_validate(collection)
+    # Convert to CollectionPublic with correct field mapping, then to WithDocs
+    collection_public = to_collection_public(collection)
+    collection_with_docs = CollectionWithDocsPublic.model_validate(collection_public)
 
     if include_docs:
         document_collection_crud = DocumentCollectionCrud(session)

diff --git a/backend/app/api/routes/documents.py b/backend/app/api/routes/documents.py
@@ -111,7 +111,7 @@ async def upload_doc(
     target_format: str
     | None = Form(
         None,
-        description="Desired output format for the uploaded document (e.g., pdf, docx, txt).",
+        description="Desired output format for the uploaded document",
     ),
     transformer: str
     | None = Form(

diff --git a/backend/app/models/collection.py b/backend/app/models/collection.py
@@ -3,7 +3,7 @@
 from typing import Any, Literal
 from uuid import UUID, uuid4
 
-from pydantic import HttpUrl, model_validator
+from pydantic import HttpUrl, model_validator, model_serializer
 from sqlalchemy import UniqueConstraint, Index, text
 from sqlmodel import Field, Relationship, SQLModel
 
@@ -211,14 +211,78 @@ class CollectionIDPublic(SQLModel):
 
 class CollectionPublic(SQLModel):
     id: UUID
-    llm_service_id: str
-    llm_service_name: str
+    llm_service_id: str | None = Field(
+        default=None,
+        description="LLM service ID (e.g., Assistant ID) when model and instructions were provided",
+    )
+    llm_service_name: str | None = Field(
+        default=None,
+        description="LLM service name (e.g., model name) when model and instructions were provided",
+    )
+    knowledge_base_id: str | None = Field(
+        default=None,
+        description="Knowledge base ID (e.g., Vector Store ID) when only vector store was created",
+    )
+    knowledge_base_provider: str | None = Field(
+        default=None,
+        description="Knowledge base provider name when only vector store was created",
+    )
     project_id: int
 
     inserted_at: datetime
     updated_at: datetime
     deleted_at: datetime | None = None
 
+    @model_validator(mode="after")
+    def validate_service_fields(self) -> "CollectionPublic":
+        """Ensure either LLM service fields or knowledge base fields are set, not both."""
+        has_llm = self.llm_service_id is not None or self.llm_service_name is not None
+        has_kb = (
+            self.knowledge_base_id is not None
+            or self.knowledge_base_provider is not None
+        )
+
+        if has_llm and has_kb:
+            raise ValueError(
+                "Cannot have both LLM service fields and knowledge base fields set"
+            )
+
+        if not has_llm and not has_kb:
+            raise ValueError(
+                "Either LLM service fields or knowledge base fields must be set"
+            )
+
+        # Ensure both fields in the pair are set or both are None
+        if has_llm and (
+            (self.llm_service_id is None) != (self.llm_service_name is None)
+        ):
+            raise ValueError("Both llm_service_id and llm_service_name must be set")
+
+        if has_kb and (
+            (self.knowledge_base_id is None) != (self.knowledge_base_provider is None)
+        ):
+            raise ValueError(
+                "Both knowledge_base_id and knowledge_base_provider must be set"
+            )
+
+        return self
+
+    @model_serializer(mode="wrap", when_used="json")
+    def _serialize_model(self, serializer: Any, info: Any) -> dict[str, Any]:
+        """Exclude unused service fields from JSON serialization."""
+        data = serializer(self)
+
+        # If this is a knowledge base, remove llm_service fields
+        if data.get("knowledge_base_id") is not None:
+            data.pop("llm_service_id", None)
+            data.pop("llm_service_name", None)
+        # If this is an assistant, remove knowledge_base fields
+        elif data.get("llm_service_id") is not None:
+            data.pop("knowledge_base_id", None)
+            data.pop("knowledge_base_provider", None)
+
+        return data
+
 
 class CollectionWithDocsPublic(CollectionPublic):
     documents: list[DocumentPublic] | None = None
diff --git a/backend/app/services/collections/create_collection.py b/backend/app/services/collections/create_collection.py
@@ -22,7 +22,10 @@
     CollectionJobPublic,
     CreationRequest,
 )
-from app.services.collections.helpers import extract_error_message
+from app.services.collections.helpers import (
+    extract_error_message,
+    to_collection_public,
+)
 from app.services.collections.providers.registry import get_llm_provider
 from app.celery.utils import start_low_priority_job
 from app.utils import send_callback, APIResponse
@@ -75,10 +78,12 @@ def build_success_payload(
       "metadata": null
     }
     """
-    collection_public = CollectionPublic.model_validate(collection)
+    collection_public = to_collection_public(collection)
+    collection_dict = collection_public.model_dump(mode="json", exclude_none=True)
+
     job_public = CollectionJobPublic.model_validate(
         collection_job,
-        update={"collection": collection_public},
+        update={"collection": collection_dict},
     )
     return APIResponse.success_response(job_public).model_dump(
         mode="json", exclude={"data": {"error_message"}}

diff --git a/backend/app/services/collections/helpers.py b/backend/app/services/collections/helpers.py
@@ -7,11 +7,10 @@
 
 from fastapi import HTTPException
 from sqlmodel import select
-from openai import OpenAIError
 
 from app.crud import DocumentCrud, CollectionCrud
 from app.api.deps import SessionDep
-from app.models import DocumentCollection, Collection
+from app.models import DocumentCollection, Collection, CollectionPublic
 
 
 logger = logging.getLogger(__name__)
@@ -119,3 +118,38 @@ def ensure_unique_name(
         )
 
     return requested_name
+
+
+def to_collection_public(collection: Collection) -> CollectionPublic:
+    """
+    Convert a Collection DB model to CollectionPublic response model.
+
+    Maps fields based on service type:
+    - If llm_service_name is a vector store (matches get_service_name pattern),
+      use knowledge_base_id/knowledge_base_provider
+    - Otherwise (assistant), use llm_service_id/llm_service_name
+    """
+    is_vector_store = collection.llm_service_name == get_service_name(
+        collection.provider
+    )
+
+    if is_vector_store:
+        return CollectionPublic(
+            id=collection.id,
+            knowledge_base_id=collection.llm_service_id,
+            knowledge_base_provider=collection.llm_service_name,
+            project_id=collection.project_id,
+            inserted_at=collection.inserted_at,
+            updated_at=collection.updated_at,
+            deleted_at=collection.deleted_at,
+        )
+    else:
+        return CollectionPublic(
+            id=collection.id,
+            llm_service_id=collection.llm_service_id,
+            llm_service_name=collection.llm_service_name,
+            project_id=collection.project_id,
+            inserted_at=collection.inserted_at,
+            updated_at=collection.updated_at,
+            deleted_at=collection.deleted_at,
+        )
diff --git a/backend/app/tests/api/routes/collections/test_collection_info.py b/backend/app/tests/api/routes/collections/test_collection_info.py
@@ -167,8 +167,12 @@ def test_collection_info_vector_store_collection(
     payload = data["data"]
 
     assert payload["id"] == str(collection.id)
-    assert payload["llm_service_name"] == get_service_name("openai")
-    assert payload["llm_service_id"] == collection.llm_service_id
+    # Vector store collection should have knowledge_base fields, not llm_service fields
+    assert payload["knowledge_base_provider"] == get_service_name("openai")
+    assert payload["knowledge_base_id"] == collection.llm_service_id
+    # LLM service fields should not be present in the response
+    assert "llm_service_name" not in payload
+    assert "llm_service_id" not in payload
 
     docs = payload.get("documents", [])
     assert len(docs) >= 1

diff --git a/backend/app/tests/api/routes/collections/test_collection_list.py b/backend/app/tests/api/routes/collections/test_collection_list.py
@@ -102,8 +102,12 @@ def test_list_collections_includes_vector_store_collection_with_fields(
 
     row = matching[0]
     assert row["project_id"] == project.id
-    assert row["llm_service_name"] == get_service_name("openai")
-    assert row["llm_service_id"] == collection.llm_service_id
+    # Vector store collection should have knowledge_base fields, not llm_service fields
+    assert row["knowledge_base_provider"] == get_service_name("openai")
+    assert row["knowledge_base_id"] == collection.llm_service_id
+    # LLM service fields should not be present in the response
+    assert "llm_service_name" not in row
+    assert "llm_service_id" not in row
 
 
 def test_list_collections_does_not_error_with_no_collections(