Source code for langchain_core.indexing.in_memory
import uuid
from typing import Any, Dict, List, Optional, Sequence, cast
from langchain_core._api import beta
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.indexing import UpsertResponse
from langchain_core.indexing.base import DeleteResponse, DocumentIndex
from langchain_core.pydantic_v1 import Field
[docs]
@beta(message="Introduced in version 0.2.29. Underlying abstraction subject to change.")
class InMemoryDocumentIndex(DocumentIndex):
    """In memory document index.
    This is an in-memory document index that stores documents in a dictionary.
    It provides a simple search API that returns documents by the number of
    counts the given query appears in the document.
    .. versionadded:: 0.2.29
    """
    store: Dict[str, Document] = Field(default_factory=dict)
    top_k: int = 4
[docs]
    def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse:
        """Upsert items into the index."""
        ok_ids = []
        for item in items:
            if item.id is None:
                id_ = str(uuid.uuid4())
                item_ = item.copy()
                item_.id = id_
            else:
                item_ = item
                id_ = item.id
            self.store[id_] = item_
            ok_ids.append(cast(str, item_.id))
        return UpsertResponse(succeeded=ok_ids, failed=[]) 
[docs]
    def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> DeleteResponse:
        """Delete by ID."""
        if ids is None:
            raise ValueError("IDs must be provided for deletion")
        ok_ids = []
        for id_ in ids:
            if id_ in self.store:
                del self.store[id_]
                ok_ids.append(id_)
        return DeleteResponse(
            succeeded=ok_ids, num_deleted=len(ok_ids), num_failed=0, failed=[]
        ) 
[docs]
    def get(self, ids: Sequence[str], /, **kwargs: Any) -> List[Document]:
        """Get by ids."""
        found_documents = []
        for id_ in ids:
            if id_ in self.store:
                found_documents.append(self.store[id_])
        return found_documents 
    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        counts_by_doc = []
        for document in self.store.values():
            count = document.page_content.count(query)
            counts_by_doc.append((document, count))
        counts_by_doc.sort(key=lambda x: x[1], reverse=True)
        return [doc.copy() for doc, count in counts_by_doc[: self.top_k]]