Skip to content

vector_store

VectorStore

Vector store class

Source code in core/vector_store.py
class VectorStore:
    """Wrapper around a single ChromaDB collection.

    The collection is fetched (or created) once in ``__init__`` and stored
    on ``self.collection``. Documents can be tagged with a ``reference_id``
    metadata value, which the other methods use to filter searches and to
    delete documents.
    """

    def __init__(
        self,
        collection_name: Annotated[
            Optional[str],
            "Collection name",
        ] = "default_collection",
        client: Annotated[
            Optional["chromadb.api.client.Client"],
            "Chroma client",
        ] = None,
        embeddings: Annotated[
            Optional["chromadb.EmbeddingFunction"],
            "Embeddings function",
        ] = None,
    ):
        """
        Initialize the vector store

        Args:
            collection_name (str): Collection name. ``None`` falls back to
                ``"default_collection"``.
            client (chromadb.api.client.Client): ChromaDB client. When
                omitted or ``None``, ``chromadb.Client()`` is created here.
            embeddings (chromadb.EmbeddingFunction): Embeddings function.
                When omitted or ``None``,
                ``embedding_functions.DefaultEmbeddingFunction()`` is
                created here.
        """
        # The defaults are built inside the call rather than in the
        # signature: a call expression used as a default value runs once,
        # when the class body is executed, and is then shared by every
        # instance.
        if collection_name is None:
            collection_name = "default_collection"
        if client is None:
            client = chromadb.Client()
        if embeddings is None:
            embeddings = embedding_functions.DefaultEmbeddingFunction()

        self.collection = client.get_or_create_collection(
            name=collection_name,
            embedding_function=embeddings,
            metadata={"hnsw:space": "cosine"},
        )

    def add_documents(
        self,
        documents: Annotated[
            list[str],
            "List of documents",
        ],
        reference_id: Annotated[
            Optional[str],
            "Reference ID",
        ] = None,
    ) -> Annotated[
        list[str],
        "List of document IDs",
    ]:
        """
        Add documents to the vector store

        Args:
            documents (list[str]): List of document texts
            reference_id (str): Reference id stored in each document's
                metadata; when ``None`` the metadata is left empty

        Returns:
            list[str]: Newly generated document IDs, one per input document
                and in the same order. Empty when ``documents`` is empty.
        """
        # Nothing to index: return early without calling the collection.
        if not documents:
            return []

        metadata = {} if reference_id is None else {"reference_id": reference_id}
        # NOTE(review): some chromadb releases may reject empty metadata
        # dicts - confirm against the pinned version.
        ids = [nanoid.generate() for _ in documents]
        self.collection.add(
            documents=documents,
            ids=ids,
            # One independent dict per document, so no two entries alias.
            metadatas=[dict(metadata) for _ in documents],
        )
        return ids

    def similarity_search(
        self,
        query: Annotated[
            str,
            "Query string",
        ],
        reference_id: Annotated[
            Optional[str],
            "Reference ID",
        ] = None,
        k: Annotated[
            int,
            "Number of result documents",
        ] = 3,
    ) -> list[Tuple["Document", float]]:
        """
        Search for similar documents

        Args:
            query (str): Query string
            reference_id (str): Reference ID; when given, only documents
                whose ``reference_id`` metadata equals it are searched
            k (int): Number of result documents

        Returns:
            list[Tuple[Document, float]]: One ``(document, score)`` pair per
                hit, in the order returned by the collection. ``score`` is
                the distance reported by the collection divided by 100.
        """
        where = None if reference_id is None else {"reference_id": reference_id}
        res = self.collection.query(
            query_texts=[query],
            n_results=k,
            where=where,
        )
        # A single query text was sent, so only the first result row is read.
        # NOTE(review): the value is a scaled distance, not a similarity -
        # confirm that the /100 scaling is what callers expect.
        return [
            (Document(page_content=text, metadata=meta), distance / 100)
            for text, meta, distance in zip(
                res["documents"][0],
                res["metadatas"][0],
                res["distances"][0],
            )
        ]

    def delete_by_reference_id(
        self,
        reference_id: Annotated[
            str,
            "Reference ID",
        ],
    ) -> None:
        """
        Delete documents by reference_id

        Args:
            reference_id (str): Reference ID; documents whose
                ``reference_id`` metadata equals it are deleted

        Returns:
            None
        """
        self.collection.delete(where={"reference_id": reference_id})

__init__(collection_name='default_collection', client=chromadb.Client(), embeddings=embedding_functions.DefaultEmbeddingFunction())

Initialize the vector store

Parameters:

Name Type Description Default
collection_name str

Collection name

'default_collection'
client Client

ChromaDB client

Client()
embeddings Embeddings

Embeddings function

DefaultEmbeddingFunction()
Source code in core/vector_store.py
def __init__(
    self,
    collection_name: Annotated[
        Optional[str],
        "Collection name",
    ] = "default_collection",
    client: Annotated[
        Optional["chromadb.api.client.Client"],
        "Chroma client",
    ] = None,
    embeddings: Annotated[
        Optional["chromadb.EmbeddingFunction"],
        "Embeddings function",
    ] = None,
):
    """
    Initialize the vector store

    Args:
        collection_name (str): Collection name. ``None`` falls back to
            ``"default_collection"``.
        client (chromadb.api.client.Client): ChromaDB client. When omitted
            or ``None``, ``chromadb.Client()`` is created here.
        embeddings (chromadb.EmbeddingFunction): Embeddings function. When
            omitted or ``None``,
            ``embedding_functions.DefaultEmbeddingFunction()`` is created
            here.
    """
    # The defaults are built inside the call rather than in the signature:
    # a call expression used as a default value runs once, at definition
    # time, and is then shared by every instance.
    if collection_name is None:
        collection_name = "default_collection"
    if client is None:
        client = chromadb.Client()
    if embeddings is None:
        embeddings = embedding_functions.DefaultEmbeddingFunction()

    self.collection = client.get_or_create_collection(
        name=collection_name,
        embedding_function=embeddings,
        metadata={"hnsw:space": "cosine"},
    )

add_documents(documents, reference_id=None)

Add documents to the vector store

Parameters:

Name Type Description Default
documents list[str]

List of documents

required
reference_id str

Reference id

None

Returns:

Type Description
Annotated[list[str], 'List of document IDs']

list[str]: List of document IDs

Source code in core/vector_store.py
def add_documents(
    self,
    documents: Annotated[
        list[str],
        "List of documents",
    ],
    reference_id: Annotated[
        Optional[str],
        "Reference ID",
    ] = None,
) -> Annotated[
    list[str],
    "List of document IDs",
]:
    """
    Add documents to the vector store

    Args:
        documents (list[str]): List of document texts
        reference_id (str): Reference id stored in each document's
            metadata; when ``None`` the metadata is left empty

    Returns:
        list[str]: Newly generated document IDs, one per input document and
            in the same order. Empty when ``documents`` is empty.
    """
    # Nothing to index: return early without calling the collection.
    if not documents:
        return []

    metadata = {} if reference_id is None else {"reference_id": reference_id}
    # NOTE(review): some chromadb releases may reject empty metadata dicts -
    # confirm against the pinned version.
    ids = [nanoid.generate() for _ in documents]
    self.collection.add(
        documents=documents,
        ids=ids,
        # One independent dict per document, so no two entries alias.
        metadatas=[dict(metadata) for _ in documents],
    )
    return ids

delete_by_reference_id(reference_id)

Delete documents by reference_id

Parameters:

Name Type Description Default
reference_id str

Reference ID

required

Returns:

Type Description
None

None

Source code in core/vector_store.py
def delete_by_reference_id(
    self,
    reference_id: Annotated[
        str,
        "Reference ID",
    ],
) -> None:
    """
    Remove every document tagged with the given reference ID

    Args:
        reference_id (str): Value matched against the ``reference_id``
            metadata key of the stored documents

    Returns:
        None
    """
    # Metadata filter handed to the collection's delete call.
    criteria = {"reference_id": reference_id}
    self.collection.delete(where=criteria)

similarity_search(query, reference_id=None, k=3)

Search for similar documents

Parameters:

Name Type Description Default
query str

Query string

required
reference_id str

Reference ID

None
k int

Number of result documents

3

Returns:

Type Description
list[Tuple[Document, float]]

list[Tuple[Document, float]]: List of documents and their similarity scores

Source code in core/vector_store.py
def similarity_search(
    self,
    query: Annotated[
        str,
        "Query string",
    ],
    reference_id: Annotated[
        Optional[str],
        "Reference ID",
    ] = None,
    k: Annotated[
        int,
        "Number of result documents",
    ] = 3,
) -> list[Tuple["Document", float]]:
    """
    Search for similar documents

    Args:
        query (str): Query string
        reference_id (str): Reference ID; when given, only documents whose
            ``reference_id`` metadata equals it are searched
        k (int): Number of result documents

    Returns:
        list[Tuple[Document, float]]: One ``(document, score)`` pair per hit,
            in the order returned by the collection. ``score`` is the
            distance reported by the collection divided by 100.
    """
    where = None if reference_id is None else {"reference_id": reference_id}
    res = self.collection.query(
        query_texts=[query],
        n_results=k,
        where=where,
    )
    # A single query text was sent, so only the first result row is read.
    # NOTE(review): the value is a scaled distance, not a similarity -
    # confirm that the /100 scaling is what callers expect.
    return [
        (Document(page_content=text, metadata=meta), distance / 100)
        for text, meta, distance in zip(
            res["documents"][0],
            res["metadatas"][0],
            res["distances"][0],
        )
    ]