Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dev = [
"ipython==8.10.0",
"jupyter>=1.0.0,<2",
"llama-index-embeddings-openai>=0.5.0,<0.6",
"llama-index-embeddings-azure_openai>=0.4.0,<0.5",
"llama-index-llms-openai>=0.5.0,<0.6",
"llama-index-readers-file>=0.5.0,<0.6",
"mypy==0.991",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,34 @@
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import Document, TextNode
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.llms.azure_openai import AzureOpenAI
from pymongo import MongoClient

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

import threading

lock = threading.Lock()


@pytest.fixture(scope="session")
def embed_model() -> OpenAIEmbedding:
    """Session-scoped embedding model.

    Prefers plain OpenAI when OPENAI_API_KEY is set; falls back to Azure
    OpenAI when AZURE_OPENAI_API_KEY is set (also registering it as the
    global default via Settings). Skips dependent tests when neither
    key is available.
    """
    env = os.environ
    if "OPENAI_API_KEY" in env:
        return OpenAIEmbedding()
    if "AZURE_OPENAI_API_KEY" in env:
        azure_embedding = AzureOpenAIEmbedding(
            api_key=env["AZURE_OPENAI_API_KEY"],
            deployment_name=env.get("AZURE_TEXT_DEPLOYMENT", "text-embedding-3-small"),
        )
        # Make the Azure model the library-wide default for these tests.
        Settings.embed_model = azure_embedding
        return azure_embedding
    pytest.skip("Requires OPENAI_API_KEY or AZURE_OPENAI_API_KEY in os.environ")


@pytest.fixture(scope="session")
def documents() -> List[Document]:
"""
Expand All @@ -29,17 +47,13 @@ def documents() -> List[Document]:


@pytest.fixture(scope="session")
def nodes(documents, embed_model) -> List[TextNode]:
    """Transform the fixture documents into embedded TextNodes.

    Runs an ingestion pipeline that first chunks the documents with a
    sentence splitter and then attaches embeddings from the session's
    embedding model.
    """
    splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=200)
    pipeline = IngestionPipeline(transformations=[splitter, embed_model])
    return pipeline.run(documents=documents)


Expand All @@ -52,18 +66,23 @@ def nodes(documents) -> List[TextNode]:
@pytest.fixture(scope="session")
def atlas_client() -> MongoClient:
    """Session-scoped MongoDB Atlas client.

    Skips dependent tests when MONGODB_URI is not set; otherwise connects
    and sanity-checks that the expected database exists.
    """
    if MONGODB_URI is None:
        # pytest.skip() raises Skipped itself (it is NoReturn); the previous
        # `raise pytest.skip(...)` had a dead, misleading outer `raise`.
        pytest.skip("Requires MONGODB_URI in os.environ")

    client = MongoClient(MONGODB_URI)
    # Fail fast if the test database has not been provisioned.
    assert DB_NAME in client.list_database_names()
    return client


@pytest.fixture()
def vector_store(atlas_client: MongoClient) -> MongoDBAtlasVectorSearch:
if MONGODB_URI is None:
return None

def vector_store(
atlas_client: MongoClient, embed_model: OpenAIEmbedding
) -> MongoDBAtlasVectorSearch:
# Set up the default llm to be used in tests.
if isinstance(embed_model, AzureOpenAIEmbedding):
deployment_name = os.environ.get("AZURE_LLM_DEPLOYMENT", "gpt-4o-mini")
Settings.llm = AzureOpenAI(
engine=deployment_name, api_key=os.environ["AZURE_OPENAI_API_KEY"]
)
return MongoDBAtlasVectorSearch(
mongodb_client=atlas_client,
db_name=DB_NAME,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,9 @@
provide a valid OPENAI_API_KEY.
"""

import os
from time import sleep
from typing import List

import pytest
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.core.schema import Document
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
Expand All @@ -20,18 +18,11 @@
from .conftest import lock


@pytest.mark.skipif(
os.environ.get("MONGODB_URI") is None, reason="Requires MONGODB_URI in os.environ"
)
def test_mongodb_connection(atlas_client: MongoClient) -> None:
    """The datastore should answer a ping, proving the connection works."""
    ping_response = atlas_client.admin.command("ping")
    assert ping_response["ok"]


@pytest.mark.skipif(
os.environ.get("MONGODB_URI") is None or os.environ.get("OPENAI_API_KEY") is None,
reason="Requires MONGODB_URI and OPENAI_API_KEY in os.environ",
)
def test_index(
documents: List[Document], vector_store: MongoDBAtlasVectorSearch
) -> None:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import os
from time import sleep
from typing import List

import pytest
from llama_index.core.schema import Document, TextNode
from llama_index.core.vector_stores.types import (
FilterCondition,
Expand All @@ -24,22 +22,16 @@ def test_documents(documents: List[Document]) -> None:
assert isinstance(documents[0], Document)


@pytest.mark.skipif(
os.environ.get("OPENAI_API_KEY") is None,
reason="Requires OPENAI_API_KEY in os.environ",
)
def test_nodes(nodes: List[TextNode]) -> None:
    """The ingestion pipeline should yield a list of embedded TextNodes."""
    assert isinstance(nodes, list)
    first_node = nodes[0]
    assert isinstance(first_node, TextNode)


@pytest.mark.skipif(
os.environ.get("MONGODB_URI") is None or os.environ.get("OPENAI_API_KEY") is None,
reason="Requires MONGODB_URI and OPENAI_API_KEY in os.environ",
)
def test_vectorstore(
nodes: List[TextNode], vector_store: MongoDBAtlasVectorSearch
nodes: List[TextNode],
vector_store: MongoDBAtlasVectorSearch,
embed_model: OpenAIEmbedding,
) -> None:
"""Test add, query, delete API of MongoDBAtlasVectorSearch."""
with lock:
Expand All @@ -54,7 +46,7 @@ def test_vectorstore(
# 2a. test query(): default (vector search)
query_str = "What are LLMs useful for?"
n_similar = 2
query_embedding = OpenAIEmbedding().get_text_embedding(query_str)
query_embedding = embed_model.get_text_embedding(query_str)
query = VectorStoreQuery(
query_embedding=query_embedding,
similarity_top_k=n_similar,
Expand All @@ -70,7 +62,7 @@ def test_vectorstore(
sleep(2)
retries -= 1

assert all(score > 0.89 for score in query_responses.similarities)
assert all(score > 0.75 for score in query_responses.similarities)
assert any("LLM" in node.text for node in query_responses.nodes)
assert all(id_res in ids for id_res in query_responses.ids)

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading