Iterating with RAG on Milvus¶
Setup: To get up and running, you'll first need to install Docker and Milvus. Find instructions below:
- Docker Compose (Instructions)
- Milvus Standalone (Instructions)
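Once you have the docker-compose.yml for Milvus Standalone from the instructions above, you can start and verify the service directly from the notebook. This is a minimal sketch and assumes the compose file sits in the current working directory:
In [ ]:
# Assumes the Milvus Standalone docker-compose.yml has been downloaded to the current directory
# !docker compose up -d
# Check that the Milvus containers are up and healthy
# !docker compose ps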
In [ ]:
# !pip install trulens trulens-apps-llamaindex trulens-providers-openai llama_index==0.8.4 pymilvus==2.3.0 nltk==3.8.1 html2text==2020.1.16 tenacity==8.2.3
Add API keys¶
For this quickstart, you will need an OpenAI key. (The Hugging Face embedding models used below run locally via sentence-transformers, so no Hugging Face key is required.)
In [ ]:
import os
os.environ["OPENAI_API_KEY"] = "..."
Import from LlamaIndex and TruLens¶
In [ ]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from llama_index import ServiceContext
from llama_index import VectorStoreIndex
from llama_index.llms import OpenAI
from llama_index.storage.storage_context import StorageContext
from llama_index.vector_stores import MilvusVectorStore
from tenacity import retry
from tenacity import stop_after_attempt
from tenacity import wait_exponential
from trulens.core import Feedback
from trulens.core import TruSession
from trulens.apps.llamaindex import TruLlama
from trulens.providers.openai import OpenAI as fOpenAI
session = TruSession()
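If you have run this notebook before and want a clean slate, you can optionally reset the TruLens database. Note that this deletes previously logged records:
In [ ]:
# Optional: start fresh by clearing any previously logged records
# session.reset_database()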
First we need to load documents. We can use WikipediaReader¶
In [ ]:
from llama_index import WikipediaReader

cities = [
    "Los Angeles",
    "Houston",
    "Honolulu",
    "Tucson",
    "Mexico City",
    "Cincinnati",
    "Chicago",
]

wiki_docs = []
for city in cities:
    try:
        doc = WikipediaReader().load_data(pages=[city])
        wiki_docs.extend(doc)
    except Exception as e:
        print(f"Error loading page for city {city}: {e}")
Now, let's write down our test prompts¶
In [ ]:
test_prompts = [
    "What's the best national park near Honolulu",
    "What are some famous universities in Tucson?",
    "What bodies of water are near Chicago?",
    "What is the name of Chicago's central business district?",
    "What are the two most famous universities in Los Angeles?",
    "What are some famous festivals in Mexico City?",
    "What are some famous festivals in Los Angeles?",
    "What professional sports teams are located in Los Angeles",
    "How do you classify Houston's climate?",
    "What landmarks should I know about in Cincinnati",
]
Build a prototype RAG¶
In [ ]:
vector_store = MilvusVectorStore(
    index_params={"index_type": "IVF_FLAT", "metric_type": "L2"},
    search_params={"nprobe": 20},
    overwrite=True,
)
llm = OpenAI(model="gpt-3.5-turbo")
embed_v12 = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
service_context = ServiceContext.from_defaults(embed_model=embed_v12, llm=llm)
index = VectorStoreIndex.from_documents(
    wiki_docs, service_context=service_context, storage_context=storage_context
)
query_engine = index.as_query_engine(similarity_top_k=5)


# Retry with exponential backoff to ride out transient rate limits
@retry(
    stop=stop_after_attempt(10),
    wait=wait_exponential(multiplier=1, min=4, max=10),
)
def call_query_engine(prompt):
    return query_engine.query(prompt)


for prompt in test_prompts:
    call_query_engine(prompt)
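Before wiring up evaluation, it can help to spot-check the prototype on a single prompt and read the response (a minimal sketch):
In [ ]:
# Spot-check one response from the prototype RAG
response = call_query_engine(test_prompts[2])
print(response)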
Set up Evaluation¶
In [ ]:
import numpy as np

# Initialize OpenAI-based feedback function collection class:
provider = fOpenAI()

# Define groundedness
f_groundedness = (
    Feedback(
        provider.groundedness_measure_with_cot_reasons, name="Groundedness"
    )
    .on(TruLlama.select_context())
    .on_output()
)

# Question/answer relevance between overall question and answer.
f_answer_relevance = Feedback(
    provider.relevance_with_cot_reasons, name="Answer Relevance"
).on_input_output()

# Question/statement relevance between question and each context chunk.
f_context_relevance = (
    Feedback(
        provider.context_relevance_with_cot_reasons, name="Context Relevance"
    )
    .on_input()
    .on(TruLlama.select_context())
    .aggregate(np.mean)
)
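Each feedback function can also be called directly on a hand-written example, which is a useful sanity check before running the full app. The `*_with_cot_reasons` variants return a score in [0, 1] plus the model's reasoning (a minimal sketch):
In [ ]:
# Try answer relevance on a toy question/answer pair
score, reasons = provider.relevance_with_cot_reasons(
    "What bodies of water are near Chicago?",
    "Chicago sits on the shore of Lake Michigan, and the Chicago River runs through downtown.",
)
print(score)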
Find the best configuration¶
In [ ]:
index_params = ["IVF_FLAT", "HNSW"]
embed_v12 = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)
embed_ft3_v12 = HuggingFaceEmbeddings(
    model_name="Sprylab/paraphrase-multilingual-MiniLM-L12-v2-fine-tuned-3"
)
embed_ada = OpenAIEmbeddings(model="text-embedding-ada-002")
embed_models = [embed_v12, embed_ada]
top_ks = [1, 3]
chunk_sizes = [200, 500]
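This grid gives 2 index types × 2 embedding models × 2 top-k values × 2 chunk sizes = 16 configurations; you can confirm the sweep size before launching it (a minimal sketch):
In [ ]:
import itertools

# Enumerate the full parameter grid to see how many runs the sweep will make
configs = list(itertools.product(index_params, embed_models, top_ks, chunk_sizes))
print(f"{len(configs)} configurations to evaluate")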
In [ ]:
import itertools

for index_param, embed_model, top_k, chunk_size in itertools.product(
    index_params, embed_models, top_ks, chunk_sizes
):
    if embed_model == embed_v12:
        embed_model_name = "v12"
    elif embed_model == embed_ft3_v12:
        embed_model_name = "ft3_v12"
    elif embed_model == embed_ada:
        embed_model_name = "ada"
    vector_store = MilvusVectorStore(
        index_params={"index_type": index_param, "metric_type": "L2"},
        search_params={"nprobe": 20},
        overwrite=True,
    )
    llm = OpenAI(model="gpt-3.5-turbo")
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    service_context = ServiceContext.from_defaults(
        embed_model=embed_model, llm=llm, chunk_size=chunk_size
    )
    index = VectorStoreIndex.from_documents(
        wiki_docs,
        service_context=service_context,
        storage_context=storage_context,
    )
    query_engine = index.as_query_engine(similarity_top_k=top_k)
    tru_query_engine = TruLlama(
        query_engine,
        feedbacks=[f_groundedness, f_answer_relevance, f_context_relevance],
        metadata={
            "index_param": index_param,
            "embed_model": embed_model_name,
            "top_k": top_k,
            "chunk_size": chunk_size,
        },
    )

    @retry(
        stop=stop_after_attempt(10),
        wait=wait_exponential(multiplier=1, min=4, max=10),
    )
    def call_tru_query_engine(prompt):
        return tru_query_engine.query(prompt)

    for prompt in test_prompts:
        call_tru_query_engine(prompt)
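Once the sweep finishes, you can pull an aggregate view of mean feedback scores straight from the session; the per-configuration metadata is easiest to compare in the dashboard below (a minimal sketch):
In [ ]:
# Mean feedback scores aggregated per app
session.get_leaderboard()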
Explore in a Dashboard¶
In [ ]:
from trulens.dashboard import run_dashboard
run_dashboard(session) # open a local streamlit app to explore
# stop_dashboard(session) # stop if needed
Or view results directly in your notebook¶
In [ ]:
session.get_records_and_feedback()[0]
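get_records_and_feedback returns the records DataFrame along with the list of feedback column names, so you can also slice out the scores per prompt directly (a minimal sketch):
In [ ]:
records_df, feedback_cols = session.get_records_and_feedback()

# Inspect each prompt, response, and its feedback scores
records_df[["input", "output"] + feedback_cols].head()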