LangChain Ensemble Retriever¶
The LangChain EnsembleRetriever takes a list of retrievers as input, combines the results of their get_relevant_documents() methods, and reranks the combined results using the Reciprocal Rank Fusion (RRF) algorithm. With TruLens, we can evaluate the context produced by each component retriever as well as by the ensemble retriever. This example walks through that process.
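For intuition: RRF scores each document by a weighted sum of reciprocal ranks across the component retrievers, so documents that rank highly in several result lists rise to the top of the fused list. The sketch below illustrates the idea only; it is not LangChain's internal implementation, and the smoothing constant of 60 is simply the common default from the original RRF paper.

# Minimal sketch of weighted Reciprocal Rank Fusion (illustration only,
# not LangChain's internal implementation).
def weighted_rrf(ranked_lists, weights, c=60):
    scores = {}
    for docs, weight in zip(ranked_lists, weights):
        for rank, doc in enumerate(docs, start=1):
            # Higher-ranked documents (smaller rank) contribute more.
            scores[doc] = scores.get(doc, 0.0) + weight / (c + rank)
    return sorted(scores, key=scores.get, reverse=True)

# "b" is ranked first by both lists, so it fuses to the top.
weighted_rrf([["b", "a", "c"], ["b", "a"]], weights=[0.5, 0.5])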
Setup¶
In [ ]:
# !pip install trulens trulens-apps-langchain trulens-providers-openai openai langchain langchain_community langchain_openai rank_bm25 faiss_cpu
In [ ]:
import os

os.environ["OPENAI_API_KEY"] = "sk-..."
In [ ]:
# Imports main tools:
from trulens.apps.langchain import TruChain
from trulens.core import Feedback
from trulens.core import TruSession

# Imports from LangChain to build app
from langchain.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

session = TruSession()
session.reset_database()
In [ ]:
doc_list_1 = [
    "I like apples",
    "I like oranges",
    "Apples and oranges are fruits",
]

# Initialize the BM25 retriever over the first document list.
bm25_retriever = BM25Retriever.from_texts(
    doc_list_1, metadatas=[{"source": 1}] * len(doc_list_1)
)
bm25_retriever.k = 2

doc_list_2 = [
    "You like apples",
    "You like oranges",
]

# Initialize the FAISS retriever over the second document list.
embedding = OpenAIEmbeddings()
faiss_vectorstore = FAISS.from_texts(
    doc_list_2, embedding, metadatas=[{"source": 2}] * len(doc_list_2)
)
faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs={"k": 2})

# Initialize the ensemble retriever with equal weights.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5]
)
Initialize Context Relevance checks for each component retriever + ensemble¶
This requires knowing the feedback selector for each retriever. You can find this path by logging a run of your application and examining the application traces on the Evaluations page of the dashboard.
Read more in our docs: https://www.trulens.org/trulens/selecting_components/
In [ ]:
import numpy as np
from trulens.core.schema import Select
from trulens.providers.openai import OpenAI

# Initialize provider class
openai = OpenAI()

# Selectors: retrievers[0] and retrievers[1] are the component retrievers
# inside the EnsembleRetriever; .rets[:] selects each returned document.
bm25_context = (
    Select.RecordCalls.retrievers[0]
    ._get_relevant_documents.rets[:]
    .page_content
)
faiss_context = (
    Select.RecordCalls.retrievers[1]
    ._get_relevant_documents.rets[:]
    .page_content
)

# The ensemble's fused output is the return value of the top-level invoke call.
ensemble_context = Select.RecordCalls.invoke.rets[:].page_content

# Question/statement relevance between question and each context chunk,
# averaged across chunks.
f_context_relevance_bm25 = (
    Feedback(openai.context_relevance, name="BM25")
    .on_input()
    .on(bm25_context)
    .aggregate(np.mean)
)

f_context_relevance_faiss = (
    Feedback(openai.context_relevance, name="FAISS")
    .on_input()
    .on(faiss_context)
    .aggregate(np.mean)
)

f_context_relevance_ensemble = (
    Feedback(openai.context_relevance, name="Ensemble")
    .on_input()
    .on(ensemble_context)
    .aggregate(np.mean)
)
Add feedbacks¶
In [ ]:
tru_recorder = TruChain(
    ensemble_retriever,
    app_name="Ensemble Retriever",
    feedbacks=[
        f_context_relevance_bm25,
        f_context_relevance_faiss,
        f_context_relevance_ensemble,
    ],
)
In [ ]:
with tru_recorder as recording:
    ensemble_retriever.invoke("apples")
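The invoke call above is recorded by TruChain and also returns the fused documents. As an optional sanity check (not required for logging), you can call the retriever again outside the recorder and inspect what came back:

# Optional: inspect the fused documents directly.
docs = ensemble_retriever.invoke("apples")
for doc in docs:
    print(doc.metadata["source"], "->", doc.page_content)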
See and compare results from each retriever¶
In [ ]:
from trulens.dashboard.display import get_feedback_result
last_record = recording.records[-1]
get_feedback_result(last_record, "Ensemble")
In [ ]:
from trulens.dashboard.display import get_feedback_result
last_record = recording.records[-1]
get_feedback_result(last_record, "BM25")
In [ ]:
from trulens.dashboard.display import get_feedback_result
last_record = recording.records[-1]
get_feedback_result(last_record, "FAISS")
Explore in a Dashboard¶
In [ ]:
from trulens.dashboard import run_dashboard
run_dashboard(session) # open a local streamlit app to explore
# stop_dashboard(session) # stop if needed
Alternatively, you can run trulens from a command line in the same folder to start the dashboard.
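If you'd rather compare the aggregate BM25, FAISS, and Ensemble scores without opening the dashboard, you can also pull them into a dataframe (assuming your trulens version exposes get_leaderboard on the session):

# Aggregate feedback scores per app as a dataframe
# (assumes TruSession.get_leaderboard is available in your trulens version).
session.get_leaderboard()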