Query Planning in LlamaIndex¶
Query planning leverages an LLM's ability to restructure user input into multiple queries, executed either sequentially or in parallel, before answering. This method improves responses by decomposing a complex question into smaller, more answerable questions.
Sub-question queries are one such method. Sub-question queries decompose the user input into multiple different sub-questions. This is great for answering complex questions that require knowledge from different documents.
Relatedly, there are many configuration choices to make for this style of application. In this example, we'll iterate through several of these choices and evaluate each with TruLens.
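As a rough illustration of the idea (a toy sketch with hypothetical helper names, not the LlamaIndex implementation), a sub-question planner maps one complex question to several simpler ones, each of which can be answered against its own document before the answers are synthesized:

```python
# Conceptual sketch of sub-question planning. A real planner prompts an
# LLM to produce sub-questions; here we hard-code the shape of its output
# for a simple comparative question to show the decomposition step.
def plan_sub_questions(question: str) -> list[str]:
    if question.startswith("Compare "):
        # Split a comparison into one lookup per compared subject.
        subjects = question.removeprefix("Compare ").rstrip(".").split(" and ")
        return [f"What is {s}?" for s in subjects]
    # Simple questions pass through unchanged.
    return [question]

sub_qs = plan_sub_questions(
    "Compare the Duchess' lullaby and the 'You Are Old, Father William' verse."
)
# Two sub-questions, one per verse; each can be answered independently.
```

The `SubQuestionQueryEngine` used later in this notebook performs this planning step with an LLM and routes each sub-question to a query engine tool.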
Import from LlamaIndex and TruLens¶
# !pip install trulens trulens-apps-llamaindex trulens-providers-openai llama_index llama-index-readers-web==0.2.2
from llama_index.core import VectorStoreIndex
from llama_index.core.tools import ToolMetadata
from llama_index.readers.web import SimpleWebPageReader
from trulens.core import TruSession
session = TruSession()
# NOTE: This is ONLY necessary in a Jupyter notebook.
# Details: Jupyter runs an event loop behind the scenes.
# This results in nested event loops when we start an event loop to make async queries.
# This is normally not allowed; we use nest_asyncio to allow it for convenience.
import nest_asyncio
nest_asyncio.apply()
Set keys¶
For this example, we need an OpenAI key.
import os
os.environ["OPENAI_API_KEY"] = "sk-..."
Run the dashboard¶
By starting the dashboard ahead of time, we can watch as the evaluations get logged. This is especially useful for longer-running applications.
from trulens.dashboard import run_dashboard
run_dashboard(session)
Load Data¶
# load data
documents = SimpleWebPageReader(html_to_text=True).load_data(
    ["https://www.gutenberg.org/files/11/11-h/11-h.htm"]
)
Create base query engine¶
# build index and query engine
index = VectorStoreIndex.from_documents(documents)
# create embedding-based query engine from index
query_engine = index.as_query_engine()
Define Evaluation Metrics¶
import numpy as np
from trulens.apps.llamaindex import TruLlama
from trulens.core import Feedback
from trulens.providers.openai import OpenAI
# Initialize provider class
provider = OpenAI()
# Select the context to be used in feedback. The location of context is app-specific.
context = TruLlama.select_context(query_engine)
# Define a groundedness feedback function
f_groundedness = (
    Feedback(
        provider.groundedness_measure_with_cot_reasons, name="Groundedness"
    )
    .on(context.collect())  # collect context chunks into a list
    .on_output()
)
# Question/answer relevance between overall question and answer.
f_answer_relevance = Feedback(
    provider.relevance_with_cot_reasons, name="Answer Relevance"
).on_input_output()
# Question/statement relevance between question and each context chunk.
f_context_relevance = (
    Feedback(
        provider.context_relevance_with_cot_reasons, name="Context Relevance"
    )
    .on_input()
    .on(context)
    .aggregate(np.mean)
)
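Context relevance is scored once per retrieved chunk and then reduced to a single record-level score by the aggregation function. A minimal sketch of that aggregation step, using toy scores rather than real provider output:

```python
import numpy as np

# Hypothetical per-chunk context-relevance scores for one query;
# TruLens feedback scores fall in [0, 1].
chunk_scores = [0.9, 0.6, 0.3]

# .aggregate(np.mean) reduces the per-chunk scores to one score per record.
record_score = float(np.mean(chunk_scores))
# → 0.6
```

Groundedness instead uses `context.collect()`, which passes all chunks to the feedback function as a single list, so no aggregation step is needed there.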
Set configuration space¶
query_engine_types = ["VectorStoreIndex", "SubQuestionQueryEngine"]
Set test prompts¶
# set test prompts
prompts = [
    "Describe Alice's growth from meeting the White Rabbit to challenging the Queen of Hearts?",
    "Relate aspects of enchantment to the nostalgia that Alice experiences in Wonderland. Why is Alice both fascinated and frustrated by her encounters below-ground?",
    "Describe the White Rabbit's function in Alice.",
    "Describe some of the ways that Carroll achieves humor at Alice's expense.",
    "Compare the Duchess' lullaby to the 'You Are Old, Father William' verse",
    "Compare the sentiment of the Mouse's long tale, the Mock Turtle's story and the Lobster-Quadrille.",
    "Summarize the role of the mad hatter in Alice's journey",
    "How does the Mad Hatter influence the arc of the story throughout?",
]
Iterate through configuration space¶
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.tools import QueryEngineTool
for query_engine_type in query_engine_types:
    if query_engine_type == "SubQuestionQueryEngine":
        query_engine_tools = [
            QueryEngineTool(
                query_engine=query_engine,
                metadata=ToolMetadata(
                    name="Alice in Wonderland",
                    description="THE MILLENNIUM FULCRUM EDITION 3.0",
                ),
            )
        ]
        query_engine = SubQuestionQueryEngine.from_defaults(
            query_engine_tools=query_engine_tools,
        )
    else:
        pass  # use the base VectorStoreIndex query engine as-is

    tru_query_engine_recorder = TruLlama(
        app_name="Alice in Wonderland QA",
        app_version=query_engine_type,
        metadata={
            "query_engine_type": query_engine_type,
        },
        app=query_engine,
        feedbacks=[f_groundedness, f_answer_relevance, f_context_relevance],
    )

    # Use tru_query_engine_recorder as a context manager to record each query
    with tru_query_engine_recorder as recording:
        for prompt in prompts:
            query_engine.query(prompt)