❄️ Snowflake Arctic Quickstart with Cortex LLM Functions¶
In this quickstart you will learn to build and evaluate a RAG application with Snowflake Arctic.
Building and evaluating RAG applications with Snowflake Arctic offers developers a unique opportunity to leverage a top-tier, enterprise-focused LLM that is both cost-effective and open source. Arctic excels at enterprise tasks such as SQL generation and coding, providing a robust foundation for developing intelligent applications with significant cost savings. Learn more about Snowflake Arctic
In this example, we will use Arctic Embed (snowflake-arctic-embed-m) as our embedding model via HuggingFace, and Arctic, a 480B hybrid MoE LLM, both for generation and as the LLM powering TruLens feedback functions. The Arctic LLM is fully managed by Cortex LLM functions.
Note: you'll need an active Snowflake account to run Cortex LLM functions from Snowflake's data warehouse.
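The credentials read by the cell below can be supplied as environment variables before launching the notebook. The values here are placeholders; substitute your own account details:

```shell
# Placeholder values -- replace with your own Snowflake credentials.
export SNOWFLAKE_ACCOUNT="myorg-myaccount"
export SNOWFLAKE_USER="my_user"
export SNOWFLAKE_USER_PASSWORD="my_password"
# Optional overrides picked up by the connection parameters below:
export SNOWFLAKE_ROLE="ENGINEER"
export SNOWFLAKE_DATABASE="MY_DB"
export SNOWFLAKE_SCHEMA="PUBLIC"
export SNOWFLAKE_WAREHOUSE="MY_WH"
```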
# !pip install trulens trulens-providers-cortex chromadb sentence-transformers snowflake-snowpark-python
import os
from snowflake.snowpark import Session
from trulens.core.utils.keys import check_keys
check_keys("SNOWFLAKE_ACCOUNT", "SNOWFLAKE_USER", "SNOWFLAKE_USER_PASSWORD")
connection_params = {
"account": os.environ["SNOWFLAKE_ACCOUNT"],
"user": os.environ["SNOWFLAKE_USER"],
"password": os.environ["SNOWFLAKE_USER_PASSWORD"],
"role": os.environ.get("SNOWFLAKE_ROLE", "ENGINEER"),
"database": os.environ.get("SNOWFLAKE_DATABASE"),
"schema": os.environ.get("SNOWFLAKE_SCHEMA"),
"warehouse": os.environ.get("SNOWFLAKE_WAREHOUSE"),
}
# Create a Snowflake session
snowflake_session = Session.builder.configs(connection_params).create()
Get Data¶
In this case, we'll just initialize some simple text in the notebook.
university_info = """
The University of Washington, founded in 1861 in Seattle, is a public research university
with over 45,000 students across three campuses in Seattle, Tacoma, and Bothell.
As the flagship institution of the six public universities in Washington state,
UW encompasses over 500 buildings and 20 million square feet of space,
including one of the largest library systems in the world.
"""
Create Vector Store¶
Create a chromadb vector store in memory.
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("Snowflake/snowflake-arctic-embed-m")
document_embeddings = model.encode([university_info])
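Arctic Embed produces dense vectors whose relevance is typically scored by cosine similarity. As a quick intuition check, that score can be computed directly; this is a minimal sketch with toy vectors, not actual Arctic embeddings:

```python
import numpy as np

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Cosine similarity between two 1-D vectors."""
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

doc_vec = np.array([0.2, 0.8, 0.1])       # toy "document" embedding
query_vec = np.array([0.25, 0.75, 0.05])  # toy "query" embedding
score = cosine_similarity(doc_vec, query_vec)  # close to 1.0 for similar vectors
```

Vector stores like chromadb perform this comparison internally over all stored embeddings when you query the collection.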
import chromadb
chroma_client = chromadb.Client()
vector_store = chroma_client.get_or_create_collection(name="Universities")
Add the university_info to the embedding database.
vector_store.add(
    ids=["uni_info"],
    documents=[university_info],
    embeddings=document_embeddings,
)
Build RAG from scratch¶
Build a custom RAG from scratch, and add TruLens custom instrumentation.
from trulens.core import TruSession
from trulens.apps.custom import instrument
session = TruSession()
session.reset_database()
import json
class RAG_from_scratch:
@instrument
def retrieve(self, query: str) -> list:
"""
Retrieve relevant text from vector store.
"""
results = vector_store.query(
query_embeddings=model.encode([query], prompt_name="query"),
n_results=2,
)
return results["documents"]
@instrument
def generate_completion(self, query: str, context_str: list) -> str:
"""
Generate answer from context.
"""
def escape_string_for_sql(input_string):
escaped_string = input_string.replace("\\", "\\\\")
escaped_string = escaped_string.replace("'", "''")
return escaped_string
prompt = escape_string_for_sql(f"""
We have provided context information below.
{context_str}
Given this information, please answer the question: {query}
""")
cursor = snowflake_session.connection.cursor()
try:
# We use `snowflake.connector.cursor.SnowflakeCursor::execute` to
# execute the query instead of
# `snowflake.snowpark.session.Session::sql` since the latter is not
# thread-safe.
res = cursor.execute(f"""SELECT SNOWFLAKE.CORTEX.COMPLETE(
'snowflake-arctic',
[
{{'role': 'user', 'content': '{prompt}'}}
], {{
'temperature': 0
}}
)""").fetchall()
finally:
cursor.close()
if len(res) == 0:
return "No response from cortex function"
completion = json.loads(res[0][0])["choices"][0]["messages"]
print("full response from cortex function:")
print(res)
return completion
@instrument
def query(self, query: str) -> str:
context_str = self.retrieve(query)
completion = self.generate_completion(query, context_str)
return completion
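The two fiddly pieces of `generate_completion` can be exercised in isolation: the SQL string escaping, and parsing the JSON string returned by `SNOWFLAKE.CORTEX.COMPLETE`. The `raw` payload below is an illustrative placeholder mimicking the response shape parsed above, not real Cortex output:

```python
import json

def escape_string_for_sql(input_string: str) -> str:
    # Same helper as in generate_completion: escape backslashes first,
    # then double single quotes for SQL string literals.
    return input_string.replace("\\", "\\\\").replace("'", "''")

escaped = escape_string_for_sql("What's in C:\\data?")
# backslash doubled, quote doubled: What''s in C:\\data?

# Illustrative placeholder mimicking the COMPLETE response shape parsed above.
raw = '{"choices": [{"messages": "UW was founded in 1861."}]}'
completion = json.loads(raw)["choices"][0]["messages"]
```

Escaping backslashes before quotes matters: reversing the order would double the backslash that the quote-escaping just produced.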
rag = RAG_from_scratch()
Dev Note as of June 2024:¶
Alternatively, we could use Cortex's Python API (documentation) directly for a cleaner interface and to avoid constructing SQL commands ourselves.
The reason we invoke the SQL function directly via cursor.execute()
is that, as of the time of writing, the response from Cortex's Python API is still experimental and not as feature-rich as the SQL function's: inconsistency issues with structured JSON outputs and missing usage information have been observed, along with a lack of support for advanced chat-style (multi-message) prompts.
Below is a minimal example using the Python API instead.
# from snowflake.cortex import Complete
# def complete(user_query) -> str:
# completion = Complete(
# model="snowflake-arctic",
# prompt=f"[FILL IN SYSTEM PROMPTS IF NEEDED ]{user_query}",
# session=snowflake_session,
# )
# return completion
Set up feedback functions.¶
Here we'll use groundedness, answer relevance, and context relevance to detect hallucination.
import numpy as np
import snowflake.connector
from trulens.core import Feedback
from trulens.core import Select
from trulens.providers.cortex import Cortex
# Create a Snowflake connection
snowflake_connection = snowflake.connector.connect(
**connection_params
)
provider = Cortex(
snowflake_connection,
model_engine="snowflake-arctic",
)
# Define a groundedness feedback function
f_groundedness = (
Feedback(
provider.groundedness_measure_with_cot_reasons, name="Groundedness"
)
.on(Select.RecordCalls.retrieve.rets.collect())
.on_output()
)
# Question/answer relevance between overall question and answer.
f_answer_relevance = (
Feedback(provider.relevance_with_cot_reasons, name="Answer Relevance")
.on(Select.RecordCalls.retrieve.args.query)
.on_output()
)
# Question/statement relevance between question and each context chunk.
f_context_relevance = (
Feedback(
provider.context_relevance_with_cot_reasons, name="Context Relevance"
)
.on(Select.RecordCalls.retrieve.args.query)
.on(Select.RecordCalls.retrieve.rets.collect())
.aggregate(np.mean)
)
f_coherence = Feedback(
    provider.coherence_with_cot_reasons, name="Coherence"
).on_output()
Construct the app¶
Wrap the custom RAG with TruCustomApp and add the list of feedback functions for evaluation.
from trulens.apps.custom import TruCustomApp
tru_rag = TruCustomApp(
rag,
app_name="RAG",
app_version="v1",
feedbacks=[
f_groundedness,
f_answer_relevance,
f_context_relevance,
f_coherence,
],
)
session.reset_database()
Run the app¶
Use tru_rag as a context manager for the custom RAG-from-scratch app.
with tru_rag as recording:
    resp = rag.query("When was the University of Washington founded?")
resp
session.get_leaderboard(app_ids=[])
from trulens.dashboard import run_dashboard
run_dashboard(session)