In [1]:
import torch
from src.vdr import Retriever
In [2]:
# Initialize the retriever from the pretrained "vsearch/vdr-nq" checkpoint.
# Prefer the GPU, but fall back to CPU when CUDA is unavailable so the
# notebook still runs end-to-end on machines without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
vdr = Retriever.from_pretrained("vsearch/vdr-nq")
vdr = vdr.to(device)
In [3]:
# Example inputs: one question and four candidate passages. Only the first
# passage is actually about green tea; the other three are distractors.
# Each passage is written as adjacent string literals (one per sentence),
# which Python concatenates into a single string.
query = "What are the benefits of drinking green tea?"
passages = [
    (
        "Green tea is known for its antioxidant properties, which can help protect cells from damage caused by free radicals. "
        "It also contains catechins, which have been shown to have anti-inflammatory and anti-cancer effects. "
        "Drinking green tea regularly may help improve overall health and well-being."
    ),
    (
        "The history of coffee dates back to ancient times, with its origins in Ethiopia. "
        "Coffee is one of the most popular beverages in the world and is enjoyed by millions of people every day."
    ),
    (
        "Yoga is a mind-body practice that combines physical postures, breathing exercises, and meditation. "
        "It has been practiced for thousands of years and is known for its many health benefits, including stress reduction and improved flexibility."
    ),
    (
        "Eating a balanced diet that includes a variety of fruits, vegetables, whole grains, and lean proteins is essential for maintaining good health. "
        "It provides the body with the nutrients it needs to function properly and can help prevent chronic diseases."
    ),
]
In [4]:
# Encode the query and the candidate passages with their respective encoders,
# then score each passage against the query via a dot product.
q_emb = vdr.encoder_q.embed(query)     # query embedding, shape [1, V]
p_emb = vdr.encoder_p.embed(passages)  # passage embeddings, shape [4, V]
scores = torch.matmul(q_emb, p_emb.transpose(0, 1))  # one score per passage, shape [1, 4]
print(scores)
tensor([[91.1257, 17.6930, 13.0358, 12.4576]], device='cuda:0')

Inspection¶

In [5]:
# Disentangle the query embedding.
# NOTE(review): `k` presumably caps how many vocabulary dimensions are kept and
# `visual=True` presumably renders the result inline — confirm against
# the `dst` implementation.
disentanglement = vdr.encoder_q.dst(
    query,
    k=768,
    visual=True,
)

Retrieval reasoning¶

In [6]:
# Explain the relevance between the query and the first passage.
# NOTE(review): `k` and `visual` are assumed to behave like the same-named
# arguments of `encoder_q.dst` — confirm against `Retriever.explain`.
explanation = vdr.explain(
    q=query,
    p=passages[0],
    k=768,
    visual=True,
)