Vision RAG
In [ ]:
Copied!
# Clone the VARAG repository and move into it; %pwd prints the working
# directory so you can confirm the %cd took effect.
!git clone https://github.com/adithya-s-k/VARAG
%cd VARAG
%pwd
!git clone https://github.com/adithya-s-k/VARAG
%cd VARAG
%pwd
In [ ]:
Copied!
!apt-get update && apt-get install -y && apt-get install -y poppler-utils
!apt-get update && apt-get install -y && apt-get install -y poppler-utils
In [ ]:
Copied!
# Install VARAG itself in editable mode from the cloned checkout so that
# `import varag` below resolves to this working tree.
%pip install -e .
%pip install -e .
In [ ]:
Copied!
from sentence_transformers import SentenceTransformer
from varag.rag import VisionRAG
from varag.vlms import OpenAI
import lancedb
import os
from dotenv import load_dotenv

# Load the .env file FIRST: by default load_dotenv() does NOT override
# variables that are already set in os.environ, so assigning the "api-key"
# placeholder before calling it (as the original did) would mask any real
# OPENAI_API_KEY stored in .env. setdefault keeps the placeholder only as a
# last-resort fallback — replace it with your actual key if you have no .env.
load_dotenv()
os.environ.setdefault("OPENAI_API_KEY", "api-key")
from sentence_transformers import SentenceTransformer
from varag.rag import VisionRAG
from varag.vlms import OpenAI
import lancedb
import os
from dotenv import load_dotenv

# Same fix as above: read .env before touching OPENAI_API_KEY so a real key
# in the environment or .env wins over the placeholder.
load_dotenv()
os.environ.setdefault("OPENAI_API_KEY", "api-key")
In [ ]:
Copied!
# Connect to a local LanceDB database; the same path is shared by the other
# RAG demos in this repo so their tables live side by side.
shared_db = lancedb.connect("~/shared_rag_db")
# Initialize VisionRAG and VLM
# jina-clip-v1 is a CLIP-style model that embeds text and images into the
# same space, which is what lets a text query retrieve page images.
# trust_remote_code=True is required because the model ships custom code.
embedding_model = SentenceTransformer("jinaai/jina-clip-v1", trust_remote_code=True)
vision_rag = VisionRAG(
image_embedding_model=embedding_model,
db=shared_db,
table_name="visionDemo",
)
# VLM used later to answer questions over the retrieved images; reads
# OPENAI_API_KEY from the environment set above.
vlm = OpenAI()
shared_db = lancedb.connect("~/shared_rag_db")
# Initialize VisionRAG and VLM
embedding_model = SentenceTransformer("jinaai/jina-clip-v1", trust_remote_code=True)
vision_rag = VisionRAG(
image_embedding_model=embedding_model,
db=shared_db,
table_name="visionDemo",
)
vlm = OpenAI()
In [ ]:
Copied!
# Index every document under ./examples/data into the "visionDemo" table.
# overwrite=False keeps any previously indexed pages (re-running is cheap);
# recursive=False indexes only the top-level directory, not subfolders.
vision_rag.index(
"./examples/data",
overwrite=False,
recursive=False,
verbose=True
)
vision_rag.index(
"./examples/data",
overwrite=False,
recursive=False,
verbose=True
)
In [ ]:
Copied!
query = "What is Colpali"
num_results = 5
# Fix: pass num_results instead of a hard-coded k=5 — in the original,
# changing num_results silently had no effect on the search.
results = vision_rag.search(query, k=num_results)
images = [result["image"] for result in results]
# Display the retrieved page images, numbered from 1.
for i, img in enumerate(images, 1):
    print(f"Image {i}:")
    display(img)
query = "What is Colpali"
num_results = 5
results = vision_rag.search(query, k=num_results)
images = [result["image"] for result in results]
# Display the images
for i, img in enumerate(images, 1):
    print(f"Image {i}:")
    display(img)
In [ ]:
Copied!
from IPython.display import display, Markdown
# Ask the VLM to answer the query using the retrieved page images as
# grounding context, then render the answer as Markdown in the notebook.
response = vlm.query(query, images, max_tokens=1000)
display(Markdown(response))
from IPython.display import display, Markdown
response = vlm.query(query, images, max_tokens=1000)
display(Markdown(response))
Run Gradio Demo
In [ ]:
Copied!
# Launch the interactive Gradio demo; --share exposes a public tunnel URL.
%cd examples
!python visionDemo.py --share
%cd examples
!python visionDemo.py --share