Hybrid Colpali RAG
In [ ]:
# Clone the VARAG repository and move into its root directory
!git clone https://github.com/adithya-s-k/VARAG
%cd VARAG
%pwd
In [ ]:
# poppler-utils is required for PDF-to-image conversion
!apt-get update && apt-get install -y poppler-utils
In [ ]:
# Install VARAG in editable mode
%pip install -e .
In [ ]:
from sentence_transformers import SentenceTransformer
from varag.rag import HybridColpaliRAG
from varag.llms import OpenAI
from varag.utils import get_model_colpali
import lancedb
import os
from dotenv import load_dotenv

# Set your OpenAI API key here, or keep it in a .env file and rely on load_dotenv()
os.environ["OPENAI_API_KEY"] = "api-key"
load_dotenv()
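Hard-coding the key is fine for a quick test, but since load_dotenv() is already called you can keep the key out of the notebook in a .env file instead. A minimal sketch, assuming a .env file in the project root (the file contents shown in the comments are an example, not shipped with the repository):

In [ ]:
# Example .env file (project root), picked up by load_dotenv():
#   OPENAI_API_KEY=sk-...

import os
from dotenv import load_dotenv

load_dotenv()  # reads .env if it exists
assert os.getenv("OPENAI_API_KEY"), "OPENAI_API_KEY is not set"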
In [ ]:
# Shared LanceDB database used by the VARAG examples
shared_db = lancedb.connect("~/shared_rag_db")

# ColPali late-interaction model and processor for page-level retrieval
model, processor = get_model_colpali("vidore/colpali-v1.2")

# Image embedding model used for the dense side of the hybrid search
embedding_model = SentenceTransformer("jinaai/jina-clip-v1", trust_remote_code=True)

colpali_hybrid_rag = HybridColpaliRAG(
    colpali_model=model,
    colpali_processor=processor,
    db=shared_db,
    image_embedding_model=embedding_model,
    table_name="hybridColpaliDemo",
)

# Vision-language model used to answer questions over the retrieved pages
vlm = OpenAI()
In [ ]:
# Index the sample documents in ./examples/data
colpali_hybrid_rag.index(
    "./examples/data",
    overwrite=False,
    recursive=False,
    verbose=True,
)
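Indexing writes the page images and embeddings into the LanceDB table named above. As a quick sanity check you can count the rows directly; this uses only standard LanceDB calls, not a documented VARAG feature:

In [ ]:
# Verify that pages were indexed into the "hybridColpaliDemo" table
table = shared_db.open_table("hybridColpaliDemo")
print(f"Indexed rows: {table.count_rows()}")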
In [ ]:
query = "What is Colpali"
num_results = 5

# Retrieve the top pages; each result carries the corresponding page image
results = colpali_hybrid_rag.search(query, k=num_results)
images = [result["image"] for result in results]

# Display the retrieved page images
for i, img in enumerate(images, 1):
    print(f"Image {i}:")
    display(img)
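Each result is a dictionary; apart from the "image" key used above, the remaining fields depend on the VARAG version, so it is worth printing them before relying on any of them. A small inspection sketch:

In [ ]:
# List the fields stored with the first retrieved result ("image" plus metadata)
if results:
    print(sorted(results[0].keys()))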
In [ ]:
from IPython.display import display, Markdown

# Ask the VLM to answer the query using the retrieved page images
response = vlm.query(query, images, max_tokens=1000)
display(Markdown(response))
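The retrieval and generation steps can be folded into one helper so that new questions need a single call. This is a convenience sketch built only from the search() and query() calls used above; the ask() name is ours, not part of VARAG:

In [ ]:
def ask(question, k=5, max_tokens=1000):
    # Retrieve the top-k page images for the question
    hits = colpali_hybrid_rag.search(question, k=k)
    pages = [hit["image"] for hit in hits]
    # Answer with the vision-language model over the retrieved pages
    return vlm.query(question, pages, max_tokens=max_tokens)

display(Markdown(ask("What is Colpali?")))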
Run Gradio Demo
In [ ]:
%cd examples
# --share exposes the Gradio demo through a public link
!python hybridColpaliDemo.py --share