I'm experimenting with Llama 3.2 Vision 11B and I'm having a rough time attaching an image, whether it's local or online, to the chat. Here's my Python code:
import io
import base64
import requests
from PIL import Image
from langchain_ollama import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate

# Define exit keywords
EXIT_KEYWORDS = ["exit", "quit", "stop", "bye", "end"]

def encode_image_to_base64(image_path: str) -> str:
    """Encodes the image at the given path to a Base64 string."""
    try:
        with open(image_path, "rb") as image_file:
            encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
        return encoded_string
    except FileNotFoundError:
        print(f"Error: The file {image_path} was not found.")
        return None
    except Exception as e:
        print(f"Error: Could not process the image at {image_path}. Reason: {e}")
        return None

def fetch_image_from_url(image_url: str) -> str:
    """Fetches an image from a URL and encodes it as Base64."""
    try:
        response = requests.get(image_url)
        response.raise_for_status()  # Raise an exception for HTTP errors
        image = Image.open(io.BytesIO(response.content))
        buffered = io.BytesIO()
        image.save(buffered, format=image.format)  # Preserve original format
        return base64.b64encode(buffered.getvalue()).decode("utf-8")
    except Exception as e:
        print(f"Error: Could not fetch the image from URL. Reason: {e}")
        return None

def handle_conversation():
    """Handles the conversation loop with the user."""
    context = ""
    model = OllamaLLM(model="llama3.2-vision")
    prompt = ChatPromptTemplate.from_template("""
    Answer the question below. If an image is provided, analyze it and provide relevant information.
    Here is the conversation history: {context}
    Question: {question}
    Image (if any): {image_data}
    Answer:
    """)
    chain = prompt | model

    while True:
        user_input = input("You: ")
        if user_input.lower() in EXIT_KEYWORDS:
            print("Goodbye!")
            break

        image_data = None
        text_input = user_input.strip()

        # Check if user input includes "image:"
        if "image:" in user_input:
            try:
                text_input, image_reference = user_input.split("image:", 1)
                text_input = text_input.strip()
                image_reference = image_reference.strip()
                if image_reference.startswith("http://") or image_reference.startswith("https://"):
                    # Handle online image URL
                    image_data = fetch_image_from_url(image_reference)
                else:
                    # Handle local image path
                    image_data = encode_image_to_base64(image_reference)
                if not image_data:
                    print("Error: Unable to process the provided image.")
                    continue
            except ValueError:
                print("Error: Invalid input format. Use 'Your question here image: [path_to_image or URL]'.")
                continue

        # Invoke the model with the current context, question, and image data
        result = chain.invoke({"context": context, "question": text_input, "image_data": image_data})
        print(f"Bot: {result}")

        # Update the context with the latest user input and bot response
        context += f"\nUser: {text_input}\nAI: {result}"

if __name__ == "__main__":
    handle_conversation()
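For context, the loop expects the image reference inline after an "image:" marker, so a session looks like this (the path and URL below are just placeholders):

    You: What breed is this dog? image: ./photos/dog.jpg
    Bot: ...
    You: Describe this painting image: https://example.com/painting.jpg
    Bot: ...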
The lack of proper documentation makes this harder, so any help or resources would be much appreciated. Simple text prompting works just fine; it's attaching the image that I'm having trouble with.
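My current guess is that interpolating the base64 string into the prompt template just feeds the model a wall of text rather than an actual image, and that the image needs to be attached as a content block on a chat message instead. Here's a minimal, untested sketch of what I think that would look like, assuming ChatOllama accepts OpenAI-style image_url content blocks with a base64 data URI:

    from langchain_core.messages import HumanMessage
    from langchain_ollama import ChatOllama

    llm = ChatOllama(model="llama3.2-vision")

    # Reuse the helper from the script above; "dog.jpg" is a placeholder path
    image_b64 = encode_image_to_base64("dog.jpg")

    # Attach the image as a content block alongside the text
    message = HumanMessage(
        content=[
            {"type": "text", "text": "What is in this image?"},
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
            },
        ]
    )
    response = llm.invoke([message])
    print(response.content)

If that is the right direction, I'm still not sure how it composes with ChatPromptTemplate and the conversation history in my loop above.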