I'm experimenting with Llama 3.2 Vision 11B and I'm having a rough time attaching an image, whether it's local or online, to the chat. Here's my Python code:
import io
import base64
import requests
from PIL import Image
from langchain_ollama import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate

# Define exit keywords
EXIT_KEYWORDS = ["exit", "quit", "stop", "bye", "end"]

def encode_image_to_base64(image_path: str) -> str:
    """Encodes the image at the given path to a Base64 string."""
    try:
        with open(image_path, "rb") as image_file:
            encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
        return encoded_string
    except FileNotFoundError:
        print(f"Error: The file {image_path} was not found.")
        return None
    except Exception as e:
        print(f"Error: Could not process the image at {image_path}. Reason: {e}")
        return None

def fetch_image_from_url(image_url: str) -> str:
    """Fetches an image from a URL and encodes it as Base64."""
    try:
        response = requests.get(image_url)
        response.raise_for_status()  # Raise an exception for HTTP errors
        image = Image.open(io.BytesIO(response.content))
        buffered = io.BytesIO()
        image.save(buffered, format=image.format)  # Preserve original format
        return base64.b64encode(buffered.getvalue()).decode("utf-8")
    except Exception as e:
        print(f"Error: Could not fetch the image from URL. Reason: {e}")
        return None

def handle_conversation():
    """Handles the conversation loop with the user."""
    context = ""
    model = OllamaLLM(model="llama3.2-vision")
    prompt = ChatPromptTemplate.from_template("""
    Answer the question below. If an image is provided, analyze it and provide relevant information.
    Here is the conversation history: {context}
    Question: {question}
    Image (if any): {image_data}
    Answer:
    """)
    chain = prompt | model

    while True:
        user_input = input("You: ")
        if user_input.lower() in EXIT_KEYWORDS:
            print("Goodbye!")
            break

        image_data = None
        text_input = user_input.strip()

        # Check if user input includes "image:"
        if "image:" in user_input:
            try:
                text_input, image_reference = user_input.split("image:", 1)
                text_input = text_input.strip()
                image_reference = image_reference.strip()
                if image_reference.startswith("http://") or image_reference.startswith("https://"):
                    # Handle online image URL
                    image_data = fetch_image_from_url(image_reference)
                else:
                    # Handle local image path
                    image_data = encode_image_to_base64(image_reference)
                if not image_data:
                    print("Error: Unable to process the provided image.")
                    continue
            except ValueError:
                print("Error: Invalid input format. Use 'Your question here image: [path_to_image or URL]'.")
                continue

        # Invoke the model with the current context, question, and image data
        result = chain.invoke({"context": context, "question": text_input, "image_data": image_data})
        print(f"Bot: {result}")

        # Update the context with the latest user input and bot response
        context += f"\nUser: {text_input}\nAI: {result}"

if __name__ == "__main__":
    handle_conversation()
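For context, the loop expects the image reference inline after an "image:" marker, so a session looks like this (the path and URL below are just placeholders):

    You: What breed is this dog? image: ./photos/dog.jpg
    Bot: ...
    You: Describe this painting image: https://example.com/painting.jpg
    Bot: ...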
The lack of proper documentation makes this harder, so any help or resources would be much appreciated. Simple text prompting works just fine; it's attaching the image that I'm having trouble with.
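My current guess is that interpolating the base64 string into the prompt template just feeds the model a wall of text rather than an actual image, and that the image needs to be attached as a content block on a chat message instead. Here's a minimal, untested sketch of what I think that would look like, assuming ChatOllama accepts OpenAI-style image_url content blocks with a base64 data URI:

    from langchain_core.messages import HumanMessage
    from langchain_ollama import ChatOllama

    llm = ChatOllama(model="llama3.2-vision")

    # Reuse the helper from the script above; "dog.jpg" is a placeholder path
    image_b64 = encode_image_to_base64("dog.jpg")

    # Attach the image as a content block alongside the text
    message = HumanMessage(
        content=[
            {"type": "text", "text": "What is in this image?"},
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
            },
        ]
    )
    response = llm.invoke([message])
    print(response.content)

If that is the right direction, I'm still not sure how it composes with ChatPromptTemplate and the conversation history in my loop above.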