Reputation: 41
I am very new to LangChain. I am trying to build an agent that uses a custom or local LLM and has tool-calling ability and memory. I am using create_tool_calling_agent() from the LangChain docs, but I am getting this error:
Traceback (most recent call last):
  File "/home/kundeshwar/Abhay/agent4.py", line 69, in <module>
    agent = create_tool_calling_agent(model, tools, prompt)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/kundeshwar/Abhay/langchain/lib/python3.12/site-packages/langchain/agents/tool_calling_agent/base.py", line 95, in create_tool_calling_agent
    raise ValueError(
ValueError: This function requires a .bind_tools method be implemented on the LLM.
I used the code below to create my custom LLM class, as shown in the LangChain docs. I am using the Llama 3 8B Instruct model from Hugging Face as the LLM.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

def llama3_instruct(prompt):
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
    terminators = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]
    outputs = model.generate(
        input_ids,
        max_new_tokens=256,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    # Decode only the newly generated tokens, not the prompt
    response = outputs[0][input_ids.shape[-1]:]
    return tokenizer.decode(response, skip_special_tokens=True)
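For reference, a direct call to this helper would look something like this (the prompt here is only an illustration):

# Plain text in, plain text out; no chat template or tool schema is applied here
print(llama3_instruct("What is the capital of France?"))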
from typing import Any, Dict, Iterator, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk


class CustomLLM(LLM):
    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Run the LLM on the given input.

        Override this method to implement the LLM logic.

        Args:
            prompt: The prompt to generate from.
            stop: Stop words to use when generating. Model output is cut off at the
                first occurrence of any of the stop substrings.
                If stop tokens are not supported consider raising NotImplementedError.
            run_manager: Callback manager for the run.
            **kwargs: Arbitrary additional keyword arguments. These are usually passed
                to the model provider API call.

        Returns:
            The model output as a string. Actual completions SHOULD NOT include the prompt.
        """
        res = llama3_instruct(prompt)
        return res

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Stream the LLM on the given prompt.

        This method should be overridden by subclasses that support streaming.
        If not implemented, the default behavior of calls to stream will be to
        fallback to the non-streaming version of the model and return
        the output as a single chunk.

        Args:
            prompt: The prompt to generate from.
            stop: Stop words to use when generating. Model output is cut off at the
                first occurrence of any of these substrings.
            run_manager: Callback manager for the run.
            **kwargs: Arbitrary additional keyword arguments. These are usually passed
                to the model provider API call.

        Returns:
            An iterator of GenerationChunks.
        """
        # Generate the full completion, then yield it back character by character
        res = self._call(prompt)
        for char in res:
            chunk = GenerationChunk(text=char)
            if run_manager:
                run_manager.on_llm_new_token(chunk.text, chunk=chunk)
            yield chunk

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Return a dictionary of identifying parameters."""
        return {
            "model_name": "CustomChatModel",
        }

    @property
    def _llm_type(self) -> str:
        """Get the type of language model used by this chat model."""
        return "custom"
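On its own, the wrapper can be called like any LangChain LLM, for example:

llm = CustomLLM()
print(llm.invoke("Tell me a joke"))          # routed through _call
for chunk in llm.stream("Tell me a joke"):   # routed through _stream
    print(chunk, end="", flush=True)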
Now, in a different program, I import this class, use it as the LLM, and create the agent. The code for that is given below:
import os

from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain.memory import ConversationBufferMemory
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.prompts import ChatPromptTemplate

# CustomLLM is imported from the module defined above

os.environ["TAVILY_API_KEY"] = "<api_key>"

tools = [TavilySearchResults(max_results=2)]
llm = CustomLLM()

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. You should use tools only if needed."
        ),
        ("placeholder", "{chat_history}"),
        ("human", "{input}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)

agent = create_tool_calling_agent(llm, tools, prompt)

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
agent_executor = AgentExecutor(agent=agent, tools=tools, handle_parsing_errors=True, memory=memory, verbose=True)

while True:
    user_input = input("User: ")
    chat_history = memory.buffer_as_messages
    response = agent_executor.invoke({
        "input": user_input,
        "chat_history": chat_history,
    })
    print("Agent:", response['output'])
Now I want to ask how to implement the bind_tools method in my custom LLM class. Also, any idea how the _stream() method should be implemented in my custom LLM class? I guess agent_executor.invoke() uses that instead of _call(). A rough sketch of what I imagine bind_tools might look like is below.
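From reading the chat-model integrations, my guess is that bind_tools only converts the tools to a JSON schema and binds them as a default kwarg, leaving prompt formatting and tool-call parsing to _call. Something like this (untested; convert_to_openai_tool is the helper I found in langchain_core, and the parsing part is entirely my assumption):

from langchain_core.utils.function_calling import convert_to_openai_tool

class CustomLLM(LLM):
    # ... everything from the class above ...

    def bind_tools(self, tools, **kwargs):
        # Convert each tool to an OpenAI-style JSON schema and bind the list as a
        # default kwarg, so _call receives it via **kwargs. _call would then have
        # to inject the schemas into the Llama 3 prompt and parse any tool call
        # out of the completion itself (this is the part I don't know how to do).
        formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
        return self.bind(tools=formatted_tools, **kwargs)

Is something along these lines what's expected, and would it even work with a plain (non-chat) LLM, or do I need to subclass BaseChatModel instead?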
Upvotes: 3
Views: 1310