From 63efca03f8ccdf4dae614c5df6834a55e4030bcc Mon Sep 17 00:00:00 2001
From: Jarno
Date: Sun, 18 Jan 2026 14:49:19 +0200
Subject: [PATCH] argument parsing, logging etc.

---
Review notes (this section is not part of the commit message):
 * agent.py: OllamaAgent is imported inside run_agent(). A module-level
   import is circular because ollamaagent.py imports execute_function
   from agent.py.
 * ollamaagent.py: prompt() takes self, reads self.max_loop and advances
   `loops` (it used to increment `max_loop`, so the limit never applied).
   The "Model:" log line is a real f-string now, and the nested quotes in
   the "Tool calls" f-string no longer require Python 3.12.
 * main.py: an empty prompt is rejected (REMAINDER also matches no words).
 * logger.py: the console handler is attached only once.
 * Line breaks and indentation were restored from a whitespace-collapsed
   copy of this patch (4-space indents assumed). The position of one blank
   context line in the scrape.py hunk is a best guess; verify it against
   the tree before applying.
 * "index" lines are omitted for new files whose content changed in review.

 .../__init__.py                   |  0
 src/AiAgentScraper/agent.py       | 54 +++++++++++++++++++
 src/AiAgentScraper/geminiagent.py |  0
 src/AiAgentScraper/logger.py      | 20 +++++++
 src/AiAgentScraper/main.py        | 46 ++++++++++++++++
 src/AiAgentScraper/ollamaagent.py | 48 +++++++++++++++++
 .../scrape.py                     | 15 ++++--
 .../tools.py                      |  0
 src/python-scraper/agent.py       | 38 -------------
 src/python-scraper/main.py        | 13 -----
 10 files changed, 177 insertions(+), 57 deletions(-)
 rename src/{python-scraper => AiAgentScraper}/__init__.py (100%)
 create mode 100644 src/AiAgentScraper/agent.py
 create mode 100644 src/AiAgentScraper/geminiagent.py
 create mode 100644 src/AiAgentScraper/logger.py
 create mode 100644 src/AiAgentScraper/main.py
 create mode 100644 src/AiAgentScraper/ollamaagent.py
 rename src/{python-scraper => AiAgentScraper}/scrape.py (72%)
 rename src/{python-scraper => AiAgentScraper}/tools.py (100%)
 delete mode 100644 src/python-scraper/agent.py
 delete mode 100644 src/python-scraper/main.py

diff --git a/src/python-scraper/__init__.py b/src/AiAgentScraper/__init__.py
similarity index 100%
rename from src/python-scraper/__init__.py
rename to src/AiAgentScraper/__init__.py
diff --git a/src/AiAgentScraper/agent.py b/src/AiAgentScraper/agent.py
new file mode 100644
--- /dev/null
+++ b/src/AiAgentScraper/agent.py
@@ -0,0 +1,54 @@
+import os
+from enum import Enum
+from logger import get_logger
+import ollama
+
+from tools import available_functions
+
+
+class Backend(Enum):
+    """LLM backends selectable with the --backend option."""
+
+    OLLAMA = "ollama"
+    GEMINI = "gemini"
+
+    def __str__(self):
+        # Plain value, e.g. when the backend is formatted into a log line.
+        return self.value
+
+
+logger = get_logger(__name__)
+
+tools = [available_functions["fetch_web_page"].spec]
+
+
+def run_agent(prompt: str, backend: Backend, model: str):
+    """Run `prompt` on the agent for `backend` and return its reply.
+
+    Raises NotImplementedError for a backend that has no agent yet.
+    """
+    if backend == Backend.OLLAMA:
+        # Imported here, not at module level: ollamaagent imports
+        # execute_function from this module, so a top-level import is circular.
+        from ollamaagent import OllamaAgent
+
+        agent = OllamaAgent(model=model, tools=tools)
+        return agent.prompt(message=prompt)
+    else:
+        raise NotImplementedError(f"Backend '{backend}' is not implemented yet")
+
+
+def execute_function(tool):
+    """Call the function named in `tool` and wrap its result.
+
+    Returns a "tool" role message for the conversation.
+    """
+    function_name = tool["function"]["name"]
+    args = tool["function"]["arguments"]
+    logger.info(f"Agent is calling: {function_name}({args})")
+    f = available_functions[function_name].function
+    return {
+        "role": "tool",
+        "content": f(**args),
+        "name": function_name,
+    }
diff --git a/src/AiAgentScraper/geminiagent.py b/src/AiAgentScraper/geminiagent.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/AiAgentScraper/logger.py b/src/AiAgentScraper/logger.py
new file mode 100644
--- /dev/null
+++ b/src/AiAgentScraper/logger.py
@@ -0,0 +1,20 @@
+import logging
+
+ROOT_LOGGER = "ai"
+FORMAT = '%(asctime)s|%(levelname)-5s|%(name)s| %(message)s'
+
+def configure_logger(level: str):
+    """Set the level of the "ai" logger and attach a console handler to it."""
+    logger = logging.getLogger(ROOT_LOGGER)
+    logger.setLevel(logging.getLevelNamesMapping()[level])
+    # Attach the handler only once so repeated calls do not duplicate output.
+    if not logger.handlers:
+        console_handler = logging.StreamHandler()
+        formatter = logging.Formatter(FORMAT)
+        console_handler.setFormatter(formatter)
+        logger.addHandler(console_handler)
+
+def get_logger(name: str|None) -> logging.Logger:
+    """Return the logger "ai.<name>", or the "ai" logger when name is empty."""
+    if name: return logging.getLogger(f"{ROOT_LOGGER}.{name}")
+    return logging.getLogger(ROOT_LOGGER)
diff --git a/src/AiAgentScraper/main.py b/src/AiAgentScraper/main.py
new file mode 100644
--- /dev/null
+++ b/src/AiAgentScraper/main.py
@@ -0,0 +1,46 @@
+import sys
+import argparse
+import logging
+from logger import get_logger, configure_logger
+from agent import run_agent, Backend
+
+parser = argparse.ArgumentParser(description="AI scraper agent")
+parser.add_argument("prompt", nargs=argparse.REMAINDER, help="Prompt for the agent")
+parser.add_argument(
+    "-b",
+    "--backend",
+    type=Backend,
+    choices=list(Backend),
+    default=Backend.OLLAMA,
+    help="LLM backend to use.",
+)
+parser.add_argument(
+    "-l",
+    "--log",
+    choices=list(logging.getLevelNamesMapping().keys()),
+    default="DEBUG",
+    help="Set logging level for the agent",
+)
+parser.add_argument(
+    "-m",
+    "--model",
+    help="LLM model name, like ministral-3:8b or gemini-3-flash-preview",
+)
+
+
+def main():
+    """Parse the command line, configure logging and run the agent."""
+    args = parser.parse_args()
+    if not args.prompt:
+        # REMAINDER also matches an empty argument list; do not query the model.
+        parser.error("a prompt is required")
+    configure_logger(args.log)
+    logger = get_logger(None)
+    prompt = " ".join(args.prompt)
+    logger.debug(f'Prompt: "{prompt}"')
+    logger.info(f"Backend: {args.backend}")
+    logger.info(run_agent(prompt=prompt, backend=args.backend, model=args.model))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/AiAgentScraper/ollamaagent.py b/src/AiAgentScraper/ollamaagent.py
new file mode 100644
--- /dev/null
+++ b/src/AiAgentScraper/ollamaagent.py
@@ -0,0 +1,48 @@
+import ollama
+
+from logger import get_logger
+from agent import execute_function
+
+DEFAULT_MODEL = "ministral-3:8b"
+
+logger = get_logger(__name__)
+
+system_prompt = {
+    "role": "system",
+    "content": "You are an agent and you are allowed to fetch web pages if the user requests using the given tools.",
+}
+
+class OllamaAgent:
+    """Chat agent that queries a model through ollama and runs its tool calls."""
+
+    def __init__(self, model, tools, max_loop=10):
+        """Store the model name (DEFAULT_MODEL if empty), tools and loop limit."""
+        if model:
+            self.model = model
+        else:
+            self.model = DEFAULT_MODEL
+        logger.info(f"Model: {self.model}")
+        self.tools = tools
+        self.max_loop = max_loop
+
+    def prompt(self, message):
+        """Send `message` to the model and return the content of its last reply.
+
+        While a reply contains tool calls they are executed and their results
+        are sent back, for at most `max_loop` rounds.
+        """
+        messages = [
+            system_prompt,
+            {"role": "user", "content": message},
+        ]
+        loops = 0
+        response = ollama.chat(model=self.model, messages=messages, tools=self.tools)
+        rmessage = response["message"]
+        while "tool_calls" in rmessage and loops < self.max_loop:
+            loops += 1
+            logger.debug(f"Tool calls: {len(rmessage['tool_calls'])}")
+            for tool in rmessage["tool_calls"]:
+                messages.append(execute_function(tool))
+            response = ollama.chat(model=self.model, messages=messages, tools=self.tools)
+            rmessage = response["message"]
+        return rmessage["content"]
diff --git a/src/python-scraper/scrape.py b/src/AiAgentScraper/scrape.py
similarity index 72%
rename from src/python-scraper/scrape.py
rename to src/AiAgentScraper/scrape.py
index a80f5ed..04a1291 100644
--- a/src/python-scraper/scrape.py
+++ b/src/AiAgentScraper/scrape.py
@@ -1,24 +1,27 @@
 import requests
 from bs4 import BeautifulSoup
 from markdownify import markdownify as md
+from logger import get_logger
+
+logger = get_logger(__name__)
+
 
 def fetch_web_page(url: str) -> str:
     return f"{fetch_page(url)}"
 
+
 def fetch_page(url: str) -> str:
-    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
+    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
     try:
         response = requests.get(url, headers=headers, timeout=10)
         response.raise_for_status()
     except Exception as e:
         return f"Error fetching URL: {e}"
 
-    soup = BeautifulSoup(response.text, 'lxml')
+    soup = BeautifulSoup(response.text, "lxml")
     for junk in soup(["script", "style", "nav"]):
         junk.decompose()
     text = md(str(soup), heading_style="ATX")
-    print("=========")
-    print(text)
-    print("=========")
-    
+    logger.debug(text)
+
     return text
diff --git a/src/python-scraper/tools.py b/src/AiAgentScraper/tools.py
similarity index 100%
rename from src/python-scraper/tools.py
rename to src/AiAgentScraper/tools.py
diff --git a/src/python-scraper/agent.py b/src/python-scraper/agent.py
deleted file mode 100644
index 8383744..0000000
--- a/src/python-scraper/agent.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import ollama
-import os
-from tools import available_functions
-
-tools = [
-    available_functions["fetch_web_page"].spec
-]
-
-system_prompt = {"role": "system", "content": "You are an agent and you are allowed to fetch web pages if the user requests using the given tools."}
-
-def run_agent(prompt: str, model: str = "ministral-3:8b", max_loop: int = 10):
-    messages = [
-        system_prompt,
-        {"role": "user", "content": prompt},
-    ]
-    loops = 0
-    response = ollama.chat(model=model, messages=messages, tools=tools)
-    rmessage = response["message"]
-    while "tool_calls" in rmessage and loops < max_loop:
-        max_loop += 1
-        print(f"Tool calls: {len(rmessage["tool_calls"])}")
-        for tool in rmessage["tool_calls"]:
-            messages.append(execute_function(tool))
-        response = ollama.chat(model=model, messages=messages, tools=tools)
-        rmessage = response["message"]
-    return rmessage["content"]
-
-
-def execute_function(tool):
-    function_name = tool["function"]["name"]
-    args = tool["function"]["arguments"]
-    print(f"Agent is calling: {function_name}({args})")
-    f = available_functions[function_name].function
-    return {
-        "role": "tool",
-        "content": f(**args),
-        "name": function_name,
-    }
diff --git a/src/python-scraper/main.py b/src/python-scraper/main.py
deleted file mode 100644
index d64fab8..0000000
--- a/src/python-scraper/main.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import sys
-from agent import run_agent
-import argparse
-
-parser = argparse.ArgumentParser(description="AI scraper agent")
-parser.add_argument("prompt", nargs="+", help="Prompt for the agent")
-
-def main():
-    args = parser.parse_args()
-    print(run_agent(" ".join(args.prompt)))
-
-if __name__ == "__main__":
-    main()