argument parsing, logging, etc.
src/AiAgentScraper/agent.py (new file, +40)
@@ -0,0 +1,40 @@
import os
from enum import Enum
from logger import get_logger
import ollama

from tools import available_functions
from ollamaagent import OllamaAgent


class Backend(Enum):
    OLLAMA = "ollama"
    GEMINI = "gemini"

    def __str__(self):
        return self.value


logger = get_logger(__name__)

tools = [available_functions["fetch_web_page"].spec]


def run_agent(prompt: str, backend: Backend, model: str):
    if backend == Backend.OLLAMA:
        agent = OllamaAgent(model=model, tools=tools)
        return agent.prompt(message=prompt)
    else:
        raise NotImplementedError


def execute_function(tool):
    function_name = tool["function"]["name"]
    args = tool["function"]["arguments"]
    logger.info(f"Agent is calling: {function_name}({args})")
    f = available_functions[function_name].function
    return {
        "role": "tool",
        "content": f(**args),
        "name": function_name,
    }
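To make the data flow in execute_function concrete, here is a rough sketch of the round trip it performs. The tool-call dict and the URL below are illustrative only; they follow the keys the code above reads ("function", "name", "arguments") rather than anything shown elsewhere in this commit.

# Hypothetical single tool call, shaped the way execute_function reads it
tool_call = {
    "function": {
        "name": "fetch_web_page",
        "arguments": {"url": "https://example.com"},
    }
}

# execute_function(tool_call) looks up fetch_web_page in available_functions,
# calls it with url="https://example.com", and returns roughly:
# {"role": "tool", "content": "<webpage_content>...</webpage_content>", "name": "fetch_web_page"}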
src/AiAgentScraper/geminiagent.py (new file, empty)
src/AiAgentScraper/logger.py (new file, +16)
@@ -0,0 +1,16 @@
import logging

ROOT_LOGGER = "ai"
FORMAT = '%(asctime)s|%(levelname)-5s|%(name)s| %(message)s'


def configure_logger(level: str):
    logger = logging.getLogger(ROOT_LOGGER)
    logger.setLevel(logging.getLevelNamesMapping()[level])
    console_handler = logging.StreamHandler()
    formatter = logging.Formatter(FORMAT)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)


def get_logger(name: str | None) -> logging.Logger:
    if name: return logging.getLogger(f"{ROOT_LOGGER}.{name}")
    return logging.getLogger(ROOT_LOGGER)
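A quick sketch of how the two helpers are meant to be used together; the level and log message are arbitrary, and note that logging.getLevelNamesMapping() requires a recent Python (3.11+, as far as I know):

from logger import configure_logger, get_logger

configure_logger("INFO")       # attach a console handler to the "ai" root logger
log = get_logger(__name__)     # child logger "ai.<module>", inherits that handler
log.info("hello")              # rendered via FORMAT, e.g. "...|INFO |ai.__main__| hello"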
src/AiAgentScraper/main.py (new file, +42)
@@ -0,0 +1,42 @@
import sys
import argparse
import logging
from logger import get_logger, configure_logger
from agent import run_agent, Backend


parser = argparse.ArgumentParser(description="AI scraper agent")
parser.add_argument("prompt", nargs=argparse.REMAINDER, help="Prompt for the agent")
parser.add_argument(
    "-b",
    "--backend",
    type=Backend,
    choices=list(Backend),
    default=Backend.OLLAMA,
    help="LLM backend to use.",
)
parser.add_argument(
    "-l",
    "--log",
    choices=list(logging.getLevelNamesMapping().keys()),
    default="DEBUG",
    help="Set logging level for the agent",
)
parser.add_argument(
    "-m",
    "--model",
    help="LLM model name, like ministral-3:8b or gemini-3-flash-preview",
)


def main():
    args = parser.parse_args()
    configure_logger(args.log)
    logger = get_logger(None)
    prompt = " ".join(args.prompt)
    logger.debug(f'Prompt: "{prompt}"')
    logger.info(f"Backend: {args.backend}")
    logger.info(run_agent(prompt=prompt, backend=args.backend, model=args.model))


if __name__ == "__main__":
    main()
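Because the prompt is collected with nargs=argparse.REMAINDER, the flags have to come before the free-form prompt text. A small smoke test of the parser, assuming the project's dependencies are importable; the model name and prompt are placeholders:

# Hypothetical check of the CLI defined above, feeding parse_args an explicit argv list
from main import parser

args = parser.parse_args(
    ["-b", "ollama", "-m", "ministral-3:8b", "Summarize", "https://example.com"]
)
print(args.backend, args.model)   # "ollama ministral-3:8b" (Backend.__str__ returns the value)
print(" ".join(args.prompt))      # "Summarize https://example.com"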
src/AiAgentScraper/ollamaagent.py (new file, +42)
@@ -0,0 +1,42 @@
import ollama

from logger import get_logger


DEFAULT_MODEL = "ministral-3:8b"

logger = get_logger(__name__)

system_prompt = {
    "role": "system",
    "content": "You are an agent and you are allowed to fetch web pages if the user requests using the given tools.",
}


class OllamaAgent:

    def __init__(self, model, tools, max_loop=10):
        if model:
            self.model = model
        else:
            self.model = DEFAULT_MODEL
        logger.info(f"Model: {self.model}")
        self.tools = tools
        self.max_loop = max_loop

    def prompt(self, message):
        # Imported here rather than at module level to avoid a circular import:
        # agent.py imports OllamaAgent from this module.
        from agent import execute_function

        messages = [
            system_prompt,
            {"role": "user", "content": message},
        ]
        loops = 0
        response = ollama.chat(model=self.model, messages=messages, tools=self.tools)
        rmessage = response["message"]
        while "tool_calls" in rmessage and loops < self.max_loop:
            loops += 1
            logger.debug(f"Tool calls: {len(rmessage['tool_calls'])}")
            for tool in rmessage["tool_calls"]:
                messages.append(execute_function(tool))
            response = ollama.chat(model=self.model, messages=messages, tools=self.tools)
            rmessage = response["message"]
        return rmessage["content"]

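The class can also be driven directly, without going through main.py. A rough usage sketch, assuming a local Ollama server is running and the default model has been pulled; the prompt text is just an example:

# Hypothetical direct use of OllamaAgent, bypassing the CLI
from agent import tools                        # the fetch_web_page tool spec assembled in agent.py
from ollamaagent import OllamaAgent

agent = OllamaAgent(model=None, tools=tools)   # model=None falls back to DEFAULT_MODEL
print(agent.prompt(message="Summarize https://example.com in one sentence."))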
@@ -1,24 +1,27 @@
import requests
from bs4 import BeautifulSoup
from markdownify import markdownify as md
+from logger import get_logger
+
+logger = get_logger(__name__)


def fetch_web_page(url: str) -> str:
    return f"<webpage_content>{fetch_page(url)}</webpage_content>"


def fetch_page(url: str) -> str:
-    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
+    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except Exception as e:
        return f"Error fetching URL: {e}"
-    soup = BeautifulSoup(response.text, 'lxml')
+    soup = BeautifulSoup(response.text, "lxml")
    for junk in soup(["script", "style", "nav"]):
        junk.decompose()
    text = md(str(soup), heading_style="ATX")
+
-    print("=========")
-    print(text)
-    print("=========")
+    logger.debug(text)
+
    return text
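The definition of available_functions is not visible in this diff; only the attribute access (.spec and .function) is. A plausible shape for one registry entry, assuming the spec follows the JSON-schema tool format that ollama.chat() accepts; the description and field names below are illustrative:

# Hypothetical registry entry; the real definition is not part of this commit
from dataclasses import dataclass
from typing import Callable


@dataclass
class ToolEntry:
    function: Callable
    spec: dict


available_functions = {
    "fetch_web_page": ToolEntry(
        function=fetch_web_page,  # the function defined in the file above (module name not shown here)
        spec={
            "type": "function",
            "function": {
                "name": "fetch_web_page",
                "description": "Fetch a web page and return its content as Markdown.",
                "parameters": {
                    "type": "object",
                    "properties": {"url": {"type": "string"}},
                    "required": ["url"],
                },
            },
        },
    ),
}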
@@ -1,38 +0,0 @@
import ollama
import os
from tools import available_functions

tools = [
    available_functions["fetch_web_page"].spec
]

system_prompt = {"role": "system", "content": "You are an agent and you are allowed to fetch web pages if the user requests using the given tools."}

def run_agent(prompt: str, model: str = "ministral-3:8b", max_loop: int = 10):
    messages = [
        system_prompt,
        {"role": "user", "content": prompt},
    ]
    loops = 0
    response = ollama.chat(model=model, messages=messages, tools=tools)
    rmessage = response["message"]
    while "tool_calls" in rmessage and loops < max_loop:
        max_loop += 1
        print(f"Tool calls: {len(rmessage["tool_calls"])}")
        for tool in rmessage["tool_calls"]:
            messages.append(execute_function(tool))
        response = ollama.chat(model=model, messages=messages, tools=tools)
        rmessage = response["message"]
    return rmessage["content"]


def execute_function(tool):
    function_name = tool["function"]["name"]
    args = tool["function"]["arguments"]
    print(f"Agent is calling: {function_name}({args})")
    f = available_functions[function_name].function
    return {
        "role": "tool",
        "content": f(**args),
        "name": function_name,
    }
@@ -1,13 +0,0 @@
import sys
from agent import run_agent
import argparse

parser = argparse.ArgumentParser(description="AI scraper agent")
parser.add_argument("prompt", nargs="+", help="Prompt for the agent")

def main():
    args = parser.parse_args()
    print(run_agent(" ".join(args.prompt)))

if __name__ == "__main__":
    main()