In [None]:
import json
import os
import re
import pickle
from tqdm import tqdm 
import time
import pandas as pd
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import random
from collections import defaultdict

from openai import OpenAI
import google.generativeai as genai
import anthropic
from utils import run_gpt, run_claude

from langchain_community.utilities import SearchApiAPIWrapper
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain_community.utilities import GoogleSearchAPIWrapper
from langchain.tools import Tool, DuckDuckGoSearchResults


In [2]:
# Search backends used by the agents in this notebook.
# NOTE(review): GoogleSearchAPIWrapper presumably needs GOOGLE_API_KEY / GOOGLE_CSE_ID
# to be available when it is constructed -- confirm, because this notebook only copies
# them into os.environ in the Gemini cell at the very end.
search = GoogleSearchAPIWrapper()
ddg_search = DuckDuckGoSearchResults()

# Relative path of the chromedriver executable handed to Selenium's Service.
driver_path = 'chromedriver-linux64/chromedriver'

# Chrome launch configuration reused by every Selenium session started below.
options = webdriver.ChromeOptions()
options.binary_location = '/usr/bin/google-chrome'
for _chrome_flag in (
    '--no-sandbox',
    '--disable-dev-shm-usage',
    '--headless',
    '--disable-gpu',
    '--ignore-certificate-errors',
):
    options.add_argument(_chrome_flag)


def search_internet(search_term: str) -> str:
    """Search the web with Google and return the top results as text.

    This function is also handed to Gemini as a tool, so the docstring is worth
    keeping accurate.
    NOTE(review): the google.generativeai SDK appears to build the tool description
    from this docstring -- confirm against the SDK documentation.

    Args:
        search_term: The query to look up, written the way a person would type it
            into a search engine.

    Returns:
        The string form of up to 10 search results as returned by the module-level
        ``search`` wrapper.
    """
    hits = search.results(search_term, 10)
    return str(hits)


def is_clickable(tag):
    """Tell whether a parsed HTML tag is something a user could click.

    A tag counts as clickable when it is a link with a non-empty ``href``, a
    ``<button>``, an ``<input>`` of type ``submit`` or ``button``, or any tag that
    carries a non-empty ``onclick`` attribute.

    Args:
        tag: An object exposing ``.name`` and ``.get(attribute)``.

    Returns:
        ``True`` if the tag matches one of the rules above, otherwise ``False``.
    """
    kind = tag.name
    return bool(
        (kind == "a" and tag.get("href"))
        or kind == "button"
        or (kind == "input" and tag.get("type") in ("submit", "button"))
        or tag.get("onclick")
    )


def navigate_website(url: str):
    """Open ``url`` and return the text found on the page as one message string.

    The page is opened in a Selenium Chrome session only to learn the URL the
    browser ends up on. The text itself comes from a separate ``requests``
    download of ``url`` parsed with BeautifulSoup, so it reflects the raw HTML
    response rather than the browser-rendered page.

    Args:
        url: Address of the page to read.

    Returns:
        A message containing the browser's current URL and the set of distinct,
        non-empty texts of the ``p``/``h1``/``h2``/``h3``/``div``/``span`` tags.
        Errors raised while loading or parsing the page are returned as text
        instead of being raised.
    """
    service = Service(driver_path)
    driver = webdriver.Chrome(service=service, options=options)
    try:
        driver.get(url)
        new_url = driver.current_url

        # Certificate checks are switched off for the download (verify=False).
        response = requests.get(url, verify=False)
        response.raise_for_status()  # Ensure we notice bad responses
        # NOTE(review): the download has already finished at this point, so this
        # pause does not wait for anything -- confirm whether it is meant as throttling.
        time.sleep(2)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove <script> nodes from the tree so their code cannot end up in the
        # extracted text. The previous membership filter could never match (a
        # p/h1/h2/h3/div/span tag is never equal to a script tag).
        for script_tag in soup.find_all('script'):
            script_tag.decompose()

        clean_texts = set()
        for tag in soup.find_all(['p', 'h1', 'h2', 'h3', 'div', 'span']):
            text = tag.get_text(strip=True)
            if text:
                clean_texts.add(text)

        return f"Current URL: {new_url}, Scrape text from the url result: {clean_texts}\
                If there is no information you're looking for, find clickable elements from this page and navigate it again."
    except requests.RequestException as e:
        return str(e)

    except Exception as e:
        return "An error occurred: {}".format(e)

    finally:
        # Always shut the browser down; otherwise every call leaves a Chrome
        # process running.
        driver.quit()



def _describe_clickable(element, base_url):
    """Summarise one clickable tag as ``{"url": ..., "description": ...}``."""
    # Imported locally so the helper does not depend on the notebook's import cell.
    from urllib.parse import urljoin

    href = element.get("href") if element.name == "a" else None
    if href:
        # Resolve relative links so the result can be opened directly.
        target = urljoin(base_url, href)
    else:
        # Buttons, inputs and onclick-only tags have no link; report the handler.
        target = element.get("onclick")

    if element.name == "input":
        label = element.get("value")  # Button label
    else:
        label = element.get_text(strip=True)
    return {"url": target, "description": label}


def get_clickable_elements(url: str):
    """List the clickable elements of the page at ``url`` as a message string.

    The page is downloaded with ``requests`` (certificate checks disabled) and
    every tag accepted by ``is_clickable`` is reduced to a small dictionary with
    a ``url`` (absolute link for anchors, otherwise the ``onclick`` value) and a
    ``description`` (visible text, or ``value`` for ``<input>``).

    Args:
        url: Address of the page to inspect.

    Returns:
        A message that embeds the list of element dictionaries, or the error text
        if the download fails.
    """
    try:
        response = requests.get(url, verify=False)
        response.raise_for_status()  # Ensure we notice bad responses
        # NOTE(review): the download has already finished at this point, so this
        # pause does not wait for anything -- confirm whether it is meant as throttling.
        time.sleep(2)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Relative links are resolved against the address the response finally
        # came from, which may differ from ``url`` after a redirect.
        element_details = [
            _describe_clickable(element, response.url)
            for element in soup.find_all(is_clickable)
        ]

        # Return the compact summaries (previously built but never used) rather
        # than the raw tag markup.
        return "Find clickable elements successfully. Choose appropriate elements and corresponding url among: {}".format(element_details)
    except requests.RequestException as e:
        return str(e)



In [17]:
# Optional follow-up questions passed to the GPT and Claude agents; add more here.
sub_question = []
# The main question put to every agent in the cells below.
question = "what date is it today?"

In [18]:
### GPT search agent ###
# NOTE(review): OPENAI_API is not defined in the visible cells; it has to exist in
# the kernel before this cell runs.
llm = "gpt-4o-2024-05-13"
web_use, navi_use = True, False  # web search on, page navigation off
client = OpenAI(api_key=OPENAI_API)
result, response, msg = run_gpt(
    client,
    question,
    sub_question,
    llm,
    web=web_use,
    navigation=navi_use,
)

In [21]:
### Claude search agent ###
# NOTE(review): CLAUDE_API is not defined in the visible cells; it has to exist in
# the kernel before this cell runs.
llm = "claude-3-5-sonnet-20240620"
web_use, navi_use = True, False  # web search on, page navigation off
client = anthropic.Anthropic(api_key=CLAUDE_API)
result, response, msg = run_claude(
    client,
    question,
    sub_question,
    llm,
    web=web_use,
    navigation=navi_use,
)

In [None]:
### Gemini search agent ###
# NOTE(review): GOOGLE_CSE_ID and GOOGLE_API_KEY are not defined in the visible
# cells; they have to exist in the kernel before this cell runs.
os.environ["GOOGLE_CSE_ID"] = GOOGLE_CSE_ID
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

# Set the flag here, as the GPT and Claude cells do, so this cell does not depend
# on a value left behind by whichever of them happened to run last.
web_use = True
llm = "gemini-1.5-flash"
model = genai.GenerativeModel(model_name=llm, tools=[search_internet])
chat = model.start_chat(enable_automatic_function_calling=web_use)

# Keep the response object and the final answer text under separate names.
gemini_reply = chat.send_message(question)
result = gemini_reply.text