KrishGoyani committed on
Commit
7f7c409
·
verified ·
1 Parent(s): 05c8348

Update browser_tools.py

Files changed (1)
  1. browser_tools.py +52 -52
browser_tools.py CHANGED
@@ -1,53 +1,53 @@
-import requests
-from bs4 import BeautifulSoup
-from crewai import Agent, Task
-from langchain.tools import tool
-import os
-from langchain_google_genai import ChatGoogleGenerativeAI
-
-class BrowserTools:
-    @tool("Scrape website content")
-    def scrape_and_summarize_website(website):
-        """Useful to scrape and summarize a website content"""
-        # Fetch the webpage content
-        headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-        }
-        print("=====================used===================")
-        response = requests.get(website, headers=headers)
-
-        # Parse the HTML content
-        soup = BeautifulSoup(response.content, 'html.parser')
-
-        # Extract text content
-        for script in soup(["script", "style"]):
-            script.decompose()
-        content = soup.get_text(separator="\n")
-
-        # Clean up the text
-        lines = (line.strip() for line in content.splitlines())
-        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
-        content = '\n'.join(chunk for chunk in chunks if chunk)
-        print(content)
-        # Split content into chunks
-        content_chunks = [content[i:i + 8000] for i in range(0, len(content), 8000)]
-
-        summaries = []
-        for chunk in content_chunks:
-            agent = Agent(
-                role='Principal Researcher',
-                goal='Do amazing research and summaries based on the content you are working with',
-                backstory="You're a Principal Researcher at a big company and you need to do research about a given topic.",
-                llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",google_api_key = os.getenv("GOOGLE_API_KEY")),
-                allow_delegation=False
-            )
-            task = Task(
-                agent=agent,
-                description=f'Analyze and summarize the content below, make sure to include the most relevant information in the summary, return only the summary nothing else.\n\nCONTENT\n----------\n{chunk}',
-
-
-            )
-            summary = task.execute()
-            summaries.append(summary)
-
+import requests
+from bs4 import BeautifulSoup
+from crewai import Agent, Task
+from langchain.tools import tool
+import os
+from langchain_google_genai import ChatGoogleGenerativeAI
+
+class BrowserTools:
+    @tool("Scrape website content")
+    def scrape_and_summarize_website(website):
+        """Useful to scrape and summarize a website content"""
+        # Fetch the webpage content
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+        }
+
+        response = requests.get(website, headers=headers)
+
+        # Parse the HTML content
+        soup = BeautifulSoup(response.content, 'html.parser')
+
+        # Extract text content
+        for script in soup(["script", "style"]):
+            script.decompose()
+        content = soup.get_text(separator="\n")
+
+        # Clean up the text
+        lines = (line.strip() for line in content.splitlines())
+        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
+        content = '\n'.join(chunk for chunk in chunks if chunk)
+        print(content)
+        # Split content into chunks
+        content_chunks = [content[i:i + 8000] for i in range(0, len(content), 8000)]
+
+        summaries = []
+        for chunk in content_chunks:
+            agent = Agent(
+                role='Principal Researcher',
+                goal='Do amazing research and summaries based on the content you are working with',
+                backstory="You're a Principal Researcher at a big company and you need to do research about a given topic.",
+                llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",google_api_key = os.getenv("GOOGLE_API_KEY")),
+                allow_delegation=False
+            )
+            task = Task(
+                agent=agent,
+                description=f'Analyze and summarize the content below, make sure to include the most relevant information in the summary, return only the summary nothing else.\n\nCONTENT\n----------\n{chunk}',
+
+
+            )
+            summary = task.execute()
+            summaries.append(summary)
+
         return "\n\n".join(summaries)
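
For context, the tool defined in this file is normally consumed by a CrewAI agent rather than called directly. Below is a minimal usage sketch (not part of the commit) showing one plausible way to wire BrowserTools.scrape_and_summarize_website into an agent and crew. The agent role, goal, backstory, task text, target URL, and the GOOGLE_API_KEY environment variable are illustrative assumptions, and the sketch targets the same older CrewAI/LangChain-tool APIs the file itself uses (Task without expected_output, @tool from langchain.tools).

# usage_sketch.py -- illustrative only, assumes the older CrewAI API used by browser_tools.py
import os

from crewai import Agent, Task, Crew
from langchain_google_genai import ChatGoogleGenerativeAI

from browser_tools import BrowserTools

# Same Gemini model the tool itself instantiates; the API key comes from the environment.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=os.getenv("GOOGLE_API_KEY"),
)

# Register the scraping tool on an agent (role/goal/backstory are made up for this sketch).
researcher = Agent(
    role="Web Researcher",
    goal="Summarize the contents of a given website",
    backstory="You research web pages and condense them into short briefs.",
    tools=[BrowserTools.scrape_and_summarize_website],
    llm=llm,
    allow_delegation=False,
)

# A single task that should cause the agent to invoke the tool on a URL (hypothetical target).
task = Task(
    agent=researcher,
    description="Scrape and summarize https://example.com, then return the summary.",
)

crew = Crew(agents=[researcher], tasks=[task])
print(crew.kickoff())

Because the tool already chunks the scraped text into 8000-character pieces and summarizes each chunk with its own Agent/Task pair, the calling agent receives a joined set of per-chunk summaries rather than the raw page text.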