Spaces:
Runtime error
Runtime error
langchain-qa-bot
/
docs
/langchain
/libs
/community
/langchain_community
/document_loaders
/browserbase.py
from typing import Iterator, Optional, Sequence | |
from langchain_core.documents import Document | |
from langchain_community.document_loaders.base import BaseLoader | |
class BrowserbaseLoader(BaseLoader): | |
"""Load pre-rendered web pages using a headless browser hosted on Browserbase. | |
Depends on `browserbase` package. | |
Get your API key from https://browserbase.com | |
""" | |
def __init__( | |
self, | |
urls: Sequence[str], | |
text_content: bool = False, | |
api_key: Optional[str] = None, | |
project_id: Optional[str] = None, | |
session_id: Optional[str] = None, | |
proxy: Optional[bool] = None, | |
): | |
self.urls = urls | |
self.text_content = text_content | |
self.session_id = session_id | |
self.proxy = proxy | |
try: | |
from browserbase import Browserbase | |
except ImportError: | |
raise ImportError( | |
"You must run " | |
"`pip install --upgrade " | |
"browserbase` " | |
"to use the Browserbase loader." | |
) | |
self.browserbase = Browserbase(api_key, project_id) | |
def lazy_load(self) -> Iterator[Document]: | |
"""Load pages from URLs""" | |
pages = self.browserbase.load_urls( | |
self.urls, self.text_content, self.session_id, self.proxy | |
) | |
for i, page in enumerate(pages): | |
yield Document( | |
page_content=page, | |
metadata={ | |
"url": self.urls[i], | |
}, | |
) | |