Spaces:
Running
Running
import os | |
os.system("pip install 'https://modelscope-studios.oss-cn-zhangjiakou.aliyuncs.com/SDK/gradio/gradio-4.44.0-py3-none-any.whl?OSSAccessKeyId=LTAI5tCGZWFdkWKivGKCtvTD&Expires=361727611665&Signature=iynlOFVFiaF3OmxatNMHUBPfb3o%3D'") | |
os.system("pip install starlette==0.38.6 fastapi==0.112.4") | |
from typing import List, Tuple, Union | |
from web_ui import WebUI | |
import math | |
from qwen_agent.agents import Assistant | |
from qwen_agent.tools.base import register_tool | |
from qwen_agent.tools.doc_parser import Record | |
from qwen_agent.tools.search_tools.base_search import RefMaterialOutput, BaseSearch | |
from qwen_agent.log import logger | |
from qwen_agent.gui.gradio import gr | |
POSITIVE_INFINITY = math.inf | |
class NoSearch(BaseSearch): | |
def call(self, params: Union[str, dict], docs: List[Union[Record, str, List[str]]] = None, **kwargs) -> list: | |
"""The basic search algorithm | |
Args: | |
params: The dict parameters. | |
docs: The list of parsed doc, each doc has unique url. | |
Returns: | |
The list of retrieved chunks from each doc. | |
""" | |
params = self._verify_json_format_args(params) | |
# Compatible with the parameter passing of the qwen-agent version <= 0.0.3 | |
max_ref_token = kwargs.get('max_ref_token', self.max_ref_token) | |
# The query is a string that may contain only the original question, | |
# or it may be a json string containing the generated keywords and the original question | |
if not docs: | |
return [] | |
return self._get_the_front_part(docs, max_ref_token) | |
def _get_the_front_part(docs: List[Record], max_ref_token: int) -> list: | |
all_tokens = 0 | |
_ref_list = [] | |
for doc in docs: | |
text = [] | |
for page in doc.raw: | |
text.append(page.content) | |
all_tokens += page.token | |
now_ref_list = RefMaterialOutput(url=doc.url, text=text).to_dict() | |
_ref_list.append(now_ref_list) | |
logger.info(f'Using tokens: {all_tokens}') | |
if all_tokens > max_ref_token: | |
raise gr.Error(f"Your document files (around {all_tokens} tokens) exceed the maximum context length ({max_ref_token} tokens).") | |
return _ref_list | |
def sort_by_scores(self, | |
query: str, | |
docs: List[Record], | |
max_ref_token: int, | |
**kwargs) -> List[Tuple[str, int, float]]: | |
raise NotImplementedError | |
def app_gui(): | |
# Define the agent | |
bot = Assistant(llm={ | |
'model': 'qwen-turbo-1101', | |
'generate_cfg': { | |
'max_input_tokens': 1000000, | |
'max_retries': 10, | |
}}, | |
name='Qwen-Turbo-1M', | |
description='Qwen-Turbo natively supports input length of up to 1M tokens. You can upload documents for Q&A (eg., pdf/docx/pptx/txt/html).', | |
rag_cfg={'max_ref_token': 1000000, 'rag_searchers': ['no_search']}, | |
) | |
chatbot_config = { | |
'input.placeholder': "Type \"/clear\" to clear the history", | |
'verbose': True, | |
} | |
WebUI(bot, chatbot_config=chatbot_config).run() | |
if __name__ == '__main__': | |
import patching # patch qwen-agent to accelerate 1M processing | |
app_gui() | |