"""Keep HuggingFace Spaces and ModelScope studios active.

The script lists the spaces/studios owned by the given users, fetches each
page and, if the page contains a ``btn-lg`` button inside a form, submits
that form. A small Gradio UI lets the user run this once or schedule it.
"""

import json
import time
from urllib.parse import urljoin, urlparse  # urlparse kept: never drop an existing import

import gradio as gr
import pandas as pd
import requests
import schedule
from bs4 import BeautifulSoup
from tqdm import tqdm

TIMEOUT = 15  # seconds, applied to every HTTP request
DELAY = 1  # seconds to wait between requests and between retries
MAX_RETRIES = 3  # attempts per request when the request times out


def _request_with_retry(method: str, url: str, **kwargs) -> requests.Response:
    """Send an HTTP request, retrying a bounded number of times on timeout.

    Args:
        method: HTTP verb, e.g. ``"GET"``, ``"POST"`` or ``"PUT"``.
        url: Target URL.
        **kwargs: Passed through to ``requests.request``; ``timeout``
            defaults to ``TIMEOUT`` when not supplied.

    Returns:
        The response, already checked with ``raise_for_status()``.

    Raises:
        requests.exceptions.Timeout: If every attempt timed out.
        requests.exceptions.RequestException: For any other request failure
            (including non-2xx status codes).
    """
    kwargs.setdefault("timeout", TIMEOUT)
    attempts = max(1, MAX_RETRIES)
    for attempt in range(1, attempts + 1):
        try:
            response = requests.request(method, url, **kwargs)
            response.raise_for_status()
            return response
        except requests.exceptions.Timeout as errt:
            # Bounded retry: the previous unbounded recursion could end in
            # RecursionError when a host kept timing out.
            if attempt == attempts:
                raise
            print(f"请求超时: {errt}, retrying...")
            time.sleep(DELAY)


def get_studios(username: str) -> list:
    """Return the URLs of all ModelScope studios listed for ``username``.

    Args:
        username: ModelScope user name used in the listing endpoint.

    Returns:
        A list of ``https://www.modelscope.cn/studios/<user>/<name>`` URLs;
        an empty list when the user has no studios or the request fails.
    """
    # Request payload
    payload = {
        "PageNumber": 1,
        "PageSize": 1000,
        "Name": "",
        "SortBy": "gmt_modified",
        "Order": "desc",
    }

    try:
        # The listing endpoint is queried with PUT and a JSON-encoded body.
        response = _request_with_retry(
            "PUT",
            f"https://www.modelscope.cn/api/v1/studios/{username}/list",
            data=json.dumps(payload),
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
            },
        )
        # "Studios" may be missing/empty; treat a falsy value as "no studios".
        studios = response.json()["Data"]["Studios"] or []
        return [
            f"https://www.modelscope.cn/studios/{username}/{studio['Name']}"
            for studio in studios
        ]

    except requests.exceptions.HTTPError as errh:
        print(f"HTTP错误发生: {errh}")

    except requests.exceptions.Timeout as errt:
        print(f"请求超时: {errt}")

    except requests.exceptions.ConnectionError as errc:
        print(f"连接错误发生: {errc}")

    except requests.exceptions.RequestException as err:
        print(f"请求发生错误: {err}")

    except (KeyError, TypeError, ValueError) as err:
        # Body was not JSON or did not have the expected Data/Studios/Name keys.
        print(f"Unexpected response format: {err}")

    return []


def get_spaces(username: str) -> list:
    """Return the URLs of all HuggingFace Spaces authored by ``username``.

    The search endpoint matches on free text, so results are filtered to
    entries whose ``author`` equals ``username`` exactly.

    Args:
        username: HuggingFace user name.

    Returns:
        A list of ``https://huggingface.co/spaces/<id>`` URLs; an empty list
        when nothing matches or the request fails.
    """
    try:
        response = _request_with_retry(
            "GET",
            "https://huggingface.co/spaces-json",
            params={"sort": "trending", "search": username},
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537"
            },
        )
        spaces = response.json()["spaces"] or []
        return [
            f"https://huggingface.co/spaces/{space['id']}"
            for space in spaces
            if space["author"] == username
        ]

    except requests.exceptions.HTTPError as errh:
        print(f"HTTP错误发生: {errh}")

    except requests.exceptions.Timeout as errt:
        print(f"请求超时: {errt}")

    except requests.exceptions.RequestException as err:
        print(f"请求发生错误: {err}")

    except (KeyError, TypeError, ValueError) as err:
        # Body was not JSON or did not have the expected spaces/author/id keys.
        print(f"Unexpected response format: {err}")

    return []


def activate_space(url: str) -> str:
    """Fetch a space page and submit the form around its ``btn-lg`` button.

    Args:
        url: Page URL of the space/studio.

    Returns:
        ``"success"`` when the page has no such button or the form was
        submitted with a successful status; otherwise a message describing
        the failure.
    """
    success = "success"
    try:
        # Fetch the page content.
        response = _request_with_retry("GET", url)
        soup = BeautifulSoup(response.text, "html.parser")

        # Look for a button with class "btn-lg".
        # NOTE(review): presumably this is the wake/restart button shown for
        # an inactive space — confirm against the live pages.
        button = soup.find("button", class_="btn-lg")
        if not button:
            return success

        form = button.find_parent("form")
        if not form:
            return "按钮未在表单内找到。"

        # Resolve the action against the page URL. A missing/empty action
        # resolves to the page itself; relative and absolute actions both
        # resolve correctly (string concatenation did not handle these).
        form_action = urljoin(url, form.get("action") or "")
        form_method = (form.get("method") or "GET").upper()  # defaults to GET

        # Collect the named inputs of the form.
        form_data = {
            input_tag.get("name"): input_tag.get("value")
            for input_tag in form.find_all("input")
            if input_tag.has_attr("name")
        }

        # Submit the form; timeout and status check are applied by the helper
        # so a failed submission is reported instead of counted as success.
        if form_method == "POST":
            response = _request_with_retry("POST", form_action, data=form_data)
        else:
            response = _request_with_retry("GET", form_action, params=form_data)

        print("提交表单后的页面内容:")
        print(response.text)

    except Exception as e:
        # Best-effort boundary: any failure (network or parsing) becomes the
        # status string for this space rather than aborting the whole run.
        success = f"{e}"

    return success


def activate(hf_users: str, ms_users: str) -> pd.DataFrame:
    """Activate every space/studio of the given users once.

    Args:
        hf_users: HuggingFace user names separated by ``;``.
        ms_users: ModelScope user names separated by ``;``.

    Returns:
        A DataFrame with one row per space and the columns ``space`` (URL)
        and ``status`` (result of ``activate_space``).
    """
    # Tolerate None so a missing textbox value does not raise.
    hf_usernames = (hf_users or "").split(";")
    ms_usernames = (ms_users or "").split(";")

    spaces = []
    for user in tqdm(hf_usernames, desc="Collecting spaces..."):
        username = user.strip()
        if username:
            spaces += get_spaces(username)
            time.sleep(DELAY)

    for user in tqdm(ms_usernames, desc="Collecting studios..."):
        username = user.strip()
        if username:
            spaces += get_studios(username)
            time.sleep(DELAY)

    output = []
    for space in tqdm(spaces, desc="Activating spaces..."):
        output.append({"space": space, "status": activate_space(space)})
        time.sleep(DELAY)

    print("Activation complete!")
    # Fixed columns keep the table header present even when nothing was found.
    return pd.DataFrame(output, columns=["space", "status"])


def monitor(hf_users: str, ms_users: str, period=1):
    """Schedule ``activate`` every ``period`` days and run the scheduler loop.

    Returns immediately if a job is already scheduled; otherwise this call
    blocks forever, polling the scheduler every ``DELAY`` seconds.

    Args:
        hf_users: HuggingFace user names separated by ``;``.
        ms_users: ModelScope user names separated by ``;``.
        period: Interval between runs, in days.
    """
    if schedule.get_jobs():
        return

    print("监控开启中...每日触发")
    # Passing the function and its arguments (instead of a lambda) makes the
    # job's repr in list_tasks() show what is actually scheduled.
    schedule.every(period).days.do(activate, hf_users, ms_users)
    while True:
        try:
            schedule.run_pending()
        except Exception as err:
            # Keep the monitor alive if a single run fails unexpectedly.
            print(f"Scheduled activation failed: {err}")
        time.sleep(DELAY)


def list_tasks():
    """Return a text description of the scheduled jobs, or ``"None"``."""
    jobs = schedule.get_jobs()
    if jobs:
        return f"{jobs}".replace("[", "").replace("]", "")

    return "None"


with gr.Blocks() as iface:
    # NOTE: allow_flagging takes a string mode; the boolean form is deprecated
    # (and, as far as I can tell, rejected by newer Gradio releases).
    gr.Interface(
        title="Start keeping all spaces active daily",
        fn=monitor,
        inputs=[
            gr.Textbox(label="HuggingFace", placeholder="Usernames joint by ;"),
            gr.Textbox(label="ModelScope", placeholder="Usernames joint by ;"),
        ],
        outputs=None,
        allow_flagging="never",
    )
    gr.Interface(
        title="See current task status",
        fn=list_tasks,
        inputs=None,
        outputs=gr.Textbox(label="Current task details"),
        allow_flagging="never",
    )
    gr.Interface(
        title="Test activation for all spaces once",
        fn=activate,
        inputs=[
            gr.Textbox(label="HuggingFace", placeholder="Usernames joint by ;"),
            gr.Textbox(label="ModelScope", placeholder="Usernames joint by ;"),
        ],
        outputs=gr.Dataframe(label="Activated spaces"),
        allow_flagging="never",
    )

if __name__ == "__main__":
    iface.launch()