"""Gradio demo: send a prompt to a hosted chat model and score its reply.

The reply produced by the remote chat model is scored with the Hugging Face
``evaluate`` perplexity metric, using ``gpt2`` as the scoring model.
"""

import gradio as gr
from huggingface_hub import InferenceClient
import evaluate

# Perplexity metric loaded once at import time and reused for every request.
perplexity = evaluate.load("perplexity", module_type="metric")

# Inference client bound to the chat model that generates the replies.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def compute_perplexity(message: str) -> str:
    """Generate a chat reply to *message* and report the reply's perplexity.

    Args:
        message: The user's prompt; sent as the only message of the chat.

    Returns:
        A human-readable string with the perplexity of the generated reply,
        or a short explanatory message when the model returned no text.
    """
    # The conversation consists of a single user turn.
    messages = [{"role": "user", "content": message}]

    # Request a complete (non-streamed) chat completion.
    response = client.chat_completion(
        messages,
        max_tokens=512,
        stream=False,
        temperature=0.7,
        top_p=0.95,
    )

    # With stream=False the text lives on `message`; `delta` only exists on
    # the chunks yielded by a streamed completion.
    generated_text = response.choices[0].message.content

    # Nothing to score if the model produced no text.
    if not generated_text:
        return "The model returned an empty response; perplexity cannot be computed."

    # NOTE(review): with add_start_token=False the metric appears to require
    # at least two tokens per input -- confirm behavior for very short replies.
    perplexity_results = perplexity.compute(
        model_id='gpt2',
        add_start_token=False,
        predictions=[generated_text],
    )

    # The metric reports per-input `perplexities` and their `mean_perplexity`;
    # with a single prediction the mean is that prediction's perplexity.
    perplexity_value = perplexity_results['mean_perplexity']

    return f"Perplexity of the response: {perplexity_value}"


# Gradio UI: one text box in, one text box out.
demo = gr.Interface(
    fn=compute_perplexity,
    inputs="text",
    outputs="text",
    title="Compute Perplexity",
    description="Enter a text to compute its perplexity based on the gpt2 model.",
)

if __name__ == "__main__":
    demo.launch()