"""Streamlit app: upload an image and transcribe it with the TrOCR handwriting model."""

import requests
import streamlit as st
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

MODEL_NAME = "microsoft/trocr-base-handwritten"


def _load_trocr():
    """Load the TrOCR processor and encoder-decoder model for ``MODEL_NAME``.

    Returns:
        A ``(processor, model)`` tuple.
    """
    return (
        TrOCRProcessor.from_pretrained(MODEL_NAME),
        VisionEncoderDecoderModel.from_pretrained(MODEL_NAME),
    )


# Streamlit re-executes this script from the top on every widget interaction,
# so an uncached load would re-instantiate the model on each button click.
# ``st.cache_resource`` only exists in newer Streamlit releases; when it is
# missing we fall back to the plain (uncached) load.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is not None:
    _load_trocr = _cache_resource(_load_trocr)

# Load the model and processor (module-level names kept for existing callers).
processor, model = _load_trocr()


def load_image(image_file) -> Image.Image:
    """Open an uploaded file (or path / file-like object) as a PIL image."""
    return Image.open(image_file)


def predict(image: Image.Image) -> str:
    """Run TrOCR on ``image`` and return the decoded text.

    Args:
        image: The PIL image to transcribe. Any mode is accepted; it is
            converted to RGB before preprocessing.

    Returns:
        The text decoded from the first generated sequence, with special
        tokens stripped.
    """
    # PNG uploads are often RGBA, palette or greyscale; the TrOCR usage
    # examples feed the processor 3-channel RGB, so normalise the mode first.
    rgb_image = image.convert("RGB")

    # Preprocess the image into model input tensors.
    pixel_values = processor(images=rgb_image, return_tensors="pt").pixel_values

    # Generate the predicted token ids.
    output_ids = model.generate(pixel_values)

    # Decode the first sequence back into text.
    return processor.decode(output_ids[0], skip_special_tokens=True)


def main() -> None:
    """Render the page: file uploader, image preview and a recognise button."""
    st.title("图片物体识别")

    image_file = st.file_uploader("上传一张图片", type=["jpg", "png", "jpeg"])
    if image_file is None:
        return

    # Show the uploaded image.
    image = load_image(image_file)
    st.image(image, caption='上传的图片', use_column_width=True)

    # Run recognition only when the user asks for it.
    if st.button("识别图片"):
        result_text = predict(image)
        st.write(f"识别结果: {result_text}")


if __name__ == '__main__':
    main()