from transformers import AutoProcessor, AutoModelForImageTextToText
import torch
import gradio as gr
import spaces
from PIL import Image
from openai import OpenAI
import os

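# OpenAI client for the scoring step; the key is read from the
# OPENAI_API_KEY environment variable.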
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# from zipnn import zipnn_hf

# zipnn_hf()

# model_id = "royleibov/Llama-3.2-11B-Vision-Instruct-ZipNN-Compressed"
model_id = "unsloth/Llama-3.2-11B-Vision"

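# Load the vision-language model in bfloat16; device_map="auto" lets
# Accelerate place the weights on whatever GPU(s) are available.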
model = AutoModelForImageTextToText.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(model_id)


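# On Hugging Face Spaces, @spaces.GPU attaches a ZeroGPU device for the
# duration of each call.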
@spaces.GPU
def ocr(input_img):
    # Gradio delivers the upload as a numpy array; convert it to a PIL image
    pil_image = Image.fromarray(input_img.astype("uint8"))
    image = pil_image.convert("RGB").resize((224, 224))

    prompt = "<|begin_of_text|><|image|>this script says"
    inputs = processor(image, prompt, return_tensors="pt").to(model.device)

    output = model.generate(**inputs, max_new_tokens=200)
    # Decode only the newly generated tokens, skipping special tokens, so
    # neither the prompt nor markers like <|eot_id|> leak into the result
    generated = output[0][inputs["input_ids"].shape[-1]:]
    return processor.decode(generated, skip_special_tokens=True).strip()


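# Instruction template shared with GPT so the grade comes back in a
# predictable Score/Reason/Suggestions format.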
prompt = '''Review the following essay and score it. The output format is:
Score: *<score>*
Reason: ...
Suggestions: ...
'''

def score_with_gpt(text):
    messages = [
        {"role": "system", "content": (
            "You are a high school English teacher grading a student's "
            "English essay. Score the essay based on the following criteria:\n"
            "1. The essay is well organized and flows logically.\n"
            "2. The essay is well written and uses correct grammar and punctuation.\n"
            "3. The words are spelled correctly.\n"
            "4. The words and phrases are appropriate for the context.\n"
            "5. Scale the score from 0 to 100."
        )},
        {"role": "user", "content": f"{prompt}\nThe essay is: \"{text}\""},
    ]

    response = client.chat.completions.create(
        model="gpt-4o-mini",  # or "gpt-3.5-turbo"
        messages=messages,
        max_tokens=500,
    )
    return response.choices[0].message.content


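# Full pipeline: transcribe the uploaded image, then grade the transcription.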
def score_it(input_img):
    text = ocr(input_img)
    return text, score_with_gpt(text)


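# Gradio UI: one image input, two outputs (the transcription and the GPT grade).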
demo = gr.Interface(
    fn=score_it,
    title="Upload your English script and get the score",
    inputs=[gr.Image()],
    outputs=[
        gr.Textbox(label="Text", lines=10),
        gr.Markdown(label="Score", show_label=True),
    ],
    stop_btn="Stop Generation",
)

demo.launch(debug=True)
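
# A minimal smoke test that bypasses the UI (assumes a local image file;
# "sample_essay.png" is only an example name):
#
#   import numpy as np
#   img = np.array(Image.open("sample_essay.png"))
#   text, score = score_it(img)
#   print(text, score, sep="\n")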