import os
import sys
import json

import requests
import numpy as np
import cv2

# Make the local `ocr` package importable.
sys.path.append('./test')

from ocr.normalization import word_normalization, letter_normalization
from ocr import page, words, characters
from ocr.helpers import implt, resize
from ocr.tfhelpers import Model
from ocr.datahelpers import idx2char

from llamaapi import LlamaAPI
from openai import OpenAI

# Read API credentials from the environment rather than hard-coding secrets
# in source control. LLAMA_API_KEY is an assumed variable name.
llama = LlamaAPI(os.environ["LLAMA_API_KEY"])

# OpenAI() picks up OPENAI_API_KEY from the environment automatically.
client = OpenAI()
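# Both clients assume the keys were exported in the shell beforehand, e.g.:
#   export OPENAI_API_KEY="sk-..."
#   export LLAMA_API_KEY="LL-..."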
|
|
def transcribe_handwriting(image_path):
    """OCR a handwritten page (local path or URL) and return its text."""
    # Download remote images to a local scratch file first.
    if image_path.startswith('http'):
        response = requests.get(image_path)
        os.makedirs('data', exist_ok=True)
        with open('data/test.jpg', 'wb') as f:
            f.write(response.content)
        IMG = 'data/test.jpg'
    else:
        IMG = image_path
    LANG = 'en'

    MODEL_LOC_CHARS = f'models/char-clas/{LANG}/CharClassifier'
    MODEL_LOC_CTC = 'models/word-clas/CTC/Classifier1'

    # The character classifier is loaded here but the pipeline below only
    # uses the CTC word model.
    CHARACTER_MODEL = Model(MODEL_LOC_CHARS)
    CTC_MODEL = Model(MODEL_LOC_CTC, 'word_prediction')

    # OpenCV loads BGR; convert to RGB for the detection models.
    image = cv2.cvtColor(cv2.imread(IMG), cv2.COLOR_BGR2RGB)

    # Crop the page region, find word bounding boxes, and group them into lines.
    crop = page.detection(image)
    boxes = words.detection(crop)
    lines = words.sort_words(boxes)

    def recognise(img):
        """Recognise a single word image using the CTC model."""
        # Normalize the word crop to a fixed height of 64 pixels.
        img = word_normalization(
            img,
            64,
            border=False,
            tilt=False,
            hyst_norm=False)
        length = img.shape[1]

        # Pack the word into a (batch, height, width, channel) tensor.
        input_imgs = np.zeros((1, 64, length, 1), dtype=np.uint8)
        input_imgs[0][:, :length, 0] = img

        pred = CTC_MODEL.eval_feed({
            'inputs:0': input_imgs,
            'inputs_length:0': [length],
            'keep_prob:0': 1})[0]

        # Map predicted indices back to characters (offset by 1 for the
        # CTC blank label).
        return ''.join(idx2char(i + 1) for i in pred)

    # Recognise every word, line by line, and join into the final transcript.
    out = ''
    for line in lines:
        r = " ".join(recognise(crop[y1:y2, x1:x2]) for (x1, y1, x2, y2) in line)
        out += r + '\n'
        print(r)
    return out
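# Example usage (hypothetical path): OCR a scanned page and keep the text.
# transcript = transcribe_handwriting('data/pages/sample_essay.jpg')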
|
|
|
|
|
def chat_assessment_with_model(prompt, model_type="gpt-4"): |
|
""" |
|
Function to chat with either the GPT-4 or LLaMA model. |
|
|
|
Parameters: |
|
- prompt (str): The message or question to send to the model. |
|
- model_type (str): The type of model to use ("gpt-4" or "llama"). |
|
|
|
Returns: |
|
- str: The response from the selected model. |
|
""" |
|
if model_type == "gpt-4": |
|
try: |
|
chat_completion = client.chat.completions.create( |
|
model="gpt-4", |
|
messages=[ |
|
{"role": "system", "content": "Ask for input if user did not enter a writing." |
|
"Then, Evaluate student writing based on ETS Rubrics regarding two aspects and provide a score: " |
|
"For Intergrated wirting, Score 5: Successfully selects and coherently presents important information from the lecture in relation to the reading. The response is well-organized with only occasional language errors that do not hinder accuracy or clarity." |
|
"Score 4: Good at selecting and presenting important lecture information in relation to the reading but may have minor inaccuracies or imprecisions. Minor language errors are more frequent but do not significantly affect clarity." |
|
"Score 3: Contains some important information from the lecture and some relevant connections to the reading but may be vague, imprecise, or contain one major omission. Frequent errors may obscure meanings or connections." |
|
"Score 2: Contains relevant information from the lecture but has significant language difficulties or inaccuracies in conveying important ideas or connections. Errors likely obscure key points for readers unfamiliar with the topics." |
|
"Score 1: Provides little to no meaningful content from the lecture, with very low language level making it difficult to derive meaning." |
|
"Score 0: Merely copies sentences from the reading, off-topic, written in a foreign language, consists of keystroke characters, or is blank." |
|
"For academic discussion, Score 5: Relevant and clearly expressed contribution with consistent facility in language use, showcasing relevant explanations, effective syntactic variety, precise word choice, and almost no errors." |
|
"Score 4: Relevant contribution that is easily understood, displaying adequate elaboration, syntactic variety, appropriate word choice, and few lexical or grammatical errors." |
|
"Score 3: Mostly relevant and understandable contribution with some facility in language use. Some parts may be missing, unclear, or irrelevant, with noticeable lexical and grammatical errors." |
|
"Score 2: Attempt to contribute with limited language use making ideas hard to follow, limited syntactic and vocabulary range, and an accumulation of structural and lexical errors." |
|
"Score 1: Ineffective attempt with severely limited language use preventing expression of ideas. Few coherent ideas, with any coherent language mostly borrowed." |
|
"Score 0: Blank, off-topic, not in English, entirely copied, unconnected to the prompt, or consists of arbitrary keystrokes." |
|
"Lastly, provide feedback and answer questions if the user has."}, |
|
{"role": "user", "content": prompt}, |
|
] |
|
) |
|
return chat_completion.choices[0].message.content.strip() |
|
except Exception as e: |
|
return f"An error occurred with GPT-4: {e}" |
|
elif model_type.startswith("llama"): |
|
api_request_json = { |
|
"model": model_type, |
|
"messages": [ |
|
{"role": "system", "content": "Ask for input if user did not enter a writing." |
|
"Then, Evaluate student writing based on ETS Rubrics regarding two aspects and provide a score: " |
|
"For Intergrated wirting, Score 5: Successfully selects and coherently presents important information from the lecture in relation to the reading. The response is well-organized with only occasional language errors that do not hinder accuracy or clarity." |
|
"Score 4: Good at selecting and presenting important lecture information in relation to the reading but may have minor inaccuracies or imprecisions. Minor language errors are more frequent but do not significantly affect clarity." |
|
"Score 3: Contains some important information from the lecture and some relevant connections to the reading but may be vague, imprecise, or contain one major omission. Frequent errors may obscure meanings or connections." |
|
"Score 2: Contains relevant information from the lecture but has significant language difficulties or inaccuracies in conveying important ideas or connections. Errors likely obscure key points for readers unfamiliar with the topics." |
|
"Score 1: Provides little to no meaningful content from the lecture, with very low language level making it difficult to derive meaning." |
|
"Score 0: Merely copies sentences from the reading, off-topic, written in a foreign language, consists of keystroke characters, or is blank." |
|
"For academic discussion, Score 5: Relevant and clearly expressed contribution with consistent facility in language use, showcasing relevant explanations, effective syntactic variety, precise word choice, and almost no errors." |
|
"Score 4: Relevant contribution that is easily understood, displaying adequate elaboration, syntactic variety, appropriate word choice, and few lexical or grammatical errors." |
|
"Score 3: Mostly relevant and understandable contribution with some facility in language use. Some parts may be missing, unclear, or irrelevant, with noticeable lexical and grammatical errors." |
|
"Score 2: Attempt to contribute with limited language use making ideas hard to follow, limited syntactic and vocabulary range, and an accumulation of structural and lexical errors." |
|
"Score 1: Ineffective attempt with severely limited language use preventing expression of ideas. Few coherent ideas, with any coherent language mostly borrowed." |
|
"Score 0: Blank, off-topic, not in English, entirely copied, unconnected to the prompt, or consists of arbitrary keystrokes." |
|
"Lastly, provide feedback and answer questions if the user has."}, |
|
{"role": "user", "content": prompt}, |
|
] |
|
} |
|
try: |
|
response = llama.run(api_request_json) |
|
response_data = response.json() |
|
return response_data["choices"][0]["message"]["content"] |
|
except Exception as e: |
|
return f"An error occurred with LLaMA: {e}" |
|
else: |
|
return "Unsupported model type." |