# lanpip/bots/assessment.py
import os
import sys
import json

import cv2
import numpy as np
import requests
# from transformers import TrOCRProcessor, VisionEncoderDecoderModel  # for the TrOCR variant below
# from PIL import Image
from llamaapi import LlamaAPI
from openai import OpenAI

# The local OCR package lives under ./test.
sys.path.append('./test')
from ocr.normalization import word_normalization, letter_normalization
from ocr import page, words, characters
from ocr.helpers import implt, resize
from ocr.tfhelpers import Model
from ocr.datahelpers import idx2char

# Initialize the model clients. The API keys are read from the environment
# rather than hardcoded; LLAMA_API_KEY is an assumed variable name, and the
# OpenAI client picks up OPENAI_API_KEY on its own.
llama = LlamaAPI(os.environ["LLAMA_API_KEY"])
client = OpenAI()


# Earlier TrOCR-based variant, kept for reference:
# def transcribe_handwriting(image_path):
# """
# Transcribes text from a handwriting image located at a local path using TrOCR.
#
# Parameters:
# - image_path (str): The local path to the handwriting image file.
#
# Returns:
# - str: The transcribed text. Returns False if transcription fails.
# """
# try:
# url = image_path
# image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
#
# processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
# model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
#         pixel_values = processor(images=image, return_tensors="pt").pixel_values  # Prepare the image for the model
#
#         generated_ids = model.generate(pixel_values)
#         generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
#
# return generated_text
# except Exception as e:
# print(f"An error occurred while processing the image: {e}")
# return False


def transcribe_handwriting(image_path):
    """Transcribe handwriting from a local file or URL using the OCR pipeline.

    Returns the recognised text with one output line per detected line of
    handwriting.
    """
    if image_path.startswith('http'):
        # Download remote images to a local scratch file first.
        response = requests.get(image_path)
        with open('data/test.jpg', 'wb') as f:
            f.write(response.content)
        IMG = 'data/test.jpg'
    else:
        IMG = image_path
    LANG = 'en'
    # Only one of these two models is needed; the CTC model is used below.
    # You HAVE TO train the CTC model yourself using word_classifier_CTC.ipynb.
    MODEL_LOC_CHARS = f'models/char-clas/{LANG}/CharClassifier'
    MODEL_LOC_CTC = 'models/word-clas/CTC/Classifier1'
    CHARACTER_MODEL = Model(MODEL_LOC_CHARS)
    CTC_MODEL = Model(MODEL_LOC_CTC, 'word_prediction')
    image = cv2.cvtColor(cv2.imread(IMG), cv2.COLOR_BGR2RGB)
    # Crop the page region, then detect word bounding boxes sorted into lines.
    crop = page.detection(image)
    boxes = words.detection(crop)
    lines = words.sort_words(boxes)

    def recognise(img):
        """Recognise a single word image using the CTC model."""
        img = word_normalization(
            img,
            64,
            border=False,
            tilt=False,
            hyst_norm=False)
        length = img.shape[1]
        # Input has shape [batch_size, height, width, 1]
        input_imgs = np.zeros((1, 64, length, 1), dtype=np.uint8)
        input_imgs[0][:, :length, 0] = img
        pred = CTC_MODEL.eval_feed({
            'inputs:0': input_imgs,
            'inputs_length:0': [length],
            'keep_prob:0': 1})[0]
        # Map the predicted indices back to characters (indices are 1-based).
        return ''.join(idx2char(i + 1) for i in pred)

    out = ''
    for line in lines:
        r = " ".join([recognise(crop[y1:y2, x1:x2]) for (x1, y1, x2, y2) in line])
        out += r + '\n'
        print(r)
    return out
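
# Usage sketch for transcribe_handwriting (the URL below is hypothetical;
# remote images are cached to data/test.jpg, so a writable data/ directory
# and the trained models under models/ must exist):
#
#     text = transcribe_handwriting('https://example.com/handwriting.jpg')
#     print(text)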


# Shared ETS-rubric scoring instructions used by both the GPT-4 and LLaMA paths.
ASSESSMENT_SYSTEM_PROMPT = (
    "Ask for input if the user did not enter a writing. "
    "Then, evaluate the student writing based on ETS rubrics regarding two aspects and provide a score: "
    "For Integrated Writing, Score 5: Successfully selects and coherently presents important information from the lecture in relation to the reading. The response is well-organized with only occasional language errors that do not hinder accuracy or clarity. "
    "Score 4: Good at selecting and presenting important lecture information in relation to the reading but may have minor inaccuracies or imprecisions. Minor language errors are more frequent but do not significantly affect clarity. "
    "Score 3: Contains some important information from the lecture and some relevant connections to the reading but may be vague, imprecise, or contain one major omission. Frequent errors may obscure meanings or connections. "
    "Score 2: Contains relevant information from the lecture but has significant language difficulties or inaccuracies in conveying important ideas or connections. Errors likely obscure key points for readers unfamiliar with the topics. "
    "Score 1: Provides little to no meaningful content from the lecture, with very low language level making it difficult to derive meaning. "
    "Score 0: Merely copies sentences from the reading, off-topic, written in a foreign language, consists of keystroke characters, or is blank. "
    "For Academic Discussion, Score 5: Relevant and clearly expressed contribution with consistent facility in language use, showcasing relevant explanations, effective syntactic variety, precise word choice, and almost no errors. "
    "Score 4: Relevant contribution that is easily understood, displaying adequate elaboration, syntactic variety, appropriate word choice, and few lexical or grammatical errors. "
    "Score 3: Mostly relevant and understandable contribution with some facility in language use. Some parts may be missing, unclear, or irrelevant, with noticeable lexical and grammatical errors. "
    "Score 2: Attempt to contribute with limited language use making ideas hard to follow, limited syntactic and vocabulary range, and an accumulation of structural and lexical errors. "
    "Score 1: Ineffective attempt with severely limited language use preventing expression of ideas. Few coherent ideas, with any coherent language mostly borrowed. "
    "Score 0: Blank, off-topic, not in English, entirely copied, unconnected to the prompt, or consists of arbitrary keystrokes. "
    "Lastly, provide feedback and answer questions if the user has any."
)


def chat_assessment_with_model(prompt, model_type="gpt-4"):
    """Chat with either GPT-4 or a LLaMA model.

    Parameters:
    - prompt (str): The message or question to send to the model.
    - model_type (str): The model to use ("gpt-4" or a "llama..." model name).

    Returns:
    - str: The response from the selected model.
    """
    if model_type == "gpt-4":
        try:
            chat_completion = client.chat.completions.create(
                model="gpt-4",
                messages=[
                    {"role": "system", "content": ASSESSMENT_SYSTEM_PROMPT},
                    {"role": "user", "content": prompt},
                ],
            )
            return chat_completion.choices[0].message.content.strip()
        except Exception as e:
            return f"An error occurred with GPT-4: {e}"
    elif model_type.startswith("llama"):
        api_request_json = {
            "model": model_type,
            "messages": [
                {"role": "system", "content": ASSESSMENT_SYSTEM_PROMPT},
                {"role": "user", "content": prompt},
            ],
        }
        try:
            response = llama.run(api_request_json)
            response_data = response.json()
            return response_data["choices"][0]["message"]["content"]
        except Exception as e:
            return f"An error occurred with LLaMA: {e}"
    else:
        return "Unsupported model type."