Spaces:

Mamadou2727
/

correction-zarma

Sleeping

App Files Files Community

correction-zarma / app.py

Mamadou2727

update

8913a87 verified about 1 month ago

raw

history blame contribute delete

3.32 kB

	import gradio as gr
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
	import torch


	#code

	# Language code handed to the tokenizer (src_lang / tgt_lang) and used to
	# pick the forced BOS token for each UI language. Both entries use "yo".
	# NOTE(review): presumably a stand-in because the base tokenizer has no
	# dedicated Bambara/Zarma code -- confirm with the model authors.
	LANG_CODES = dict.fromkeys(("Bambara", "Zarma"), "yo")

	# UI language -> (fine-tuned checkpoint, tokenizer checkpoint).
	models = {
	    "Bambara": (
	        "Mamadou2727/m2m100_418M-correction",
	        "facebook/m2m100_418M",
	    ),
	    "Zarma": (
	        "Mamadou2727/m2m100_418M-correction-zarma",
	        "facebook/m2m100_418M",
	    ),
	}

	# Use the first CUDA device when one is available, otherwise the CPU.
	if torch.cuda.is_available():
	    device = "cuda:0"
	else:
	    device = "cpu"

	# Loaded (model, tokenizer) pairs keyed by UI language, so each checkpoint
	# is read only once per process instead of on every request.
	_MODEL_CACHE = {}


	def load_model(language):
	    """Return the ``(model, tokenizer)`` pair for ``language``.

	    The pair is loaded with ``from_pretrained`` on first use and then
	    served from ``_MODEL_CACHE`` on later calls.

	    Args:
	        language: A key of the module-level ``models`` mapping.

	    Returns:
	        A ``(model, tokenizer)`` tuple.

	    Raises:
	        KeyError: If ``language`` is not a key of ``models``.
	    """
	    if language not in _MODEL_CACHE:
	        model_name, tokenizer_name = models[language]
	        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
	        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
	        _MODEL_CACHE[language] = (model, tokenizer)
	    return _MODEL_CACHE[language]

	def correct(text, language, candidates):
	    """Correct ``text`` in the selected language.

	    Args:
	        text: The text to spell-check.
	        language: A key of ``LANG_CODES`` / ``models``.
	        candidates: Number of suggestions to produce; used both as the
	            beam width and as the number of returned sequences.

	    Returns:
	        The decoded suggestions joined with newlines (one per line), or
	        an empty string when ``text`` is empty or only whitespace.

	    Raises:
	        KeyError: If ``language`` is not a known language.
	    """
	    # Nothing to correct: skip model loading and generation entirely.
	    if not text or not text.strip():
	        return ''

	    model, tokenizer = load_model(language)
	    model.to(device)
	    src = LANG_CODES[language]

	    # Source and target language are the same code: the model rewrites
	    # the text rather than translating it to another language.
	    tokenizer.src_lang = src
	    tokenizer.tgt_lang = src

	    ins = tokenizer(text, return_tensors='pt').to(device)

	    # The value comes from a UI slider; generate() needs a positive int.
	    num_candidates = max(1, int(candidates))

	    gen_args = {
	        # Needed so the result exposes ``.sequences``.
	        'return_dict_in_generate': True,
	        'length_penalty': 0.0,  # don't encourage longer or shorter output
	        'num_return_sequences': num_candidates,
	        'num_beams': num_candidates,
	        'forced_bos_token_id': tokenizer.lang_code_to_id[src],
	    }

	    # Unpack the encoded inputs (input_ids, attention_mask, ...) and the
	    # generation options as keyword arguments.
	    outs = model.generate(**ins, **gen_args)
	    output = tokenizer.batch_decode(outs.sequences, skip_special_tokens=True)

	    # Join the suggestions with new lines and return as a single string.
	    return '\n'.join(output)

	# Gradio UI: description on one side, inputs/outputs and the trigger
	# button on the other.
	# NOTE(review): the nesting below was reconstructed from a source whose
	# indentation had been flattened -- confirm the Row/Column layout.
	with gr.Blocks() as app:
	    # Page description rendered as Markdown (raw string, kept verbatim).
	    markdown = r"""
	# Hasegnan, The First Zarma Spell Checker
	<img src="https://cdn-uploads.huggingface.co/production/uploads/63cc1d4bf488db9bb3c6449e/AtOKLAaL5kt0VhRsxE0vf.png" width="500" height="300">

	This is a beta version of the Zarma Spell Checker and Inlcude Bambara spellchecking as well.

	## Intended Uses & Limitations

	This model is intended for academic research and practical applications in machine translation. It can be used to translate French text to Zarma and vice versa. Users should note that the model's performance may vary based on the complexity and context of the input text.

	## Authors:
	The project, FERIJI, was curated by Elysabhete Ibrahim Amadou, Habibatou Abdoulaye Alfari, Adwoa Bremang, Dennis Owusu, Mamadou K. KEITA and Dr Christopher Homan, with the aim to enhance linguistic studies for Zarma.

	"""

	    with gr.Row():
	        gr.Markdown(markdown)
	        with gr.Column():
	            input_text = gr.components.Textbox(lines=7, label="Input Text", value="")
	            language = gr.Dropdown(label="Language", choices=["Bambara", "Zarma"], value="Bambara")
	            return_seqs = gr.Slider(label="Number of return sequences", value=1, minimum=1, maximum=12, step=1)
	            correction_suggestions = gr.Textbox(lines=7, label="Correction Suggestions")
	            # Free-text box for the user's chosen correction; it is not
	            # wired to any event handler.
	            final_output = gr.Textbox(lines=7, label="Final Output", placeholder="Copy your preferred correction here...")

	            # Clicking the button runs correct() on the three inputs and
	            # writes its result into the suggestions box.
	            translate_btn = gr.Button("Corrige")
	            translate_btn.click(correct, inputs=[input_text, language, return_seqs], outputs=correction_suggestions)

	app.launch(share=True)