Rajendransp133 committed on
Commit
0af0c43
·
verified ·
1 Parent(s): bec08f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -40
app.py CHANGED
@@ -1,40 +1,41 @@
1
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import os
import sys

# Make the vendored IndicTransToolkit package importable.
sys.path.append(os.path.abspath("libs/IndicTransToolkit"))
from IndicTransToolkit.processor import IndicProcessor

app = FastAPI(title="IndicTrans Translator API")

# Shared, module-level inference resources (loaded once at server start).
ip = IndicProcessor(inference=True)
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indictrans2-en-indic-dist-200M", trust_remote_code=True)
model = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/indictrans2-en-indic-dist-200M", trust_remote_code=True)


class TranslationRequest(BaseModel):
    # English source text to translate.
    text: str
    # IndicTrans2 target-language code, e.g. "hin_Deva".
    target_lang: str


@app.post("/translate")
def translate_text(req: TranslationRequest):
    """Translate English text into the requested Indic language.

    Returns a JSON object ``{"translation": <str>}``. Raises 400 for empty
    input and 500 (with the underlying error message) for any failure in
    preprocessing, generation, or postprocessing.
    """
    if not req.text.strip():
        raise HTTPException(status_code=400, detail="Input text is empty.")

    try:
        # Source language is fixed to English (eng_Latn) by this endpoint.
        prepared = ip.preprocess_batch([req.text], src_lang="eng_Latn", tgt_lang=req.target_lang)
        encoded = tokenizer(prepared, padding="longest", truncation=True, max_length=256, return_tensors="pt")

        with torch.inference_mode():
            generated = model.generate(**encoded, num_beams=5, max_length=256)

        # Decode with the target-side vocabulary of the seq2seq tokenizer.
        with tokenizer.as_target_tokenizer():
            raw_texts = tokenizer.batch_decode(generated, skip_special_tokens=True, clean_up_tokenization_spaces=True)

        result = ip.postprocess_batch(raw_texts, lang=req.target_lang)[0]
        return {"translation": result}

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
 
 
1
import streamlit as st
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import os
import sys

# Add local IndicTransToolkit path so the vendored package is importable.
sys.path.append(os.path.abspath("libs/IndicTransToolkit"))
from IndicTransToolkit.processor import IndicProcessor

MODEL_NAME = "ai4bharat/indictrans2-en-indic-dist-200M"


@st.cache_resource
def _load_translation_assets():
    """Load the IndicTrans2 processor, tokenizer, and model exactly once.

    Streamlit re-executes this whole script on every widget interaction;
    without caching, the 200M-parameter model would be reloaded from disk
    on every button click. ``st.cache_resource`` keeps a single shared
    instance alive for the lifetime of the server process.
    """
    processor = IndicProcessor(inference=True)
    tok = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    mdl = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, trust_remote_code=True)
    return processor, tok, mdl


st.title("IndicTrans Translator")
st.write("Translate English text into Indian languages using IndicTrans2.")

# Module-level names preserved for any code that references them directly.
ip, tokenizer, model = _load_translation_assets()

text = st.text_area("Enter text in English:", height=150)
target_lang = st.selectbox("Select Target Language", [
    "hin_Deva", "ben_Beng", "pan_Guru", "guj_Gujr", "tam_Taml", "tel_Telu", "mal_Mlym", "mar_Deva", "kan_Knda", "asm_Beng"
])

if st.button("Translate"):
    if not text.strip():
        st.warning("Please enter some text.")
    else:
        try:
            # Source language is fixed to English (eng_Latn).
            batch = ip.preprocess_batch([text], src_lang="eng_Latn", tgt_lang=target_lang)
            batch = tokenizer(batch, padding="longest", truncation=True, max_length=256, return_tensors="pt")

            with torch.inference_mode():
                outputs = model.generate(**batch, num_beams=5, max_length=256)

            # NOTE(review): as_target_tokenizer() is deprecated in recent
            # transformers releases — confirm the pinned version still ships it.
            with tokenizer.as_target_tokenizer():
                decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True, clean_up_tokenization_spaces=True)

            translated = ip.postprocess_batch(decoded, lang=target_lang)[0]
            st.success(f"Translation: {translated}")
        except Exception as e:
            st.error(f"Error: {e}")