rajeshradhakrishnan's picture
changed to AI4Bharat IndicTrans-English2Indic
656a72b
raw
history blame
1.44 kB
import os
import requests
import json
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from mosestokenizer import *
from indicnlp.tokenize import sentence_tokenize
# Language codes that split_sentences() routes to the Indic NLP sentence splitter.
INDIC = [
    "as", "bn", "gu", "hi", "kn", "ml",
    "mr", "or", "pa", "ta", "te",
]


def split_sentences(paragraph, language):
    """Split *paragraph* into sentences using a splitter chosen by *language*.

    Args:
        paragraph: Text to split.
        language: Language code. ``"en"`` uses ``MosesSentenceSplitter``;
            any code listed in ``INDIC`` uses
            ``sentence_tokenize.sentence_split``.

    Returns:
        Whatever the selected splitter returns for the paragraph, or ``None``
        when *language* is neither ``"en"`` nor one of the ``INDIC`` codes.
    """
    if language in INDIC:
        return sentence_tokenize.sentence_split(paragraph, lang=language)

    if language != "en":
        # No splitter is configured for this language.
        return None

    # The Moses splitter is called with a list of paragraphs, so wrap the
    # single paragraph in a one-element list.
    with MosesSentenceSplitter(language) as moses:
        sentences = moses([paragraph])
    return sentences
# model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-one-to-many-mmt")
# tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-one-to-many-mmt", src_lang="en_XX")
# FastAPI application instance; the routes and the static mount further down
# in this file are registered on it.
app = FastAPI()
# Base URL (hard-coded host and port) of the remote translation service that
# the /infer_t5 handler posts to.
uri = "http://216.48.181.177:5050"
@app.get("/infer_t5")
def t5(input: str):
    """Translate English text to Malayalam via the remote batch-translate API.

    The text is split into sentences, sent as one batch to
    ``{uri}/batch_translate`` with source language ``"en"`` and target
    language ``"ml"``, and the translated sentences are joined back into a
    single string.

    Args:
        input: English text to translate, taken from the ``input`` query
            parameter. The name shadows the builtin, but it is part of the
            public URL contract (``/infer_t5?input=...``) and so is kept.

    Returns:
        ``{"output": <translated text>}``. Blank input yields
        ``{"output": ""}`` without contacting the translation service.

    Raises:
        HTTPException: 502 when the translation service cannot be reached,
            times out, answers with an error status, or returns a payload
            without a usable ``text_lines`` entry.
    """
    # Local import keeps this handler self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    # Nothing to translate: skip the splitter and the network round trip.
    if not input or not input.strip():
        return {"output": ""}

    API_URL = f"{uri}/batch_translate"
    sentence_batch = split_sentences(input, language="en")

    try:
        response = requests.post(
            API_URL,
            json={
                "text_lines": sentence_batch,
                "source_language": "en",
                "target_language": "ml",
            },
            # Without a timeout a stalled upstream would block this worker
            # indefinitely.
            timeout=30,
        )
        response.raise_for_status()
        translated = response.json()["text_lines"]
    except (requests.RequestException, ValueError, KeyError, TypeError) as err:
        raise HTTPException(
            status_code=502,
            detail="Translation service request failed",
        ) from err

    # Return every translated sentence, not just the first one, so that
    # multi-sentence input is not truncated.
    return {"output": " ".join(translated)}
# Mount a StaticFiles app (html=True) for the relative "static" directory at
# the site root under the route name "static".
# NOTE(review): this mount on "/" is registered before the @app.get("/") route
# that follows it; routes are presumably matched in registration order, which
# would leave that later handler unreachable — confirm before relying on it.
app.mount("/", StaticFiles(directory="static", html=True), name="static")
@app.get("/")
def index() -> FileResponse:
    """Return the page stored at /app/static/index.html as an HTML response."""
    landing_page = "/app/static/index.html"
    return FileResponse(path=landing_page, media_type="text/html")