Spaces:
Runtime error
Runtime error
rajeshradhakrishnan
committed on
Commit
•
f3b1912
1
Parent(s):
74a5927
English-Malayalam Translate v10
Browse files- Dockerfile +12 -4
- main.py +36 -0
- requirements.txt +6 -7
- static/index.html +31 -0
- static/script.js +84 -0
- static/style.css +105 -0
- translate.py +0 -56
Dockerfile
CHANGED
FROM python:3.9

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

# Run as an unprivileged user (required on HF Spaces).
RUN useradd -m -u 1000 user

USER user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

COPY --chown=user . $HOME/app

# BUG FIX: main.py is copied to the root of $HOME/app (there is no
# "app" package directory), so the ASGI module path is "main:app",
# not "app.main:app" — the latter fails with ModuleNotFoundError.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""FastAPI app exposing an English→Malayalam translation endpoint
backed by facebook/mbart-large-50-one-to-many-mmt, plus static UI."""

import os

from fastapi import FastAPI, Request
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from fastapi.templating import Jinja2Templates
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast

# Loaded once at import time so every request reuses the same weights.
model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-one-to-many-mmt")
tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-one-to-many-mmt", src_lang="en_XX")

app = FastAPI()
templates = Jinja2Templates(directory="templates")


@app.get("/infer_t5")
def t5(input):
    """Translate the English query string `input` to Malayalam.

    Returns {"output": [<translated string>, ...]} — batch_decode
    always yields a list, even for a single input.
    """
    model_inputs = tokenizer(input, return_tensors="pt")

    # translate from English to Malayalam
    generated_tokens = model.generate(
        **model_inputs,
        forced_bos_token_id=tokenizer.lang_code_to_id["ml_IN"]
    )

    output = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    return {"output": output}


@app.get("/")
async def index(request: Request):
    """Render the templated index page with the HF API key injected."""
    # BUG FIX: `os` was used without being imported (NameError at
    # request time), and Jinja2Templates.TemplateResponse requires
    # the current `request` in its context dict.
    apikey = {"APIKEY": os.environ.get("API_KEY")}
    return templates.TemplateResponse("index.html", {"request": request, "apikey": apikey})


# BUG FIX: mounted LAST — Starlette matches routes in registration
# order, so mounting "/" before @app.get("/") shadowed the index
# route above. All non-route paths fall through to static assets.
app.mount("/", StaticFiles(directory="static", html=True), name="static")
requirements.txt
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
sentencepiece
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
gunicorn==20.1.0
|
|
|
1 |
+
fastapi==0.74.*
|
2 |
+
requests==2.27.*
|
3 |
+
sentencepiece==0.1.*
|
4 |
+
torch==1.11.*
|
5 |
+
transformers==4.*
|
6 |
+
uvicorn[standard]==0.17.*
|
|
static/index.html
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <!-- BUG FIX: "JavaScipt" typo in the page title -->
    <title>JavaScript Open Assistant Clone</title>
    <link rel="stylesheet" href="style.css">
</head>
<body>
    <section class="side-bar">
        <button>New Chat</button>
        <div class="history"></div>
        <nav>
            <p>Made by Rajesh</p>
        </nav>
    </section>
    <section class="main">
        <h1>Rajesh - Open Assistant</h1>
        <p id="output"></p>
        <div class="bottom-section">
            <div class="input-container">
                <input>
                <div id="submit">➢</div>
            </div>
        </div>
        <p class="info">Open Assistant - This is the 4th iteration English
            supervised-fine-tuning (SFT) model of the Open-Assistant project.
        </p>
    </section>
    <script src="script.js"></script>
</body>
</html>
static/script.js
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// NOTE(review): this file is served by StaticFiles, so the Jinja
// placeholder below is never rendered — the key must be injected by a
// template-rendered route for this to carry a real value. Confirm.
// BUG FIX: the unquoted {{ ... }} was a JavaScript syntax error that
// prevented the entire script from parsing.
const API_KEY = "{{ apikey.value }}";

// Call the server-side /infer_t5 endpoint to translate text to Malayalam.
const translateText = async (text) => {
  const inferResponse = await fetch(`infer_t5?input=${text}`);
  const inferJson = await inferResponse.json();

  return inferJson.output;
};


// Split a generated transcript on <|endoftext|> and separate the
// prompter turns from the assistant turns.
function generatePrompterAssistantText(inputString) {
  // Split the input string into an array of sentences
  const sentences = inputString.split('<|endoftext|>');

  // Initialize arrays for prompter and assistant text
  let prompterText = [];
  let assistantText = [];

  // Loop through each sentence and add it to the prompter or assistant text array
  for (let i = 0; i < sentences.length; i++) {
    // Check if the sentence contains the <prompter> tag
    if (sentences[i].includes('<|prompter|>')) {
      // Extract the text within the <prompter> tags and add it to the prompter text array
      const prompterSentence = sentences[i].replace(/<\|prompter\|>/g, '');
      prompterText.push(prompterSentence);
    } else if (sentences[i].includes('<|assistant|>')) {
      const assistantSentence = sentences[i].replace(/<\|assistant\|>/g, '');
      // Add the sentence to the assistant text array
      assistantText.push(assistantSentence);
    }
  }

  // Return the prompter and assistant text arrays
  return [prompterText, assistantText];
}

const submitButton = document.querySelector('#submit')
const outPutElement = document.querySelector('#output')
const inputElement = document.querySelector('input')
const historyElement = document.querySelector('.history')
const buttonElement = document.querySelector('button')


// Restore a previous prompt from the history panel into the input box.
function changeInput(value)
{
  console.log(value)
  const inputElement = document.querySelector('input')
  inputElement.value = value
}

// Query the hosted Open-Assistant model, translate the assistant's
// reply to Malayalam, and record the prompt in the history panel.
async function getMessage(){
  const options = {
    method: "POST",
    headers: {
      Authorization: `Bearer ${API_KEY}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify({
      inputs: "<|prompter|>" + inputElement.value + "<|endoftext|><|assistant|>"
    })
  }
  try{
    const response = await fetch("https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5", options);
    const data = await response.json()
    console.log(data[0].generated_text)

    if(inputElement.value && data && data[0].generated_text){
      const [prompterText, assistantText] = generatePrompterAssistantText(data[0].generated_text);
      // BUG FIX: original read "textContent = = await ..." — a syntax
      // error that broke the whole file.
      outPutElement.textContent = await translateText(assistantText);
      const pElement = document.createElement('p')
      pElement.textContent = inputElement.value
      pElement.addEventListener('click', () => changeInput(pElement.textContent))
      historyElement.append(pElement)
    }
  } catch(error) {
    console.log(error)
  }
}

submitButton.addEventListener('click', getMessage)

// Clear the prompt box when "New Chat" is pressed.
function clearInput(){
  inputElement.value = ''
}
buttonElement.addEventListener('click', clearInput)
static/style.css
ADDED
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@import url('https://fonts.googleapis.com/css2?family=Open+Sans:wght@400;500;600;700;800&display=swap');

* {
    color: #fff;
    font-family: 'Open Sans', sans-serif;
}
body {
    margin: 0;
    padding: 0;
    background-color: #343541;
    display: flex;
}
h1{
    font-size: 33px;
    font-weight: 600;
    padding: 200px 0;
}
.side-bar{
    background-color: #202123;
    width: 244px;
    height: 100vh;
    display: flex;
    flex-direction: column;
    justify-content: space-between;
}

.main{
    display: flex;
    flex-direction: column;
    align-items: center;
    text-align: center;
    justify-content: space-between;
    height: 100vh;
    width: 100%;
}

.bottom-section{
    width: 100%;
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
}
.info{
    color: rgba(255, 255, 255, 0.5);
    font-size: 11px;
    padding: 10px;
}

input{
    border: none;
    background-color: rgba(255, 255, 255, 0.5);
    width: 100%;
    font-size: 20px;
    padding: 12px 15px;
    border-radius: 5px;
    /* BUG FIX: "-12" lacked a unit, which invalidates the entire
       box-shadow declaration in CSS. */
    box-shadow: rgba(0, 0, 0, 0.5) 0 54px 55px,
                rgba(0, 0, 0, 0.5) 0 -12px 30px,
                rgba(0, 0, 0, 0.5) 0 4px 6px,
                rgba(0, 0, 0, 0.5) 0 12px 3px,
                rgba(0, 0, 0, 0.5) 0 -3px 5px;
}
input:focus{
    outline: none;
}

.input-container{
    position: relative;
    width: 100%;
    max-width: 650px;
}

.input-container #submit{
    position: absolute;
    right: 0;
    bottom: 15px;
    cursor: pointer;
}

button{
    border: solid 0.5px rgba(255, 255, 255, 0.5);
    background-color: transparent;
    border-radius: 5px;
    padding: 10px;
    margin: 10px;
}

nav{
    border-top: solid 0.5px rgba(255, 255, 255, 0.5);
    padding: 10px;
    margin: 10px;
}

.history{
    padding: 10px;
    margin: 10px;
    display: flex;
    flex-direction: column;
    height: 100%;
}

.history p{
    cursor: pointer;
}
translate.py
DELETED
@@ -1,56 +0,0 @@
|
|
"""Flask REST API translating English text to Malayalam via mBART-50.

NOTE(review): this file is DELETED in the commit being viewed; the
reconstruction below preserves it with its crash fixed for reference.
"""

import torch
import os
from transformers.pipelines import pipeline
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
from flask import Flask, request
from flask_restful import Api, Resource
from flask_cors import CORS

app = Flask(__name__)
CORS(app)
cors = CORS(app, resource={
    r"/*": {
        "origins": "*"
    }
})
api = Api(app)

app.config['CORS_HEADERS'] = 'Content-Type'


class Classifier():
    """Translate one English string to Malayalam at construction time."""

    def __init__(self, data_en):
        self.model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-one-to-many-mmt")
        self.tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-one-to-many-mmt", src_lang="en_XX")
        self.model_inputs = self.tokenizer(data_en, return_tensors="pt")

        # translate from English to Malayalam
        self.generated_tokens = self.model.generate(
            **self.model_inputs,
            forced_bos_token_id=self.tokenizer.lang_code_to_id["ml_IN"]
        )
        # List of decoded translation strings.
        self.translate = self.tokenizer.batch_decode(self.generated_tokens, skip_special_tokens=True)
        self.data_en = data_en

    def get_translator(self):
        """Return the translation computed in __init__."""
        # BUG FIX: `self.translate` is the decoded list, not a callable.
        # The original `self.translate(self.data_en)` raised
        # TypeError: 'list' object is not callable on every request.
        return {'output': self.translate}


class Translate(Resource):
    """POST /api/translate with JSON {"text": "..."}."""

    def post(self):
        try:
            # Decode json object from the request
            json_object = request.get_json()
            data_en = json_object["text"]
            obj = Classifier(data_en)
        except Exception as e:
            return {"Message": "Error in creating Translator object" + str(e)}
        status = obj.get_translator()
        return status


api.add_resource(Translate, '/api/translate')

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|