Update app.py
app.py
CHANGED
@@ -3,10 +3,11 @@ import torch
 import pandas as pd
 import PyPDF2
 import pickle
+import os
 from transformers import AutoTokenizer, PreTrainedModel, PretrainedConfig
 from huggingface_hub import login, hf_hub_download
 import time
-from ch09util import subsequent_mask, create_model
+from ch09util import subsequent_mask, create_model
 
 # Device setup
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -18,9 +19,15 @@ st.set_page_config(
     layout="centered"
 )
 
-# Model repository name
+# Model repository name
 MODEL_NAME = "amiguel/custom-en2fr-transformer-v1"
 
+# Retrieve Hugging Face token from environment variable
+HF_TOKEN = os.environ.get("HF_TOKEN")
+if not HF_TOKEN:
+    st.error("🔐 Hugging Face token not found in environment variables. Please set HF_TOKEN in Space secrets.")
+    st.stop()
+
 # Title with rocket emojis
 st.title("🚀 English to French Translator 🚀")
 
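A minimal standalone sketch of the token flow the new startup check relies on (the variable name HF_TOKEN matches the Space secret used above; everything else here is illustrative, not part of app.py):

import os
from huggingface_hub import login

# The Space injects the secret as an environment variable at runtime;
# locally it can be exported in the shell before launching Streamlit.
token = os.environ.get("HF_TOKEN")
if token:
    login(token=token)  # authenticates later hf_hub_download / from_pretrained calls
else:
    raise RuntimeError("HF_TOKEN is not set")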
@@ -28,12 +35,8 @@ st.title("🚀 English to French Translator 🚀")
 USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
 BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"
 
-# Sidebar configuration
+# Sidebar configuration (removed token input)
 with st.sidebar:
-    st.header("Authentication 🔒")
-    hf_token = st.text_input("Hugging Face Token", type="password",
-                             help="Get your token from https://huggingface.co/settings/tokens")
-
     st.header("Upload Documents 📂")
     uploaded_file = st.file_uploader(
         "Choose a PDF or XLSX file to translate",
@@ -64,18 +67,14 @@ def process_file(uploaded_file):
 
 # Custom model loading function
 @st.cache_resource
-def load_model_and_resources(hf_token):
+def load_model_and_resources():
     try:
-        if not hf_token:
-            st.error("🔐 Authentication required! Please provide a Hugging Face token.")
-            return None
-
-        login(token=hf_token)
+        login(token=HF_TOKEN)
 
         # Load tokenizer from the model repo
         tokenizer = AutoTokenizer.from_pretrained(
             MODEL_NAME,
-            token=hf_token
+            token=HF_TOKEN
         )
 
         # Define Transformer configuration
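Because the loader now takes no arguments, @st.cache_resource caches a single result for the whole Streamlit server process rather than one entry per token value. A minimal sketch of that behaviour, assuming only that streamlit is installed (heavy_load is an illustrative stand-in, not a function from app.py):

import time
import streamlit as st

@st.cache_resource  # body runs once per process; script reruns reuse the returned object
def heavy_load():
    # stand-in for loading the tokenizer, weights, and dictionaries above
    return {"loaded_at": time.time()}

resources = heavy_load()  # same object on every rerun until the cache is cleared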
@@ -109,7 +108,7 @@ def load_model_and_resources(hf_token):
                 return self.model(src, tgt, src_mask, tgt_mask)
 
         # Load config with validation from the model repo
-        config_dict = TransformerConfig.from_pretrained(MODEL_NAME, token=hf_token).to_dict()
+        config_dict = TransformerConfig.from_pretrained(MODEL_NAME, token=HF_TOKEN).to_dict()
         if "src_vocab_size" not in config_dict or "tgt_vocab_size" not in config_dict:
             st.warning(
                 f"Config at {MODEL_NAME}/config.json is missing 'src_vocab_size' or 'tgt_vocab_size'. "
@@ -121,7 +120,7 @@ def load_model_and_resources(hf_token):
 
         # Initialize model on meta device and load weights explicitly
         model = CustomTransformer(config)
-        weights_path = hf_hub_download(repo_id=MODEL_NAME, filename="model.safetensors", token=hf_token)
+        weights_path = hf_hub_download(repo_id=MODEL_NAME, filename="model.safetensors", token=HF_TOKEN)
         from safetensors.torch import load_file
         state_dict = load_file(weights_path)
         model.load_state_dict(state_dict)
@@ -136,7 +135,7 @@ def load_model_and_resources(hf_token):
         model.eval()
 
         # Load dictionaries from the model repo
-        dict_path = hf_hub_download(repo_id=MODEL_NAME, filename="dict.p", token=hf_token)
+        dict_path = hf_hub_download(repo_id=MODEL_NAME, filename="dict.p", token=HF_TOKEN)
         with open(dict_path, "rb") as fb:
             en_word_dict, en_idx_dict, fr_word_dict, fr_idx_dict = pickle.load(fb)
 
@@ -190,15 +189,11 @@ for message in st.session_state.messages:
 
 # Chat input handling
 if prompt := st.chat_input("Enter text to translate into French..."):
-    if not hf_token:
-        st.error("🔑 Authentication required!")
-        st.stop()
-
     # Load model and resources if not already loaded
     if "model" not in st.session_state:
-        model_data = load_model_and_resources(hf_token)
+        model_data = load_model_and_resources()
         if model_data is None:
-            st.error("Failed to load model. Please check
+            st.error("Failed to load model. Please check the HF_TOKEN in Space secrets and try again.")
             st.stop()
 
         st.session_state.model, st.session_state.tokenizer, \