amiguel committed
Commit 26726c7 · verified · 1 parent: 6964639

Update app.py

Files changed (1): app.py (+18, -23)
app.py CHANGED
@@ -3,10 +3,11 @@ import torch
 import pandas as pd
 import PyPDF2
 import pickle
+import os
 from transformers import AutoTokenizer, PreTrainedModel, PretrainedConfig
 from huggingface_hub import login, hf_hub_download
 import time
-from ch09util import subsequent_mask, create_model  # Import from ch09util.py in the Space repo
+from ch09util import subsequent_mask, create_model
 
 # Device setup
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -18,9 +19,15 @@ st.set_page_config(
     layout="centered"
 )
 
-# Model repository name (corrected to the actual model repo)
+# Model repository name
 MODEL_NAME = "amiguel/custom-en2fr-transformer-v1"
 
+# Retrieve Hugging Face token from environment variable
+HF_TOKEN = os.environ.get("HF_TOKEN")
+if not HF_TOKEN:
+    st.error("🔐 Hugging Face token not found in environment variables. Please set HF_TOKEN in Space secrets.")
+    st.stop()
+
 # Title with rocket emojis
 st.title("🚀 English to French Translator 🚀")
 
@@ -28,12 +35,8 @@ st.title("🚀 English to French Translator 🚀")
 USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
 BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"
 
-# Sidebar configuration
+# Sidebar configuration (removed token input)
 with st.sidebar:
-    st.header("Authentication 🔒")
-    hf_token = st.text_input("Hugging Face Token", type="password",
-                             help="Get your token from https://huggingface.co/settings/tokens")
-
     st.header("Upload Documents 📂")
     uploaded_file = st.file_uploader(
         "Choose a PDF or XLSX file to translate",
@@ -64,18 +67,14 @@ def process_file(uploaded_file):
 
 # Custom model loading function
 @st.cache_resource
-def load_model_and_resources(hf_token):
+def load_model_and_resources():
     try:
-        if not hf_token:
-            st.error("🔐 Authentication required! Please provide a Hugging Face token.")
-            return None
-
-        login(token=hf_token)
+        login(token=HF_TOKEN)
 
         # Load tokenizer from the model repo
         tokenizer = AutoTokenizer.from_pretrained(
             MODEL_NAME,
-            token=hf_token
+            token=HF_TOKEN
         )
 
         # Define Transformer configuration
@@ -109,7 +108,7 @@ def load_model_and_resources(hf_token):
                 return self.model(src, tgt, src_mask, tgt_mask)
 
         # Load config with validation from the model repo
-        config_dict = TransformerConfig.from_pretrained(MODEL_NAME, token=hf_token).to_dict()
+        config_dict = TransformerConfig.from_pretrained(MODEL_NAME, token=HF_TOKEN).to_dict()
         if "src_vocab_size" not in config_dict or "tgt_vocab_size" not in config_dict:
             st.warning(
                 f"Config at {MODEL_NAME}/config.json is missing 'src_vocab_size' or 'tgt_vocab_size'. "
@@ -121,7 +120,7 @@ def load_model_and_resources(hf_token):
 
         # Initialize model on meta device and load weights explicitly
         model = CustomTransformer(config)
-        weights_path = hf_hub_download(repo_id=MODEL_NAME, filename="model.safetensors", token=hf_token)
+        weights_path = hf_hub_download(repo_id=MODEL_NAME, filename="model.safetensors", token=HF_TOKEN)
         from safetensors.torch import load_file
         state_dict = load_file(weights_path)
         model.load_state_dict(state_dict)
@@ -136,7 +135,7 @@ def load_model_and_resources(hf_token):
         model.eval()
 
         # Load dictionaries from the model repo
-        dict_path = hf_hub_download(repo_id=MODEL_NAME, filename="dict.p", token=hf_token)
+        dict_path = hf_hub_download(repo_id=MODEL_NAME, filename="dict.p", token=HF_TOKEN)
         with open(dict_path, "rb") as fb:
             en_word_dict, en_idx_dict, fr_word_dict, fr_idx_dict = pickle.load(fb)
 
@@ -190,15 +189,11 @@ for message in st.session_state.messages:
 
 # Chat input handling
 if prompt := st.chat_input("Enter text to translate into French..."):
-    if not hf_token:
-        st.error("🔑 Authentication required!")
-        st.stop()
-
     # Load model and resources if not already loaded
     if "model" not in st.session_state:
-        model_data = load_model_and_resources(hf_token)
+        model_data = load_model_and_resources()
         if model_data is None:
-            st.error("Failed to load model. Please check your token and try again.")
+            st.error("Failed to load model. Please check the HF_TOKEN in Space secrets and try again.")
             st.stop()
 
         st.session_state.model, st.session_state.tokenizer, \
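
Since the updated app.py now reads the token from the environment, the Space needs an HF_TOKEN secret configured before this code will run. A minimal sketch of one way to create that secret programmatically, assuming a recent huggingface_hub; the Space id and token strings below are placeholders for illustration, not values from this commit:

from huggingface_hub import HfApi

# Sketch only: create the HF_TOKEN secret that app.py reads via os.environ.get("HF_TOKEN").
# The Space repo id and both token strings are hypothetical placeholders.
api = HfApi(token="hf_xxx_write_token")  # a token with write access to the Space
api.add_space_secret(
    repo_id="amiguel/your-space-name",   # hypothetical Space repo id
    key="HF_TOKEN",
    value="hf_xxx_read_token",           # the value the running app will see at runtime
)

The same secret can also be added by hand in the Space's Settings under "Variables and secrets".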