Sephfox committed on
Commit
289ccd4
·
verified ·
1 Parent(s): 327adad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -49
app.py CHANGED
@@ -35,7 +35,6 @@ def setup_cyberpunk_style():
35
  from {text-shadow: 0 0 5px #00ff9d, 0 0 10px #00ff9d;}
36
  to {text-shadow: 0 0 15px #00b8ff, 0 0 20px #00b8ff;}
37
  }
38
-
39
  .stButton > button {
40
  font-family: 'Orbitron', sans-serif;
41
  background: linear-gradient(45deg, #00ff9d, #00b8ff);
@@ -51,7 +50,6 @@ def setup_cyberpunk_style():
51
  transform: scale(1.1);
52
  box-shadow: 0 0 20px rgba(0, 255, 157, 0.5);
53
  }
54
-
55
  .progress-bar-container {
56
  background: rgba(0, 0, 0, 0.5);
57
  border-radius: 15px;
@@ -115,12 +113,20 @@ def initialize_model(model_name="gpt2"):
115
  tokenizer.pad_token = tokenizer.eos_token
116
  return model, tokenizer
117
 
118
- # Load Dataset Function
119
- def load_dataset(data_source="demo", tokenizer=None):
120
  if data_source == "demo":
121
  data = ["Sample text data for model training. This can be replaced with actual data for better performance."]
 
 
 
 
 
 
 
122
  else:
123
- data = ["Loaded data from uploaded text file."]
 
124
  dataset = prepare_dataset(data, tokenizer)
125
  return dataset
126
 
@@ -160,53 +166,35 @@ def main():
160
  with st.sidebar:
161
  st.markdown("### Configuration Panel")
162
 
163
- # Hugging Face API Token Input
164
- hf_token = st.text_input("Enter your Hugging Face Token", type="password")
165
- if hf_token:
166
- api = HfApi()
167
- api.set_access_token(hf_token)
168
- st.success("Hugging Face token added successfully!")
169
-
170
- # Training Parameters
171
- training_epochs = st.slider("Training Epochs", min_value=1, max_value=5, value=3)
172
- batch_size = st.slider("Batch Size", min_value=2, max_value=8, value=4)
173
- model_choice = st.selectbox("Model Selection", ("gpt2", "distilgpt2", "gpt2-medium"))
174
-
175
- # Dataset Source Selection
176
- data_source = st.selectbox("Data Source", ("demo", "uploaded file"))
177
- if data_source == "uploaded file":
178
- uploaded_file = st.file_uploader("Upload a text file", type=["txt", "csv"])
179
-
180
- custom_learning_rate = st.slider("Learning Rate", min_value=1e-6, max_value=5e-4, value=3e-5, step=1e-6)
181
-
182
- # Advanced Settings Toggle
183
- advanced_toggle = st.checkbox("Advanced Training Settings")
184
- if advanced_toggle:
185
- warmup_steps = st.slider("Warmup Steps", min_value=0, max_value=500, value=100)
186
- weight_decay = st.slider("Weight Decay", min_value=0.0, max_value=0.1, step=0.01, value=0.01)
187
- else:
188
- warmup_steps = 100
189
- weight_decay = 0.01
190
 
 
 
 
 
 
 
 
 
191
 
192
  # Load Dataset
193
- train_dataset = load_dataset(data_source, tokenizer, uploaded_file=uploaded_file)
194
- def load_dataset(data_source="demo", tokenizer=None, uploaded_file=None):
195
- if data_source == "demo":
196
- data = ["Sample text data for model training. This can be replaced with actual data for better performance."]
197
- elif uploaded_file is not None:
198
- if uploaded_file.name.endswith(".txt"):
199
- data = [uploaded_file.read().decode("utf-8")]
200
- elif uploaded_file.name.endswith(".csv"):
201
- import pandas as pd
202
- df = pd.read_csv(uploaded_file)
203
- data = df[df.columns[0]].tolist() # assuming first column is text data
204
- else:
205
- data = ["No file uploaded. Please upload a dataset."]
206
-
207
- dataset = prepare_dataset(data, tokenizer)
208
- return dataset
209
-
210
 
211
  # Start Training with Progress Bar
212
  progress_placeholder = st.empty()
@@ -227,3 +215,4 @@ def load_dataset(data_source="demo", tokenizer=None, uploaded_file=None):
227
 
228
  if __name__ == "__main__":
229
  main()
 
 
35
  from {text-shadow: 0 0 5px #00ff9d, 0 0 10px #00ff9d;}
36
  to {text-shadow: 0 0 15px #00b8ff, 0 0 20px #00b8ff;}
37
  }
 
38
  .stButton > button {
39
  font-family: 'Orbitron', sans-serif;
40
  background: linear-gradient(45deg, #00ff9d, #00b8ff);
 
50
  transform: scale(1.1);
51
  box-shadow: 0 0 20px rgba(0, 255, 157, 0.5);
52
  }
 
53
  .progress-bar-container {
54
  background: rgba(0, 0, 0, 0.5);
55
  border-radius: 15px;
 
113
  tokenizer.pad_token = tokenizer.eos_token
114
  return model, tokenizer
115
 
116
# Load Dataset Function with Uploaded File Option
def load_dataset(data_source="demo", tokenizer=None, uploaded_file=None):
    """Build the training dataset from the demo text or an uploaded file.

    Args:
        data_source: ``"demo"`` selects the built-in sample sentence; any
            other value means the caller wants the uploaded file.
        tokenizer: Passed through unchanged to ``prepare_dataset``.
        uploaded_file: File-like object exposing ``.name`` and ``.read()``
            (the sidebar passes the result of ``st.file_uploader``), or
            ``None`` when nothing was uploaded.

    Returns:
        Whatever ``prepare_dataset(data, tokenizer)`` returns for the
        collected list of texts.

    Raises:
        ValueError: If a file was uploaded whose name ends in neither
            ``.txt`` nor ``.csv``. Previously this path left ``data``
            unassigned and crashed with ``UnboundLocalError``.
    """
    if data_source == "demo":
        data = ["Sample text data for model training. This can be replaced with actual data for better performance."]
    elif uploaded_file is not None:
        # Compare case-insensitively so "NOTES.TXT" is treated like "notes.txt".
        file_name = uploaded_file.name.lower()
        if file_name.endswith(".txt"):
            data = [uploaded_file.read().decode("utf-8")]
        elif file_name.endswith(".csv"):
            import pandas as pd

            df = pd.read_csv(uploaded_file)
            # Assumes the first column holds the text. Empty cells are dropped
            # and values are coerced to str, since the texts are presumably
            # tokenized downstream -- TODO confirm against prepare_dataset.
            data = df[df.columns[0]].dropna().astype(str).tolist()
        else:
            raise ValueError(
                f"Unsupported file type: {uploaded_file.name!r}. "
                "Please upload a .txt or .csv file."
            )
    else:
        # No upload yet: keep the original placeholder-text behaviour.
        data = ["No file uploaded. Please upload a dataset."]

    dataset = prepare_dataset(data, tokenizer)
    return dataset
132
 
 
166
  with st.sidebar:
167
  st.markdown("### Configuration Panel")
168
 
169
+ # Hugging Face API Token Input
170
+ hf_token = st.text_input("Enter your Hugging Face Token", type="password")
171
+ if hf_token:
172
+ api = HfApi()
173
+ api.set_access_token(hf_token)
174
+ st.success("Hugging Face token added successfully!")
175
+
176
+ # Training Parameters
177
+ training_epochs = st.slider("Training Epochs", min_value=1, max_value=5, value=3)
178
+ batch_size = st.slider("Batch Size", min_value=2, max_value=8, value=4)
179
+ model_choice = st.selectbox("Model Selection", ("gpt2", "distilgpt2", "gpt2-medium"))
180
+
181
+ # Dataset Source Selection
182
+ data_source = st.selectbox("Data Source", ("demo", "uploaded file"))
183
+ uploaded_file = st.file_uploader("Upload a text file", type=["txt", "csv"]) if data_source == "uploaded file" else None
184
+
185
+ custom_learning_rate = st.slider("Learning Rate", min_value=1e-6, max_value=5e-4, value=3e-5, step=1e-6)
 
 
 
 
 
 
 
 
 
 
186
 
187
+ # Advanced Settings Toggle
188
+ advanced_toggle = st.checkbox("Advanced Training Settings")
189
+ if advanced_toggle:
190
+ warmup_steps = st.slider("Warmup Steps", min_value=0, max_value=500, value=100)
191
+ weight_decay = st.slider("Weight Decay", min_value=0.0, max_value=0.1, step=0.01, value=0.01)
192
+ else:
193
+ warmup_steps = 100
194
+ weight_decay = 0.01
195
 
196
  # Load Dataset
197
+ train_dataset = load_dataset(data_source, tokenizer, uploaded_file=uploaded_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
199
  # Start Training with Progress Bar
200
  progress_placeholder = st.empty()
 
215
 
216
  if __name__ == "__main__":
217
  main()
218
+