Spaces:
Sleeping
Sleeping
atifsial123
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -18,13 +18,23 @@ import gradio as gr
|
|
18 |
from transformers import AutoModel, AutoTokenizer
|
19 |
import torch
|
20 |
from sklearn.model_selection import train_test_split
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
# Load your dataset
|
23 |
def load_dataset():
|
24 |
-
file_path =
|
25 |
-
|
|
|
26 |
if not os.path.exists(file_path):
|
27 |
-
|
|
|
28 |
|
29 |
try:
|
30 |
df = pd.read_excel(file_path)
|
@@ -36,8 +46,8 @@ def load_dataset():
|
|
36 |
|
37 |
# Preprocess the data
|
38 |
def preprocess_data(df):
|
39 |
-
#
|
40 |
-
# For
|
41 |
return df
|
42 |
|
43 |
# Train your model
|
@@ -49,8 +59,8 @@ def train_model(df):
|
|
49 |
tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base")
|
50 |
model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base")
|
51 |
|
52 |
-
#
|
53 |
-
#
|
54 |
return model
|
55 |
|
56 |
# Define the Gradio interface function
|
@@ -92,3 +102,4 @@ if __name__ == "__main__":
|
|
92 |
print("Failed to build the Gradio interface. Please check the dataset and model.")
|
93 |
|
94 |
|
|
|
|
18 |
from transformers import AutoModel, AutoTokenizer
|
19 |
import torch
|
20 |
from sklearn.model_selection import train_test_split
|
21 |
+
from google.colab import files
|
22 |
+
|
23 |
+
# Upload the dataset if running in Google Colab
|
24 |
+
def upload_dataset():
    """Prompt for a file upload in Google Colab and return the file's path.

    Returns:
        The path of the first uploaded file, built under ``/content``.
        NOTE(review): this assumes uploads are saved into ``/content``
        (the path the rest of this file uses) -- confirm if the working
        directory is ever changed.

    Raises:
        FileNotFoundError: If the upload finishes without any file, for
            example because the dialog was cancelled.
    """
    uploaded = files.upload()  # This will prompt the file upload
    if not uploaded:
        # An empty result used to surface as an opaque IndexError from
        # indexing an empty list; fail with an actionable message instead.
        raise FileNotFoundError(
            "No file was uploaded; cannot determine the dataset path."
        )
    # Only the first uploaded entry is used; any further files are ignored.
    file_name = next(iter(uploaded))
    return f'/content/{file_name}'
|
29 |
|
30 |
# Load your dataset
|
31 |
def load_dataset():
|
32 |
+
file_path = '/content/Valid-part-2.xlsx' # Default path if the file is uploaded manually to Colab
|
33 |
+
|
34 |
+
# Check if the file exists
|
35 |
if not os.path.exists(file_path):
|
36 |
+
print(f"File not found at '{file_path}', prompting file upload...")
|
37 |
+
file_path = upload_dataset() # Upload if not found
|
38 |
|
39 |
try:
|
40 |
df = pd.read_excel(file_path)
|
|
|
46 |
|
47 |
# Preprocess the data
|
48 |
def preprocess_data(df):
    """Return the dataset unchanged; placeholder for preprocessing logic.

    Args:
        df: The dataset to preprocess (presumably the DataFrame read by
            ``load_dataset`` -- verify against the caller).

    Returns:
        The very same object that was passed in; no copy is made and no
        transformation is applied yet.
    """
    # TODO: add preprocessing steps here, e.g. cleaning, tokenization, etc.
    return df
|
52 |
|
53 |
# Train your model
|
|
|
59 |
tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base")
|
60 |
model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base")
|
61 |
|
62 |
+
# Add your training code here
|
63 |
+
# This may involve tokenizing the data and feeding it into the model
|
64 |
return model
|
65 |
|
66 |
# Define the Gradio interface function
|
|
|
102 |
print("Failed to build the Gradio interface. Please check the dataset and model.")
|
103 |
|
104 |
|
105 |
+
|