atifsial123 committed on
Commit
01df9cf
·
verified ·
1 Parent(s): 99fbda0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -7
app.py CHANGED
@@ -18,13 +18,23 @@ import gradio as gr
18
  from transformers import AutoModel, AutoTokenizer
19
  import torch
20
  from sklearn.model_selection import train_test_split
 
 
 
 
 
 
 
 
21
 
22
  # Load your dataset
23
  def load_dataset():
24
- file_path = "Valid-part-2.xlsx"
25
- print(f"Current working directory: {os.getcwd()}")
 
26
  if not os.path.exists(file_path):
27
- raise FileNotFoundError(f"Dataset not found. Please ensure that '{file_path}' exists.")
 
28
 
29
  try:
30
  df = pd.read_excel(file_path)
@@ -36,8 +46,8 @@ def load_dataset():
36
 
37
  # Preprocess the data
38
  def preprocess_data(df):
39
- # Example preprocessing: You can add more steps as needed
40
- # For now, we're just returning the dataframe as is
41
  return df
42
 
43
  # Train your model
@@ -49,8 +59,8 @@ def train_model(df):
49
  tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base")
50
  model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base")
51
 
52
- # Normally, you would fine-tune the model here with your training data
53
- # Since this is an example, we're returning the model as is
54
  return model
55
 
56
  # Define the Gradio interface function
@@ -92,3 +102,4 @@ if __name__ == "__main__":
92
  print("Failed to build the Gradio interface. Please check the dataset and model.")
93
 
94
 
 
 
18
  from transformers import AutoModel, AutoTokenizer
19
  import torch
20
  from sklearn.model_selection import train_test_split
21
+ from google.colab import files
22
+
23
+ # Upload the dataset if running in Google Colab
24
+ def upload_dataset():
25
+ uploaded = files.upload() # This will prompt the file upload
26
+ file_name = list(uploaded.keys())[0]
27
+ file_path = f'/content/{file_name}'
28
+ return file_path
29
 
30
  # Load your dataset
31
  def load_dataset():
32
+ file_path = '/content/Valid-part-2.xlsx' # Default path if the file is uploaded manually to Colab
33
+
34
+ # Check if the file exists
35
  if not os.path.exists(file_path):
36
+ print(f"File not found at '{file_path}', prompting file upload...")
37
+ file_path = upload_dataset() # Upload if not found
38
 
39
  try:
40
  df = pd.read_excel(file_path)
 
46
 
47
  # Preprocess the data
48
  def preprocess_data(df):
49
+ # Add your preprocessing steps here
50
+ # For example: cleaning, tokenization, etc.
51
  return df
52
 
53
  # Train your model
 
59
  tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base")
60
  model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base")
61
 
62
+ # Add your training code here
63
+ # This may involve tokenizing the data and feeding it into the model
64
  return model
65
 
66
  # Define the Gradio interface function
 
102
  print("Failed to build the Gradio interface. Please check the dataset and model.")
103
 
104
 
105
+