Spaces:

atifsial123
/

Engineer

Sleeping

atifsial123 committed on Sep 2, 2024

Commit

01df9cf

verified ·

1 Parent(s): 99fbda0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -18,13 +18,23 @@ import gradio as gr
 from transformers import AutoModel, AutoTokenizer
 import torch
 from sklearn.model_selection import train_test_split
 # Load your dataset
 def load_dataset():
-    file_path = "Valid-part-2.xlsx"
-    print(f"Current working directory: {os.getcwd()}")
     if not os.path.exists(file_path):
-        raise FileNotFoundError(f"Dataset not found. Please ensure that '{file_path}' exists.")
     try:
         df = pd.read_excel(file_path)
@@ -36,8 +46,8 @@ def load_dataset():
 # Preprocess the data
 def preprocess_data(df):
-    # Example preprocessing: You can add more steps as needed
-    # For now, we're just returning the dataframe as is
     return df
 # Train your model
@@ -49,8 +59,8 @@ def train_model(df):
     tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base")
     model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base")
-    # Normally, you would fine-tune the model here with your training data
-    # Since this is an example, we're returning the model as is
     return model
 # Define the Gradio interface function
@@ -92,3 +102,4 @@ if __name__ == "__main__":
         print("Failed to build the Gradio interface. Please check the dataset and model.")

 from transformers import AutoModel, AutoTokenizer
 import torch
 from sklearn.model_selection import train_test_split
+from google.colab import files
+# Upload the dataset if running in Google Colab
+def upload_dataset():
+    uploaded = files.upload()  # This will prompt the file upload
+    file_name = list(uploaded.keys())[0]
+    file_path = f'/content/{file_name}'
+    return file_path
 # Load your dataset
 def load_dataset():
+    file_path = '/content/Valid-part-2.xlsx'  # Default path if the file is uploaded manually to Colab
+    # Check if the file exists
     if not os.path.exists(file_path):
+        print(f"File not found at '{file_path}', prompting file upload...")
+        file_path = upload_dataset()  # Upload if not found
     try:
         df = pd.read_excel(file_path)
 # Preprocess the data
 def preprocess_data(df):
+    # Add your preprocessing steps here
+    # For example: cleaning, tokenization, etc.
     return df
 # Train your model
     tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base")
     model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base")
+    # Add your training code here
+    # This may involve tokenizing the data and feeding it into the model
     return model
 # Define the Gradio interface function
         print("Failed to build the Gradio interface. Please check the dataset and model.")