bambadij committed (verified)
Commit b1435f0 · Parent(s): 48333ed

Files changed (1):
  app.py (+19 -7)
app.py CHANGED
@@ -136,33 +136,45 @@ async def generate_text(request: RequestModel):
     return {"summary_text_2": generated_text}
 @app.post("/generate2/")
 async def generate_text(file: UploadFile = File(...)):
+    # Check file size
+    contents = await file.read()
+    file_size = len(contents)
+
+    if file_size > 5_000_000:  # 5MB limit
+        return {"error": "File size exceeds the 5MB limit. The file will be sampled."}
+
     # Read the uploaded CSV file
     try:
-        contents = await file.read()
-        df = pd.read_csv(StringIO(contents.decode('utf-8')))
+        df = pd.read_csv(io.StringIO(contents.decode('utf-8')))
     except Exception as e:
         return {"error": f"Error reading CSV file: {str(e)}"}
 
-    # Concatenate all rows into a single string
+    # Sample the data if it's too large
+    if len(df) > 1000:  # Adjust this number based on your needs
+        df = df.sample(n=100, random_state=42)
+
+    # Convert the DataFrame to a string
     try:
-        # Convert the entire DataFrame to a string
         text_to_generate = df.to_string(index=False)
     except Exception as e:
         return {"error": f"Error converting DataFrame to string: {str(e)}"}
 
+    # Ensure the generated text is within size limits
+    if len(text_to_generate.encode('utf-8')) > 5_000_000:
+        return {"error": "Generated text exceeds size limit even after sampling. Please reduce the data further."}
+
     # Create the request for the API
     try:
         completion = client.chat.completions.create(
             model="meta/llama-3.1-8b-instruct",
-            messages=[{"role": "user", "content": prompt1 + text_to_generate}],
+            messages=[{"role": "user", "content": prompt1 + text_to_generate}],
             temperature=0.2,
             top_p=0.9,
-            # max_tokens=1024,
             stream=True
         )
     except Exception as e:
         return {"error": f"Error generating text: {str(e)}"}
-
+
     generated_text = ""
     for chunk in completion:
         if chunk.choices[0].delta.content is not None:
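
For reference, a minimal client-side sketch of calling the updated /generate2/ endpoint. The base URL, port, and CSV filename are assumptions for illustration and are not part of this commit; the exact response key is also not visible in this hunk, so the result is printed as-is.

import requests  # assumed available in the client environment

BASE_URL = "http://localhost:7860"  # hypothetical host/port for the running Space

# Upload a CSV under the new 5 MB limit; larger files are rejected by the endpoint.
with open("data.csv", "rb") as f:  # hypothetical input file
    response = requests.post(
        f"{BASE_URL}/generate2/",
        files={"file": ("data.csv", f, "text/csv")},
    )

print(response.json())  # either the generated summary or an {"error": ...} payload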