Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -31,33 +31,24 @@ token = os.getenv("hf_token")
 HfFolder.save_token(token)
 login(token)
 
-# --- Quality Prediction Model Setup ---
 model_paths = [
-    'karths/…
-    …
-    "karths/…
-    "karths/…
-    "karths/…
-    "karths/…
-    "karths/…
-    "karths/binary_classification_train_build",
-    "karths/binary_classification_train_automation",
-    "karths/binary_classification_train_people",
-    "karths/binary_classification_train_architecture",
+    'karths/binary_classification_train_port',
+    'karths/binary_classification_train_perf',
+    "karths/binary_classification_train_main",
+    "karths/binary_classification_train_secu",
+    "karths/binary_classification_train_reli",
+    "karths/binary_classification_train_usab",
+    "karths/binary_classification_train_comp"
 ]
 
 quality_mapping = {
-    '…
-    '…
-    '…
-    '…
-    '…
-    '…
-    '…
-    'binary_classification_train_build': 'Build',
-    'binary_classification_train_automation': 'Automation',
-    'binary_classification_train_people': 'People',
-    'binary_classification_train_architecture': 'Architecture'
+    'binary_classification_train_port': 'Portability',
+    'binary_classification_train_main': 'Maintainability',
+    'binary_classification_train_secu': 'Security',
+    'binary_classification_train_reli': 'Reliability',
+    'binary_classification_train_usab': 'Usability',
+    'binary_classification_train_perf': 'Performance',
+    'binary_classification_train_comp': 'Compatibility'
 }
 
 # Pre-load models and tokenizer for quality prediction
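For context, a minimal sketch (not part of this commit) of how a path list and mapping like the ones above are typically consumed: the repository name after the `karths/` prefix is used as the key into `quality_mapping` to obtain a display label. The helper name `label_for_path` is hypothetical.

# Hypothetical helper, assuming quality_mapping keys are the repo names
# that follow the "karths/" prefix in model_paths.
def label_for_path(model_path: str) -> str:
    repo_name = model_path.split("/")[-1]  # e.g. "binary_classification_train_port"
    return quality_mapping.get(repo_name, repo_name)  # fall back to the raw name

# label_for_path("karths/binary_classification_train_port") -> "Portability"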
@@ -82,9 +73,9 @@ def model_prediction(model, text, device):
     return avg_prob
 
 # --- Llama 3.2 3B Model Setup ---
-LLAMA_MAX_MAX_NEW_TOKENS = …
-LLAMA_DEFAULT_MAX_NEW_TOKENS = 512  # …
-LLAMA_MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "…
+LLAMA_MAX_MAX_NEW_TOKENS = 512  # Max tokens for explanation
+LLAMA_DEFAULT_MAX_NEW_TOKENS = 512  # Max tokens for explanation
+LLAMA_MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "700"))  # Reduced
 llama_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # Explicit device
 llama_model_id = "meta-llama/Llama-3.2-3B-Instruct"
 llama_tokenizer = AutoTokenizer.from_pretrained(llama_model_id)
@@ -105,7 +96,7 @@ def llama_generate(
     top_p: float = 0.9,
     top_k: int = 50,
     repetition_penalty: float = 1.2,
-) -> …
+) -> str:  # Return string, not iterator
 
     inputs = llama_tokenizer(message, return_tensors="pt", padding=True, truncation=True, max_length=LLAMA_MAX_INPUT_TOKEN_LENGTH).to(llama_model.device)
 
@@ -113,25 +104,24 @@ def llama_generate(
         inputs.input_ids = inputs.input_ids[:, -LLAMA_MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {LLAMA_MAX_INPUT_TOKEN_LENGTH} tokens.")
 
-    …
-        outputs.append(text)
-        yield "".join(outputs)
+    # Generate *without* streaming
+    with torch.no_grad():  # Ensure no gradient calculation
+        generate_ids = llama_model.generate(
+            **inputs,
+            max_new_tokens=max_new_tokens,
+            do_sample=True,
+            top_p=top_p,
+            top_k=top_k,
+            temperature=temperature,
+            num_beams=1,
+            repetition_penalty=repetition_penalty,
+            pad_token_id=llama_tokenizer.pad_token_id,  # Pass pad_token_id here
+            eos_token_id=llama_tokenizer.eos_token_id,  # Pass eos_token_id here
+        )
+    output_text = llama_tokenizer.decode(generate_ids[0], skip_special_tokens=True)
     torch.cuda.empty_cache()  # Clear cache after each generation
+    return output_text
 
 
 def generate_explanation(issue_text, top_qualities):
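Since `llama_generate` now returns a complete string rather than yielding incremental chunks, callers can use the result directly. A hedged sketch of the calling pattern this change implies (the prompt text below is invented for illustration):

# Before this commit (streaming): callers consumed an iterator of partial text.
# for partial in llama_generate(prompt):
#     latest_text = partial  # each yield was the accumulated output so far

# After this commit (non-streaming): one blocking call returns the full text.
explanation = llama_generate("Explain why this issue might affect maintainability.")
print(explanation)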
@@ -139,25 +129,27 @@ def generate_explanation(issue_text, top_qualities):
     if not top_qualities:
         return "No explanation available as no quality tags were predicted."
 
-    prompt …
-    …
+    # Build the prompt, explicitly mentioning each quality
+    prompt_parts = [
+        "Given the following issue description:\n---\n",
+        issue_text,
+        "\n---\n",
+        "Explain why this issue might be classified under the following quality categories. Provide a concise explanation for each category, relating it back to the issue description:\n"
+    ]
+    for quality, _ in top_qualities:  # Iterate through qualities
+        prompt_parts.append(f"- {quality}\n")
+
+    prompt = "".join(prompt_parts)
+
     try:
-        …
+        explanation = llama_generate(prompt)  # Get the explanation (not streamed)
+        return explanation
     except Exception as e:
         logging.error(f"Error during Llama generation: {e}")
         return "An error occurred while generating the explanation."
 
-    return explanation
 
-# @spaces.GPU(duration=…
+# @spaces.GPU(duration=60)  # Apply the GPU decorator *only* to the main interface
 def main_interface(text):
     if not text.strip():
         return "<div style='color: red;'>No text provided. Please enter a valid issue description.</div>", "", ""
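To make the new prompt construction concrete, this is roughly what `generate_explanation` assembles before calling `llama_generate`; the issue text and the `(label, score)` shape of `top_qualities` are assumptions for illustration only.

top_qualities = [("Maintainability", 0.91), ("Reliability", 0.78)]  # hypothetical prediction output
issue_text = "The build script duplicates configuration in three places and fails intermittently."

# Mirrors the prompt_parts logic added in this commit:
prompt = "".join(
    [
        "Given the following issue description:\n---\n",
        issue_text,
        "\n---\n",
        "Explain why this issue might be classified under the following quality categories. "
        "Provide a concise explanation for each category, relating it back to the issue description:\n",
    ]
    + [f"- {quality}\n" for quality, _ in top_qualities]
)
print(prompt)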