change pipeline to app

Files changed (4) hide show

app.py +56 -0
config.json +0 -3
pipeline.py +0 -63
requirements.txt +4 -4

app.py ADDED Viewed

	@@ -0,0 +1,56 @@

+# app.py
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from tensorflow.keras.models import load_model
+from tensorflow.keras.preprocessing.text import tokenizer_from_json
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+import json
+from typing import Union, List
+app = FastAPI()
+# Load the trained Keras model and its tokenizer once at import time so that
+# every request reuses the same objects. Both paths are relative, so they are
+# resolved against the process's working directory.
+model = load_model('news_classifier.h5')
+# Read explicitly as UTF-8 so the vocabulary decodes the same way regardless
+# of the platform's default locale encoding.
+with open('tokenizer.json', 'r', encoding='utf-8') as f:
+    tokenizer_data = json.load(f)
+# NOTE(review): tokenizer_from_json takes the JSON *string* produced by
+# Tokenizer.to_json(); this presumes tokenizer.json holds that string
+# JSON-encoded, so json.load returns a str -- confirm against how it was saved.
+tokenizer = tokenizer_from_json(tokenizer_data)
+# Request body accepted by POST /predict.
+class PredictionInput(BaseModel):
+    # Either a single text to classify or a list of texts (a batch).
+    text: Union[str, List[str]]
+# Shape of one classification result: a label and its score.
+# NOTE(review): no route in this file references this model; /predict returns
+# plain dicts with the same two keys.
+class PredictionOutput(BaseModel):
+    label: str
+    score: float
+@app.post("/predict")
+async def predict(input_data: PredictionInput):
+    """Classify one or more news texts as "foxnews" or "nbc".
+
+    Args:
+        input_data: Request body whose ``text`` is a single string or a
+            list of strings.
+
+    Returns:
+        A ``{"label": ..., "score": ...}`` dict when ``text`` was a single
+        string, otherwise a list of such dicts, one per input text in
+        order. An empty list yields an empty list.
+
+    Raises:
+        HTTPException: status 500, with the underlying error message as
+            detail, if preprocessing or inference fails.
+    """
+    # Remember the input shape so the response can mirror it.
+    single_input = isinstance(input_data.text, str)
+    texts = [input_data.text] if single_input else input_data.text
+    # Nothing to classify: answer directly rather than handing the model an
+    # empty batch (which would otherwise surface as a 500 if Keras rejects it).
+    if not texts:
+        return []
+    try:
+        # Preprocess: words -> vocabulary ids, then pad/truncate to fixed length
+        sequences = tokenizer.texts_to_sequences(texts)
+        padded = pad_sequences(sequences, maxlen=41)  # Use your model's expected input length
+        # verbose=0 keeps Keras from printing a progress bar on every request
+        predictions = model.predict(padded, verbose=0)
+        # Format results
+        results = []
+        for pred in predictions:
+            # Index 1 is treated as the "foxnews" score (binary classification)
+            score = float(pred[1])
+            label = "foxnews" if score > 0.5 else "nbc"
+            results.append({
+                "label": label,
+                # Report the score of the chosen label, not always index 1
+                "score": score if label == "foxnews" else 1 - score
+            })
+        # Return single result if input was single string
+        return results[0] if single_input else results
+    except Exception as e:
+        # Keep the original cause attached for server-side tracebacks
+        raise HTTPException(status_code=500, detail=str(e)) from e
+@app.get("/")
+async def root():
+    # Simple status endpoint: returns a fixed message confirming the app responds.
+    status = {"message": "News Classifier API is running"}
+    return status

config.json DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:94fe098b058680d6431f9d8e034176ce15684464230c1d7194f98c092ed78cdb
-size 345

pipeline.py DELETED Viewed

@@ -1,63 +0,0 @@
-from transformers import PreTrainedModel, PretrainedConfig
-from tensorflow.keras.models import load_model
-from tensorflow.keras.preprocessing.text import tokenizer_from_json
-from tensorflow.keras.preprocessing.sequence import pad_sequences
-import numpy as np
-import json
-class NewsClassifierConfig(PretrainedConfig):
-    model_type = "text_classifier"
-    def __init__(
-        self,
-        max_length=41,  # Modified to match model input shape
-        vocab_size=74934,  # Modified based on embedding layer size
-        embedding_dim=128,  # Added to match model architecture
-        hidden_size=64,  # Matches final LSTM layer
-        num_labels=2,
-        **kwargs
-    ):
-        self.max_length = max_length
-        self.vocab_size = vocab_size
-        self.embedding_dim = embedding_dim
-        self.hidden_size = hidden_size
-        self.num_labels = num_labels
-        super().__init__(**kwargs)
-class NewsClassifier(PreTrainedModel):
-    config_class = NewsClassifierConfig
-    base_model_prefix = "text_classifier"
-    def __init__(self, config):
-        super().__init__(config)
-        self.model = None
-        self.tokenizer = None
-    def post_init(self):
-        """Load model and tokenizer after initialization"""
-        self.model = load_model('news_classifier.h5')
-        with open('tokenizer.json', 'r') as f:
-            tokenizer_data = json.load(f)
-            self.tokenizer = tokenizer_from_json(tokenizer_data)
-    def forward(self, text_input):
-        if not self.model or not self.tokenizer:
-            self.post_init()
-        if isinstance(text_input, str):
-            text_input = [text_input]
-        sequences = self.tokenizer.texts_to_sequences(text_input)
-        padded = pad_sequences(sequences, maxlen=self.config.max_length)
-        predictions = self.model.predict(padded, verbose=0)
-        results = []
-        for pred in predictions:
-            score = float(pred[1])
-            label = "foxnews" if score > 0.5 else "nbc"
-            results.append({
-                "label": label,
-                "score": score if label == "foxnews" else 1 - score
-            })
-        return results[0] if len(text_input) == 1 else results

requirements.txt CHANGED Viewed

@@ -1,7 +1,7 @@
 tensorflow>=2.10.0
-transformers>=4.46.3
-numpy>=1.19.2
-scikit-learn>=0.24.2
 fastapi>=0.68.0
 uvicorn>=0.15.0
-pydantic>=1.8.2

 tensorflow>=2.10.0
 fastapi>=0.68.0
 uvicorn>=0.15.0
+pydantic>=1.8.2
+numpy>=1.19.2
+scikit-learn>=0.24.2
+python-multipart