Jiahuita committed
Commit cec65b9 · 1 Parent(s): 1833f3a

change pipeline to app

Files changed (4):
  1. app.py +56 -0
  2. config.json +0 -3
  3. pipeline.py +0 -63
  4. requirements.txt +4 -4
app.py ADDED
@@ -0,0 +1,56 @@
+ # app.py
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from tensorflow.keras.models import load_model
+ from tensorflow.keras.preprocessing.text import tokenizer_from_json
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ import json
+ from typing import Union, List
+
+ app = FastAPI()
+
+ # Load model and tokenizer
+ model = load_model('news_classifier.h5')
+ with open('tokenizer.json', 'r') as f:
+     tokenizer_data = json.load(f)
+ tokenizer = tokenizer_from_json(tokenizer_data)
+
+ class PredictionInput(BaseModel):
+     text: Union[str, List[str]]
+
+ class PredictionOutput(BaseModel):
+     label: str
+     score: float
+
+ @app.post("/predict")
+ async def predict(input_data: PredictionInput):
+     try:
+         # Convert input to list if it's a single string
+         texts = input_data.text if isinstance(input_data.text, list) else [input_data.text]
+
+         # Preprocess
+         sequences = tokenizer.texts_to_sequences(texts)
+         padded = pad_sequences(sequences, maxlen=41)  # Use your model's expected input length
+
+         # Predict
+         predictions = model.predict(padded)
+
+         # Format results
+         results = []
+         for pred in predictions:
+             score = float(pred[1])  # Assuming binary classification
+             label = "foxnews" if score > 0.5 else "nbc"
+             results.append({
+                 "label": label,
+                 "score": score if label == "foxnews" else 1 - score
+             })
+
+         # Return single result if input was single string
+         return results[0] if isinstance(input_data.text, str) else results
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ @app.get("/")
+ async def root():
+     return {"message": "News Classifier API is running"}
config.json DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:94fe098b058680d6431f9d8e034176ce15684464230c1d7194f98c092ed78cdb
- size 345
pipeline.py DELETED
@@ -1,63 +0,0 @@
- from transformers import PreTrainedModel, PretrainedConfig
- from tensorflow.keras.models import load_model
- from tensorflow.keras.preprocessing.text import tokenizer_from_json
- from tensorflow.keras.preprocessing.sequence import pad_sequences
- import numpy as np
- import json
-
- class NewsClassifierConfig(PretrainedConfig):
-     model_type = "text_classifier"
-
-     def __init__(
-         self,
-         max_length=41,  # Modified to match model input shape
-         vocab_size=74934,  # Modified based on embedding layer size
-         embedding_dim=128,  # Added to match model architecture
-         hidden_size=64,  # Matches final LSTM layer
-         num_labels=2,
-         **kwargs
-     ):
-         self.max_length = max_length
-         self.vocab_size = vocab_size
-         self.embedding_dim = embedding_dim
-         self.hidden_size = hidden_size
-         self.num_labels = num_labels
-         super().__init__(**kwargs)
-
- class NewsClassifier(PreTrainedModel):
-     config_class = NewsClassifierConfig
-     base_model_prefix = "text_classifier"
-
-     def __init__(self, config):
-         super().__init__(config)
-         self.model = None
-         self.tokenizer = None
-
-     def post_init(self):
-         """Load model and tokenizer after initialization"""
-         self.model = load_model('news_classifier.h5')
-         with open('tokenizer.json', 'r') as f:
-             tokenizer_data = json.load(f)
-         self.tokenizer = tokenizer_from_json(tokenizer_data)
-
-     def forward(self, text_input):
-         if not self.model or not self.tokenizer:
-             self.post_init()
-
-         if isinstance(text_input, str):
-             text_input = [text_input]
-
-         sequences = self.tokenizer.texts_to_sequences(text_input)
-         padded = pad_sequences(sequences, maxlen=self.config.max_length)
-         predictions = self.model.predict(padded, verbose=0)
-
-         results = []
-         for pred in predictions:
-             score = float(pred[1])
-             label = "foxnews" if score > 0.5 else "nbc"
-             results.append({
-                 "label": label,
-                 "score": score if label == "foxnews" else 1 - score
-             })
-
-         return results[0] if len(text_input) == 1 else results
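
For context, the wrapper removed above would have been used roughly as sketched below. This is illustrative only; it assumes news_classifier.h5 and tokenizer.json sit next to pipeline.py, the same assumption the new app.py makes.

# Hypothetical usage of the removed transformers-style wrapper
from pipeline import NewsClassifier, NewsClassifierConfig

config = NewsClassifierConfig()     # defaults mirror the saved Keras model
classifier = NewsClassifier(config)
classifier.post_init()              # loads news_classifier.h5 and tokenizer.json

# forward() accepts a single string or a list of strings
print(classifier.forward("Example headline"))
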
requirements.txt CHANGED
@@ -1,7 +1,7 @@
  tensorflow>=2.10.0
- transformers>=4.46.3
- numpy>=1.19.2
- scikit-learn>=0.24.2
  fastapi>=0.68.0
  uvicorn>=0.15.0
- pydantic>=1.8.2
+ pydantic>=1.8.2
+ numpy>=1.19.2
+ scikit-learn>=0.24.2
+ python-multipart
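
With the trimmed dependency list, the deployment only needs to start the FastAPI app. A minimal launcher is sketched below; the file name run.py is hypothetical, and the host and port are assumptions rather than anything specified in this commit (the equivalent shell command is uvicorn app:app).

# run.py (hypothetical name): start the FastAPI app with uvicorn
import uvicorn

if __name__ == "__main__":
    # "app:app" points at the `app` object defined in app.py above.
    uvicorn.run("app:app", host="0.0.0.0", port=8000)
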