kuladeepmantri committed on
Commit
43d5a53
1 Parent(s): 89f5504

Upload 3 files

Browse files
Files changed (3) hide show
  1. buildmodel.py +47 -0
  2. findtool_model.pkl +3 -0
  3. vectorizer.pkl +3 -0
buildmodel.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sklearn.model_selection import train_test_split
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+ from sklearn.naive_bayes import MultinomialNB
5
+ from sklearn.metrics import classification_report
6
+ import joblib
7
+
8
# Training data location: one `"question", "tool"` record per line.
data_path = 'trainingdata.txt'


def parse_training_line(line):
    """Split one raw line into a ``(question, tool)`` pair.

    The record is split on the LAST occurrence of ``, "`` (comma, space,
    opening quote), so commas inside the question text do not break the
    record apart.

    Args:
        line: One raw line of the training file, newline included or not.

    Returns:
        A ``(question, tool)`` tuple of strings, or ``None`` when the line
        does not contain the separator (blank or malformed line).
    """
    parts = line.rsplit(', "', 1)
    if len(parts) != 2:
        return None
    question = parts[0].strip().strip('"')
    # strip('",') takes a character SET: it removes any run of quotes and
    # commas at either end, which also drops a trailing comma after the tool.
    tool = parts[1].strip().strip('",')
    return question, tool


def load_training_pairs(path):
    """Read ``path`` and return the list of parsed ``(question, tool)`` pairs.

    Lines that ``parse_training_line`` cannot split are skipped.
    """
    # Explicit encoding: without it the decoding depends on the platform
    # locale, so the same file could parse differently (or fail) elsewhere.
    with open(path, 'r', encoding='utf-8') as handle:
        # Iterate the file lazily instead of materialising it with readlines().
        parsed = (parse_training_line(line) for line in handle)
        return [pair for pair in parsed if pair is not None]


def main():
    """Train the question -> tool classifier and save it next to the script.

    Writes ``findtool_model.pkl`` (the classifier) and ``vectorizer.pkl``
    (the fitted TF-IDF vectorizer) and prints a classification report for
    the held-out split.

    Raises:
        ValueError: If no usable record could be parsed from ``data_path``.
    """
    data = load_training_pairs(data_path)
    if not data:
        # Fail here with a clear message instead of deep inside
        # train_test_split with an error about an empty train set.
        raise ValueError(
            f"No '\"question\", \"tool\"' records could be parsed from {data_path!r}"
        )

    # Create a DataFrame
    df = pd.DataFrame(data, columns=['question', 'tool'])

    # Hold out 20% of the rows for evaluation; fixed seed keeps the split reproducible
    X_train, X_test, y_train, y_test = train_test_split(
        df['question'], df['tool'], test_size=0.2, random_state=42
    )

    # Fit the vocabulary on the training split only, then reuse it for the test split
    vectorizer = TfidfVectorizer()
    X_train_vectorized = vectorizer.fit_transform(X_train)
    X_test_vectorized = vectorizer.transform(X_test)

    # Train a Naive Bayes classifier
    clf = MultinomialNB()
    clf.fit(X_train_vectorized, y_train)

    # Evaluate on the held-out split
    y_pred = clf.predict(X_test_vectorized)
    print(classification_report(y_test, y_pred))

    # Save the model and vectorizer; both are needed together at inference time
    joblib.dump(clf, 'findtool_model.pkl')
    joblib.dump(vectorizer, 'vectorizer.pkl')


if __name__ == '__main__':
    main()
47
+
findtool_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22b7d40072e1758d51ba174901744926f38baf672342258d72b1742362436828
3
+ size 98439
vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d1c3a1559575978d012f36ac2144bd2ecd6dd8616c6f6b4e2d5a2e0fbc4b216
3
+ size 22618