Upload 3 files

Files changed (3) hide show

buildmodel.py ADDED Viewed

+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.naive_bayes import MultinomialNB
+from sklearn.metrics import classification_report
+import joblib
+# Load the dataset from the txt file
+data_path = 'trainingdata.txt'
+data = []
+# Read the file and parse the data
+with open(data_path, 'r') as file:
+    lines = file.readlines()
+    for line in lines:
+        # Split each line into question and tool by the last comma
+        parts = line.rsplit(', "', 1)
+        if len(parts) == 2:
+            question = parts[0].strip().strip('"')
+            tool = parts[1].strip().strip('",')
+            data.append((question, tool))
+# Create a DataFrame
+df = pd.DataFrame(data, columns=['question', 'tool'])
+# Split the data
+X_train, X_test, y_train, y_test = train_test_split(df['question'], df['tool'], test_size=0.2, random_state=42)
+# Vectorize the text data
+vectorizer = TfidfVectorizer()
+X_train_vectorized = vectorizer.fit_transform(X_train)
+X_test_vectorized = vectorizer.transform(X_test)
+# Train a Naive Bayes classifier
+clf = MultinomialNB()
+clf.fit(X_train_vectorized, y_train)
+# Make predictions
+y_pred = clf.predict(X_test_vectorized)
+# Print the classification report
+print(classification_report(y_test, y_pred))
+# Save the model and vectorizer
+joblib.dump(clf, 'findtool_model.pkl')
+joblib.dump(vectorizer, 'vectorizer.pkl')

findtool_model.pkl ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:22b7d40072e1758d51ba174901744926f38baf672342258d72b1742362436828
+size 98439

vectorizer.pkl ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d1c3a1559575978d012f36ac2144bd2ecd6dd8616c6f6b4e2d5a2e0fbc4b216
+size 22618