Update README.md
Browse files
README.md
CHANGED
@@ -64,7 +64,7 @@ def predict_sentiments(model_name, tokenizer_name, input_file):
|
|
64 |
|
65 |
df = pd.read_csv(input_file)
|
66 |
|
67 |
-
# Tokenize
|
68 |
test_inputs = tokenizer(list(df['text']), padding=True, truncation=True, max_length=128, return_tensors='pt')
|
69 |
|
70 |
# Make predictions
|
@@ -91,7 +91,7 @@ tokenizer_name = "RinInori/bert-base-uncased_finetune_sentiments"
|
|
91 |
# Predict sentiments for the unlabeled data
|
92 |
predict_sentiments(model_name, tokenizer_name, '/content/drive/MyDrive/DLBBT01/data/c_unlabeled/dc_America.csv')
|
93 |
|
94 |
-
# Load
|
95 |
df_Am = pd.read_csv('/content/drive/MyDrive/DLBBT01/data/c_unlabeled/dc_America_predicted.csv')
|
96 |
df_Am.head()
|
97 |
|
@@ -106,13 +106,13 @@ tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, do_lower_case=True)
|
|
106 |
input_file = '/content/drive/MyDrive/DLBBT01/data/c_unlabeled/dc_America_predicted.csv'
|
107 |
df_Am = pd.read_csv(input_file)
|
108 |
|
109 |
-
# Examine
|
110 |
sentences = df_Am.text.values
|
111 |
print("Distribution of data based on labels: ", df_Am.label.value_counts())
|
112 |
|
113 |
MAX_LEN = 512
|
114 |
|
115 |
-
# Plot
|
116 |
label_count = df_Am['label'].value_counts()
|
117 |
plot_users = label_count.plot.pie(autopct='%1.1f%%', figsize=(4, 4))
|
118 |
plt.rc('axes', unicode_minus=False)
|
|
|
64 |
|
65 |
df = pd.read_csv(input_file)
|
66 |
|
67 |
+
# Tokenize input text
|
68 |
test_inputs = tokenizer(list(df['text']), padding=True, truncation=True, max_length=128, return_tensors='pt')
|
69 |
|
70 |
# Make predictions
|
|
|
91 |
# Predict sentiments for the unlabeled data
|
92 |
predict_sentiments(model_name, tokenizer_name, '/content/drive/MyDrive/DLBBT01/data/c_unlabeled/dc_America.csv')
|
93 |
|
94 |
+
# Load predicted data
|
95 |
df_Am = pd.read_csv('/content/drive/MyDrive/DLBBT01/data/c_unlabeled/dc_America_predicted.csv')
|
96 |
df_Am.head()
|
97 |
|
|
|
106 |
input_file = '/content/drive/MyDrive/DLBBT01/data/c_unlabeled/dc_America_predicted.csv'
|
107 |
df_Am = pd.read_csv(input_file)
|
108 |
|
109 |
+
# Examine distribution of data based on labels
|
110 |
sentences = df_Am.text.values
|
111 |
print("Distribution of data based on labels: ", df_Am.label.value_counts())
|
112 |
|
113 |
MAX_LEN = 512
|
114 |
|
115 |
+
# Plot the label distribution as a pie chart
|
116 |
label_count = df_Am['label'].value_counts()
|
117 |
plot_users = label_count.plot.pie(autopct='%1.1f%%', figsize=(4, 4))
|
118 |
plt.rc('axes', unicode_minus=False)
|