{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Imports"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import os\n",
"import random\n",
"import string"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load the model: resume_learner.pth\n",
"\n",
"This model was trained in 8/22/2022 and this is the full Learner\n",
"\n",
"If you look in the models folders, those are not the full Learners saved\n",
"\n",
"Those are a dict with keys:\n",
"\n",
" 'model': the model (weights and biases) that can be loaded with load_state_dict()\n",
" \n",
" 'opt': the optimizer"
]
},
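{
"cell_type": "markdown",
"metadata": {},
"source": [
"Since those checkpoints are plain dicts, restoring one might look roughly like the sketch below. The path and the net/optimizer objects are placeholders rather than names from this project; only the 'model' and 'opt' keys come from the description above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal sketch of restoring a checkpoint dict (hypothetical path and objects)\n",
"def load_checkpoint(path, net, optimizer=None):\n",
"    checkpoint = torch.load(path, map_location=\"cpu\")\n",
"    net.load_state_dict(checkpoint['model'])  # weights and biases\n",
"    # Assumes 'opt' holds an optimizer state dict\n",
"    if optimizer is not None and 'opt' in checkpoint:\n",
"        optimizer.load_state_dict(checkpoint['opt'])\n",
"    return net"
]
},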
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\ethan\\AppData\\Local\\Temp\\ipykernel_9460\\1803318925.py:1: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
" model = torch.load(\"resume_learner.pth\")\n"
]
}
],
"source": [
"model = torch.load(\"resume_learner.pth\")\n",
"\n",
"# Clear lines because there's a lot of text displayed when loading model\n",
"#os.system(\"cls\")"
]
},
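{
"cell_type": "markdown",
"metadata": {},
"source": [
"The FutureWarning above comes from torch.load's default of weights_only=False. Because resume_learner.pth is a pickled Learner object rather than a plain state dict, it has to be unpickled as a full object; if the file is trusted, passing weights_only=False explicitly keeps the current behavior and silences the warning. A minimal sketch (not run here):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Equivalent load with the unpickling behavior made explicit (assumes the file is trusted)\n",
"# model = torch.load(\"resume_learner.pth\", weights_only=False)"
]
},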
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here is the function to process the prediction"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def process_info(predictions):\n",
" preds,_,probs = predictions\n",
" if preds == '2.0':\n",
" print(\"Is this a resume? YES\")\n",
" print(f\"Probability: {probs[1]}\")\n",
" else:\n",
" print(\"Is this a resume? NO\")\n",
" print(f\"Probability: {probs[0]}\")"
]
},
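{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick sanity check, process_info can be called on a hand-built tuple shaped like a prediction (the label string '2.0' and the tensor layout here are assumptions read off the function above, not output captured from the model):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical prediction tuple: (predicted label, label index, class probabilities)\n",
"fake_prediction = ('2.0', torch.tensor(1), torch.tensor([0.2, 0.8]))\n",
"process_info(fake_prediction)"
]
},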
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here is a function to generate a random paragraph with random letters as words"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def paragraph():\n",
" char_list = list(string.ascii_lowercase)\n",
" \n",
" paragraph_length = random.randint(5,10)\n",
" paragraph = \"\"\n",
" for _ in range(paragraph_length):\n",
" sentence_length = random.randint(2,15)\n",
" sentence = \"\"\n",
" for _ in range(sentence_length): \n",
" word_length = random.randint(1,10)\n",
" word = \"\"\n",
" for _ in range(word_length):\n",
" choose_char = random.randint(0,25)\n",
" word += char_list[choose_char]\n",
" sentence += \" \" + word\n",
" paragraph += sentence + \".\"\n",
" return paragraph"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here is the function wrapper that takes in a string and will print out if that string is a resume or not"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def test_model(string):\n",
" output = model.predict(string)\n",
" process_info(output)\n",
" print()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Below are some tests I made for some different generated texts"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Tests the random generated paragraphs\n",
"def test_rng():\n",
" print(\"Random Generated Paragraphs Test:\\n\")\n",
" for i in range(10):\n",
" print(f\"Paragraph {i}:\")\n",
" p = paragraph()\n",
" print(p)\n",
"\n",
" test_model(p)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Tests resumes that were generated by chatgpt\n",
"def test_chatgpt():\n",
" print(\"ChatGPT generated resumes in test_resumes folder:\\n\")\n",
" for i in range(1,10):\n",
" fname = \"test_resumes/test_resume\" + str(i) + \".txt\"\n",
" print(fname + \":\")\n",
" file = open(fname)\n",
" text = file.read()\n",
"\n",
" test_model(text)\n",
"\n",
" file.close()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Tests weird cases where it thinks a few words is a resume\n",
"def test_weird():\n",
" print(\"Some weird cases:\\n\")\n",
"\n",
" test = \"hi there lol ok\"\n",
" print(test + \":\")\n",
" test_model(test)\n",
"\n",
" test = \"adfp f d\"\n",
" print(test + \":\")\n",
" test_model(test)\n",
"\n",
" test = \"lolcode argh\"\n",
" print(test + \":\")\n",
" test_model(test)\n",
"\n",
" test = \"testing 123\"\n",
" print(test + \":\")\n",
" test_model(test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we run the tests"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Some weird cases:\n",
"\n",
"hi there lol ok:\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Is this a resume? YES\n",
"Probability: 0.7741007804870605\n",
"\n",
"adfp f d:\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Is this a resume? NO\n",
"Probability: 0.6239883303642273\n",
"\n",
"lolcode argh:\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Is this a resume? YES\n",
"Probability: 0.8041706681251526\n",
"\n",
"testing 123:\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Is this a resume? YES\n",
"Probability: 0.7747318744659424\n",
"\n"
]
}
],
"source": [
"#test_rng()\n",
"#test_chatgpt()\n",
"test_weird()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Can do other tests down here"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Testing test.txt\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Is this a resume? NO\n",
"Probability: 0.9999693632125854\n",
"\n"
]
}
],
"source": [
"print(\"Testing test.txt\")\n",
"f = open('test.txt')\n",
"text = f.read()\n",
"test_model(text)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}