diff --git "a/final_code.ipynb" "b/final_code.ipynb" new file mode 100644--- /dev/null +++ "b/final_code.ipynb" @@ -0,0 +1,5207 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from tqdm import tqdm\n", + "from wordcloud import WordCloud\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from nltk import word_tokenize\n", + "from nltk.corpus import stopwords\n", + "from keybert import KeyBERT\n", + "from keyphrase_vectorizers import KeyphraseCountVectorizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import Data & EDA" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | Univ_ID | \n", + "Course Name | \n", + "Details | \n", + "
---|---|---|---|
0 | \n", + "1 | \n", + "Approaches to Knowledge: Introduction to Inter... | \n", + "This module provides an introduction to inter... | \n", + "
1 | \n", + "1 | \n", + "Information Through the Ages | \n", + "This module explores the concept of informati... | \n", + "
2 | \n", + "1 | \n", + "Computers and Humans | \n", + "Should we trust news in a Deepfake world? Sho... | \n", + "
3 | \n", + "1 | \n", + "Migration and Health | \n", + "In this course, you will analyse the interpla... | \n", + "
4 | \n", + "1 | \n", + "Race, Gender and Feminism | \n", + "This module offers an introduction and analyt... | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "
1143 | \n", + "76 | \n", + "INTRODUCTION TO CLIMATE DIPLOMACY | \n", + "This class is aimed at students willing to un... | \n", + "
1144 | \n", + "76 | \n", + "GOVERNING BORDERS IN THE 21ST CENTURY | \n", + "This course offers an introduction to the stu... | \n", + "
1145 | \n", + "76 | \n", + "AGRICULTURE, FOOD AND GLOBALIZATION | \n", + "With the war in Ukraine, the Covid crisis, so... | \n", + "
1146 | \n", + "76 | \n", + "BEHAVIORAL GAME THEORY | \n", + "Game theory is a formal language to describe ... | \n", + "
1147 | \n", + "76 | \n", + "COMPARATIVE POLITICS | \n", + "What is Comparative Politics? How do we analy... | \n", + "
1148 rows × 3 columns
\n", + "\n", + " | Univ_ID | \n", + "Details | \n", + "
---|---|---|
0 | \n", + "1 | \n", + "approaches to knowledge: introduction to inter... | \n", + "
1 | \n", + "1 | \n", + "information through the ages. this module expl... | \n", + "
2 | \n", + "1 | \n", + "computers and humans. should we trust news in ... | \n", + "
3 | \n", + "1 | \n", + "migration and health. in this course, you will... | \n", + "
4 | \n", + "1 | \n", + "race, gender and feminism. this module offers ... | \n", + "
... | \n", + "... | \n", + "... | \n", + "
1143 | \n", + "76 | \n", + "introduction to climate diplomacy. this class ... | \n", + "
1144 | \n", + "76 | \n", + "governing borders in the 21st century. this co... | \n", + "
1145 | \n", + "76 | \n", + "agriculture, food and globalization. with the ... | \n", + "
1146 | \n", + "76 | \n", + "behavioral game theory. game theory is a forma... | \n", + "
1147 | \n", + "76 | \n", + "comparative politics. what is comparative poli... | \n", + "
1148 rows × 2 columns
\n", + "\n", + " | Univ_ID | \n", + "Details | \n", + "
---|---|---|
0 | \n", + "1 | \n", + "approaches to knowledge: introduction to inter... | \n", + "
1 | \n", + "1 | \n", + "creative writing. weekly lecture and workshop-... | \n", + "
2 | \n", + "1 | \n", + "quantitative methods 2: data science and visua... | \n", + "
3 | \n", + "1 | \n", + "information through the ages. this module expl... | \n", + "
4 | \n", + "1 | \n", + "introduction into politics: key concepts and t... | \n", + "
... | \n", + "... | \n", + "... | \n", + "
375 | \n", + "76 | \n", + "governing borders in the 21st century. this co... | \n", + "
376 | \n", + "76 | \n", + "women in movement: history of feminisms from t... | \n", + "
377 | \n", + "76 | \n", + "comparative politics. what is comparative poli... | \n", + "
378 | \n", + "76 | \n", + "trade and international finance. the objective... | \n", + "
379 | \n", + "76 | \n", + "introduction to climate diplomacy. this class ... | \n", + "
380 rows × 2 columns
\n", + "\n", + " | Univ_ID | \n", + "Details | \n", + "Label | \n", + "ID | \n", + "
---|---|---|---|---|
0 | \n", + "1 | \n", + "approaches to knowledge: introduction to inter... | \n", + "interdisciplinarity, knowledge, traditional bo... | \n", + "0 | \n", + "
1 | \n", + "1 | \n", + "creative writing. weekly lecture and workshop-... | \n", + "creative writing, prose fiction, poetry, creat... | \n", + "17 | \n", + "
2 | \n", + "1 | \n", + "quantitative methods 2: data science and visua... | \n", + "quantitative methods, data science, visualisat... | \n", + "15 | \n", + "
3 | \n", + "1 | \n", + "information through the ages. this module expl... | \n", + "information, data, knowledge, historical persp... | \n", + "1 | \n", + "
4 | \n", + "1 | \n", + "introduction into politics: key concepts and t... | \n", + "politics, key concepts, texts, political commu... | \n", + "8 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
375 | \n", + "76 | \n", + "governing borders in the 21st century. this co... | \n", + "governance, migration, borders, security, 21st... | \n", + "1144 | \n", + "
376 | \n", + "76 | \n", + "women in movement: history of feminisms from t... | \n", + "women, movement, feminisms, history, activism,... | \n", + "1142 | \n", + "
377 | \n", + "76 | \n", + "comparative politics. what is comparative poli... | \n", + "comparative politics, analysis, political inst... | \n", + "1147 | \n", + "
378 | \n", + "76 | \n", + "trade and international finance. the objective... | \n", + "international finance, international trade, gl... | \n", + "1138 | \n", + "
379 | \n", + "76 | \n", + "introduction to climate diplomacy. this class ... | \n", + "climate diplomacy, international politics, fig... | \n", + "1143 | \n", + "
380 rows × 4 columns
\n", + "\n", + " | Univ_ID | \n", + "Course Name | \n", + "Details | \n", + "
---|---|---|---|
0 | \n", + "1 | \n", + "Approaches to Knowledge: Introduction to Inter... | \n", + "This module provides an introduction to inter... | \n", + "
1 | \n", + "1 | \n", + "Information Through the Ages | \n", + "This module explores the concept of informati... | \n", + "
2 | \n", + "1 | \n", + "Computers and Humans | \n", + "Should we trust news in a Deepfake world? Sho... | \n", + "
3 | \n", + "1 | \n", + "Migration and Health | \n", + "In this course, you will analyse the interpla... | \n", + "
4 | \n", + "1 | \n", + "Race, Gender and Feminism | \n", + "This module offers an introduction and analyt... | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "
1143 | \n", + "76 | \n", + "INTRODUCTION TO CLIMATE DIPLOMACY | \n", + "This class is aimed at students willing to un... | \n", + "
1144 | \n", + "76 | \n", + "GOVERNING BORDERS IN THE 21ST CENTURY | \n", + "This course offers an introduction to the stu... | \n", + "
1145 | \n", + "76 | \n", + "AGRICULTURE, FOOD AND GLOBALIZATION | \n", + "With the war in Ukraine, the Covid crisis, so... | \n", + "
1146 | \n", + "76 | \n", + "BEHAVIORAL GAME THEORY | \n", + "Game theory is a formal language to describe ... | \n", + "
1147 | \n", + "76 | \n", + "COMPARATIVE POLITICS | \n", + "What is Comparative Politics? How do we analy... | \n", + "
1148 rows × 3 columns
\n", + "\n", + " | ID | \n", + "Univ_ID | \n", + "Course Name | \n", + "Details | \n", + "Processed | \n", + "KeyBERT Base | \n", + "KeyBERT + Rule | \n", + "KeyBERT + Vectorizer | \n", + "
---|---|---|---|---|---|---|---|---|
0 | \n", + "0 | \n", + "1 | \n", + "Approaches to Knowledge: Introduction to Inter... | \n", + "This module provides an introduction to inter... | \n", + "approaches to knowledge: introduction to inter... | \n", + "knowledge introduction interdisciplinarity, in... | \n", + "knowledge introduction interdisciplinarity, in... | \n", + "interdisciplinarity, interdisciplinary work, k... | \n", + "
1 | \n", + "1 | \n", + "1 | \n", + "Information Through the Ages | \n", + "This module explores the concept of informati... | \n", + "information through the ages. this module expl... | \n", + "knowledge taking historical, information, info... | \n", + "information, libraries, museums, knowledge, ar... | \n", + "information, museums, different historical for... | \n", + "
2 | \n", + "2 | \n", + "1 | \n", + "Computers and Humans | \n", + "Should we trust news in a Deepfake world? Sho... | \n", + "computers and humans. should we trust news in ... | \n", + "computers humans, concepts human computer, com... | \n", + "ai, computing, computers, wearable, visualisat... | \n", + "ai, human computer interaction, wearable techn... | \n", + "
3 | \n", + "3 | \n", + "1 | \n", + "Migration and Health | \n", + "In this course, you will analyse the interpla... | \n", + "migration and health. in this course, you will... | \n", + "migration health course, migration health, mig... | \n", + "migration health course, migration health, mig... | \n", + "health, migrant, migration, host society, succ... | \n", + "
4 | \n", + "4 | \n", + "1 | \n", + "Race, Gender and Feminism | \n", + "This module offers an introduction and analyt... | \n", + "race, gender and feminism. this module offers ... | \n", + "race gender feminism, concepts gender race, ge... | \n", + "race gender feminism, intersectionality, femin... | \n", + "intersectionality, racial difference, persiste... | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
1143 | \n", + "1143 | \n", + "76 | \n", + "INTRODUCTION TO CLIMATE DIPLOMACY | \n", + "This class is aimed at students willing to un... | \n", + "introduction to climate diplomacy. this class ... | \n", + "introduction climate diplomacy, climate diplom... | \n", + "introduction climate diplomacy, climate diplom... | \n", + "climate diplomacy, future climate diplomacy, m... | \n", + "
1144 | \n", + "1144 | \n", + "76 | \n", + "GOVERNING BORDERS IN THE 21ST CENTURY | \n", + "This course offers an introduction to the stu... | \n", + "governing borders in the 21st century. this co... | \n", + "governance migration borders, migration border... | \n", + "governance migration borders, borders, mobilit... | \n", + "borders, mobilities, governance, migration con... | \n", + "
1145 | \n", + "1145 | \n", + "76 | \n", + "AGRICULTURE, FOOD AND GLOBALIZATION | \n", + "With the war in Ukraine, the Covid crisis, so... | \n", + "agriculture, food and globalization. with the ... | \n", + "agriculture food globalization, challenges int... | \n", + "agriculture food globalization, agriculture, a... | \n", + "international food markets, food sovereignty, ... | \n", + "
1146 | \n", + "1146 | \n", + "76 | \n", + "BEHAVIORAL GAME THEORY | \n", + "Game theory is a formal language to describe ... | \n", + "behavioral game theory. game theory is a forma... | \n", + "behavioral game theory, behavioral game, game ... | \n", + "behavioral game theory, economics, economic, b... | \n", + "behavioral game theory, economics, political i... | \n", + "
1147 | \n", + "1147 | \n", + "76 | \n", + "COMPARATIVE POLITICS | \n", + "What is Comparative Politics? How do we analy... | \n", + "comparative politics. what is comparative poli... | \n", + "comparative politics analyse, study comparativ... | \n", + "comparative politics analyse, study comparativ... | \n", + "comparative politics, political systems, insti... | \n", + "
1148 rows × 8 columns
\n", + "\n", + " | KeyBERT Base | \n", + "KeyBERT + Rule | \n", + "KeyBERT + Vectorizer | \n", + "Label | \n", + "
---|---|---|---|---|
0 | \n", + "knowledge introduction interdisciplinarity, in... | \n", + "knowledge introduction interdisciplinarity, in... | \n", + "interdisciplinarity, interdisciplinary work, k... | \n", + "interdisciplinarity, knowledge, traditional bo... | \n", + "
1 | \n", + "knowledge taking historical, information, info... | \n", + "information, libraries, museums, knowledge, ar... | \n", + "information, museums, different historical for... | \n", + "information, data, knowledge, historical persp... | \n", + "
2 | \n", + "politics key concepts, modules introduction po... | \n", + "politics, political, machiavelli, aristotle, r... | \n", + "political thinkers, foucault, core texts, intr... | \n", + "politics, key concepts, texts, political commu... | \n", + "
3 | \n", + "quantitative methods data, data science visual... | \n", + "quantitative, data, visualisation, modelling, ... | \n", + "quantitative methods course, data science, vis... | \n", + "quantitative methods, data science, visualisat... | \n", + "
4 | \n", + "student creative writing, creative writing wor... | \n", + "student creative writing, writing, prose, poet... | \n", + "creative writing work, contemporary prose fict... | \n", + "creative writing, prose fiction, poetry, creat... | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
375 | \n", + "globalization international trade, trade inter... | \n", + "globalization, macroeconomics, macroeconomic, ... | \n", + "international trade, many international financ... | \n", + "international finance, international trade, gl... | \n", + "
376 | \n", + "movement history feminisms, history feminisms,... | \n", + "movement history feminisms, history feminisms,... | \n", + "world feminisms, international socialist movem... | \n", + "women, movement, feminisms, history, activism,... | \n", + "
377 | \n", + "introduction climate diplomacy, climate diplom... | \n", + "introduction climate diplomacy, climate diplom... | \n", + "climate diplomacy, future climate diplomacy, m... | \n", + "climate diplomacy, international politics, fig... | \n", + "
378 | \n", + "governance migration borders, migration border... | \n", + "governance migration borders, borders, mobilit... | \n", + "borders, mobilities, governance, migration con... | \n", + "governance, migration, borders, security, 21st... | \n", + "
379 | \n", + "comparative politics analyse, study comparativ... | \n", + "comparative politics analyse, study comparativ... | \n", + "comparative politics, political systems, insti... | \n", + "comparative politics, analysis, political inst... | \n", + "
380 rows × 4 columns
\n", + "\n", + " | KeyBERT Base | \n", + "KeyBERT + Rule | \n", + "KeyBERT + Vectorizer | \n", + "Label | \n", + "
---|---|---|---|---|
0 | \n", + "[knowledge introduction interdisciplinarity, i... | \n", + "[knowledge introduction interdisciplinarity, i... | \n", + "[interdisciplinarity, interdisciplinary work, ... | \n", + "[interdisciplinarity, knowledge, traditional b... | \n", + "
1 | \n", + "[knowledge taking historical, information, inf... | \n", + "[information, libraries, museums, knowledge, a... | \n", + "[information, museums, different historical fo... | \n", + "[information, data, knowledge, historical pers... | \n", + "
2 | \n", + "[politics key concepts, modules introduction p... | \n", + "[politics, political, machiavelli, aristotle, ... | \n", + "[political thinkers, foucault, core texts, int... | \n", + "[politics, key concepts, texts, political comm... | \n", + "
3 | \n", + "[quantitative methods data, data science visua... | \n", + "[quantitative, data, visualisation, modelling,... | \n", + "[quantitative methods course, data science, vi... | \n", + "[quantitative methods, data science, visualisa... | \n", + "
4 | \n", + "[student creative writing, creative writing wo... | \n", + "[student creative writing, writing, prose, poe... | \n", + "[creative writing work, contemporary prose fic... | \n", + "[creative writing, prose fiction, poetry, crea... | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
375 | \n", + "[globalization international trade, trade inte... | \n", + "[globalization, macroeconomics, macroeconomic,... | \n", + "[international trade, many international finan... | \n", + "[international finance, international trade, g... | \n", + "
376 | \n", + "[movement history feminisms, history feminisms... | \n", + "[movement history feminisms, history feminisms... | \n", + "[world feminisms, international socialist move... | \n", + "[women, movement, feminisms, history, activism... | \n", + "
377 | \n", + "[introduction climate diplomacy, climate diplo... | \n", + "[introduction climate diplomacy, climate diplo... | \n", + "[climate diplomacy, future climate diplomacy, ... | \n", + "[climate diplomacy, international politics, fi... | \n", + "
378 | \n", + "[governance migration borders, migration borde... | \n", + "[governance migration borders, borders, mobili... | \n", + "[borders, mobilities, governance, migration co... | \n", + "[governance, migration, borders, security, 21s... | \n", + "
379 | \n", + "[comparative politics analyse, study comparati... | \n", + "[comparative politics analyse, study comparati... | \n", + "[comparative politics, political systems, inst... | \n", + "[comparative politics, analysis, political ins... | \n", + "
380 rows × 4 columns
\n", + "