File size: 10,112 Bytes
72a6c46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd \n",
    "import numpy as np \n",
    "import re\n",
    "\n",
    "from time import sleep\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.common.exceptions import NoSuchElementException\n",
    "from selenium.webdriver.common.keys import Keys\n",
    "from selenium.webdriver.support.select import Select\n",
    "from selenium.webdriver.common.action_chains import ActionChains"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start the Chrome session that the scraping cells below drive through `driver`.\n",
    "# The 'enable-logging' switch is excluded, presumably to keep Chrome's own log\n",
    "# lines out of the console (TODO confirm against the ChromeDriver documentation).\n",
    "chrome_options = webdriver.ChromeOptions()\n",
    "chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])\n",
    "driver = webdriver.Chrome(options=chrome_options)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Collect Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 https://iisma.kemdikbud.go.id/info/02-university-college-london/\n",
      "1 https://iisma.kemdikbud.go.id/info/03-university-of-chicago/\n",
      "2 https://iisma.kemdikbud.go.id/info/04-nanyang-technological-university/\n",
      "3 https://iisma.kemdikbud.go.id/info/05-the-university-of-pennsylvania-college-of-liberal-and-professional-studies/\n",
      "4 https://iisma.kemdikbud.go.id/info/06-yale-university/\n",
      "5 https://iisma.kemdikbud.go.id/info/07-university-of-edinburgh/\n",
      "6 https://iisma.kemdikbud.go.id/info/09-the-australian-national-university/\n",
      "7 https://iisma.kemdikbud.go.id/info/10-university-of-melbourne/\n",
      "8 https://iisma.kemdikbud.go.id/info/11-university-of-sydney/\n",
      "9 https://iisma.kemdikbud.go.id/info/12-university-of-new-south-wales/\n",
      "10 https://iisma.kemdikbud.go.id/info/13-university-of-british-columbia/\n",
      "11 https://iisma.kemdikbud.go.id/info/14-the-university-of-queensland/\n",
      "12 https://iisma.kemdikbud.go.id/info/15-monash-university/\n",
      "13 https://iisma.kemdikbud.go.id/info/16-university-of-warwick/\n",
      "14 https://iisma.kemdikbud.go.id/info/17-universiti-malaya/\n",
      "15 https://iisma.kemdikbud.go.id/info/18-national-taiwan-university/\n",
      "16 https://iisma.kemdikbud.go.id/info/18-osaka-university/\n",
      "17 https://iisma.kemdikbud.go.id/info/20-ku-leuven/\n",
      "18 https://iisma.kemdikbud.go.id/info/21-university-of-texas-at-austin/\n",
      "19 https://iisma.kemdikbud.go.id/info/22-university-of-glasgow/\n",
      "20 https://iisma.kemdikbud.go.id/info/23-korea-university/\n",
      "21 https://iisma.kemdikbud.go.id/info/24-m-v-lomonosov-moscow-state-university/\n",
      "22 https://iisma.kemdikbud.go.id/info/25-university-of-auckland/\n",
      "23 https://iisma.kemdikbud.go.id/info/26-university-of-leeds/\n",
      "24 https://iisma.kemdikbud.go.id/info/27-the-university-of-western-australia/\n",
      "25 https://iisma.kemdikbud.go.id/info/28-university-of-birmingham/\n",
      "26 https://iisma.kemdikbud.go.id/info/29-penn-state-university/\n",
      "27 https://iisma.kemdikbud.go.id/info/30-university-of-california-davis/\n",
      "28 https://iisma.kemdikbud.go.id/info/31-boston-university-metropolitan-college/\n",
      "29 https://iisma.kemdikbud.go.id/info/32-the-university-of-adelaide/\n",
      "30 https://iisma.kemdikbud.go.id/info/33-university-college-cork/\n",
      "31 https://iisma.kemdikbud.go.id/info/34-queen-mary-university-of-london/\n",
      "32 https://iisma.kemdikbud.go.id/info/35-uc-chile/\n",
      "33 https://iisma.kemdikbud.go.id/info/36-newcastle-university/\n",
      "34 https://iisma.kemdikbud.go.id/info/37-humboldt-universitat-zu-berlin/\n",
      "35 https://iisma.kemdikbud.go.id/info/38-universiti-kebangsaan-malaysia/\n",
      "36 https://iisma.kemdikbud.go.id/info/39-lancaster-university/\n",
      "37 https://iisma.kemdikbud.go.id/info/40-universiti-sains-malaysia/\n",
      "38 https://iisma.kemdikbud.go.id/info/41-grenoble-ecole-de-management/\n",
      "39 https://iisma.kemdikbud.go.id/info/42-university-of-waterloo/\n",
      "40 https://iisma.kemdikbud.go.id/info/43-university-of-york/\n",
      "41 https://iisma.kemdikbud.go.id/info/44-hanyang-university-seoul-campus/\n",
      "42 https://iisma.kemdikbud.go.id/info/45-michigan-state-university/\n",
      "43 https://iisma.kemdikbud.go.id/info/46-western-university/\n",
      "44 https://iisma.kemdikbud.go.id/info/47-sapienza-university-of-rome/\n",
      "45 https://iisma.kemdikbud.go.id/info/48-university-college-dublin/\n",
      "46 https://iisma.kemdikbud.go.id/info/49-university-of-twente/\n",
      "47 https://iisma.kemdikbud.go.id/info/50-university-of-liverpool/\n",
      "48 https://iisma.kemdikbud.go.id/info/51-university-of-otago/\n",
      "49 https://iisma.kemdikbud.go.id/info/52-keio-university/\n",
      "50 https://iisma.kemdikbud.go.id/info/53-universidad-autonoma-de-madrid/\n",
      "51 https://iisma.kemdikbud.go.id/info/54-vrije-universiteit-amsterdam/\n",
      "52 https://iisma.kemdikbud.go.id/info/55-chulalongkorn-university/\n",
      "53 https://iisma.kemdikbud.go.id/info/56-arizona-state-university/\n",
      "54 https://iisma.kemdikbud.go.id/info/57-radboud-university/\n",
      "55 https://iisma.kemdikbud.go.id/info/58-university-of-sussex/\n",
      "56 https://iisma.kemdikbud.go.id/info/59-maastricht-university/\n",
      "57 https://iisma.kemdikbud.go.id/info/60-universitat-pompeu-fabra/\n",
      "58 https://iisma.kemdikbud.go.id/info/61-university-of-leicester/\n",
      "59 https://iisma.kemdikbud.go.id/info/62-victoria-university-of-wellington/\n",
      "60 https://iisma.kemdikbud.go.id/info/63-university-of-padua/\n",
      "61 https://iisma.kemdikbud.go.id/info/64-university-of-colorado-boulder/\n",
      "62 https://iisma.kemdikbud.go.id/info/65-university-of-galway/\n",
      "63 https://iisma.kemdikbud.go.id/info/66-university-of-canterbury/\n",
      "64 https://iisma.kemdikbud.go.id/info/68-university-of-warsaw/\n",
      "65 https://iisma.kemdikbud.go.id/info/69-university-of-tartu/\n",
      "66 https://iisma.kemdikbud.go.id/info/71-national-taiwan-university-of-science-and-technology-taiwan-tech/\n",
      "67 https://iisma.kemdikbud.go.id/info/72-university-of-pisa/\n",
      "68 https://iisma.kemdikbud.go.id/info/73-leiden-university/\n",
      "69 https://iisma.kemdikbud.go.id/info/73-middle-east-technical-university/\n",
      "70 https://iisma.kemdikbud.go.id/info/74-singapore-management-university/\n",
      "71 https://iisma.kemdikbud.go.id/info/75-university-of-szeged/\n",
      "72 https://iisma.kemdikbud.go.id/info/76-palacky-university-olomouc/\n",
      "73 https://iisma.kemdikbud.go.id/info/77-university-of-zagreb/\n",
      "74 https://iisma.kemdikbud.go.id/info/78-vytautas-magnus-university/\n",
      "75 https://iisma.kemdikbud.go.id/info/lolos-67-sciences-po/\n"
     ]
    }
   ],
   "source": [
    "link_df = pd.read_excel(\"C:/Users/mhani/Downloads/Link Kampus IISMA.xlsx\", header=None)\n",
    "link_list = link_df[0].to_list()\n",
    "for i in range(len(link_list)):\n",
    "    print(i, link_list[i])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start from empty result tables: one for per-university details, one for courses.\n",
    "detail_columns = [\"ID\", \"Name\", \"Location\", \"Requirements\", \"Period\", \"Statistics\"]\n",
    "course_columns = [\"Univ_ID\", \"Course Name\", \"Details\"]\n",
    "\n",
    "uni_details = pd.DataFrame(columns=detail_columns)\n",
    "uni_courses = pd.DataFrame(columns=course_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scrape every university page: one summary row per university plus one row per\n",
    "# course toggle. Rows are collected in plain lists and the two frames are built\n",
    "# once, so re-running this cell replaces the results instead of appending\n",
    "# duplicate rows to frames left over from an earlier run.\n",
    "DETAIL_COLUMNS = [\"ID\", \"Name\", \"Location\", \"Requirements\", \"Period\", \"Statistics\"]\n",
    "COURSE_COLUMNS = [\"Univ_ID\", \"Course Name\", \"Details\"]\n",
    "# Element ids of the panes read for requirements, period and statistics, in tab order.\n",
    "TAB_CONTENT_IDS = ['elementor-tab-content-4502', 'elementor-tab-content-4503', 'elementor-tab-content-4504']\n",
    "# Same tag-stripping pattern as before, compiled once instead of on every course.\n",
    "TAG_PATTERN = re.compile('<[^<]+?>')\n",
    "\n",
    "detail_rows = []\n",
    "course_rows = []\n",
    "\n",
    "try:\n",
    "    # IDs are 1-based positions in link_list; the same ID keys the course rows.\n",
    "    for univ_id, link in enumerate(link_list, start=1):\n",
    "        driver.get(link)\n",
    "        name = driver.find_element(By.XPATH, '//*[@class=\"elementor-heading-title elementor-size-default\"]').text\n",
    "        # The third widget container on the page is taken as the location text.\n",
    "        location = driver.find_elements(By.XPATH, '//*[@class=\"elementor-widget-container\"]')[2].text\n",
    "\n",
    "        # Click each of the first three tabs, then read the pane that belongs to it.\n",
    "        tab_list = driver.find_elements(By.XPATH, '//*[@class=\"elementor-tab-title elementor-tab-desktop-title\"]')\n",
    "        tab_texts = []\n",
    "        for tab, content_id in zip(tab_list, TAB_CONTENT_IDS):\n",
    "            tab.click()\n",
    "            tab_texts.append(driver.find_element(By.ID, content_id).text)\n",
    "        # Fails loudly if the page exposes fewer than three tabs.\n",
    "        requirements, period, statistics = tab_texts\n",
    "\n",
    "        detail_rows.append([univ_id, name, location, requirements, period, statistics])\n",
    "\n",
    "        for toggle in driver.find_elements(By.XPATH, '//*[@class=\"elementor-toggle-title\"]'):\n",
    "            toggle.click()  # expand the course entry\n",
    "            course_name = toggle.text\n",
    "            # Second active pane; presumably the first is the tab still open above (TODO confirm).\n",
    "            detail_content = driver.find_elements(By.XPATH, '//*[@class=\"elementor-tab-content elementor-clearfix elementor-active\"]')[1]\n",
    "            inner_detail = detail_content.get_attribute('innerHTML')\n",
    "            # Replace every tag with a space, then drop non-breaking-space entities.\n",
    "            clean_detail = TAG_PATTERN.sub(' ', inner_detail).replace(\"&nbsp;\", \"\")\n",
    "            toggle.click()  # collapse it again before moving to the next entry\n",
    "            sleep(1)\n",
    "            course_rows.append([univ_id, course_name, clean_detail])\n",
    "finally:\n",
    "    # Build the frames even if a page fails part-way, so rows scraped so far are kept.\n",
    "    uni_details = pd.DataFrame(detail_rows, columns=DETAIL_COLUMNS)\n",
    "    uni_courses = pd.DataFrame(course_rows, columns=COURSE_COLUMNS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save both tables: Univ_ID in the course sheet refers to ID in the details sheet,\n",
    "# so the course export can only be interpreted alongside the university details.\n",
    "uni_details.to_excel(\"uni_details.xlsx\", index=False)\n",
    "uni_courses.to_excel(\"uni_courses.xlsx\", index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}