# %% [markdown]
# # IISMA host-university scraper
#
# Opens every IISMA info page listed in a spreadsheet with Selenium/Chrome and
# collects two tables:
#
# * `uni_details` - one row per university (name, location, requirements,
#   period, statistics);
# * `uni_courses` - one row per course toggle found on each page.
#
# Run top to bottom on a fresh kernel (Restart Kernel -> Run All).

# %%
import re
from time import sleep

import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.select import Select

# %%
# --- Configuration: everything a reader may need to tune ---------------------

# Spreadsheet with no header row whose first column holds one info-page URL per row.
# NOTE(review): machine-specific absolute path, kept as the default so existing
# runs keep working; point it at your own copy before running elsewhere.
LINKS_PATH = "C:/Users/mhani/Downloads/Link Kampus IISMA.xlsx"
COURSES_OUT_PATH = "uni_courses.xlsx"

DETAIL_COLUMNS = ["ID", "Name", "Location", "Requirements", "Period", "Statistics"]
COURSE_COLUMNS = ["Univ_ID", "Course Name", "Details"]

# Page selectors (exact class-attribute matches, unchanged from the original code).
HEADING_XPATH = '//*[@class="elementor-heading-title elementor-size-default"]'
WIDGET_XPATH = '//*[@class="elementor-widget-container"]'
TAB_TITLE_XPATH = '//*[@class="elementor-tab-title elementor-tab-desktop-title"]'
COURSE_TOGGLE_XPATH = '//*[@class="elementor-toggle-title"]'
ACTIVE_CONTENT_XPATH = '//*[@class="elementor-tab-content elementor-clearfix elementor-active"]'

# Element ids of the tab panels, in tab order: requirements, period, statistics.
# NOTE(review): these ids are hard-coded; this assumes every info page reuses
# them - confirm against pages that differ from the first one.
TAB_CONTENT_IDS = (
    'elementor-tab-content-4502',
    'elementor-tab-content-4503',
    'elementor-tab-content-4504',
)

# Fixed pause (seconds) after collapsing a course toggle, presumably so the
# page settles before the next toggle is clicked.
TOGGLE_SETTLE_SECONDS = 1

# %%
### Initialize Browser (Chrome)
options = webdriver.ChromeOptions()
# Exclude the 'enable-logging' switch (presumably to quiet console output).
options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver = webdriver.Chrome(options=options)

# %% [markdown]
# # Collect Data

# %%
link_df = pd.read_excel(LINKS_PATH, header=None)
link_list = link_df[0].to_list()
for i, link in enumerate(link_list):
    print(i, link)


# %%
def scrape_university(driver, univ_id, url):
    """Scrape one university info page.

    Parameters
    ----------
    driver : selenium WebDriver
        Live browser session used to load and query the page.
    univ_id : int
        Identifier stored in the first column of every returned row.
    url : str
        Address of the info page to load.

    Returns
    -------
    (list, list of list)
        ``detail_row`` laid out as ``DETAIL_COLUMNS`` and ``course_rows``, one
        entry per course toggle on the page, laid out as ``COURSE_COLUMNS``.

    Raises
    ------
    IndexError
        If the page has fewer matching widgets, tabs or active panels than the
        positions indexed below.
    selenium.common.exceptions.NoSuchElementException
        If the heading or one of the tab panels is not found.
    """
    driver.get(url)
    name = driver.find_element(By.XPATH, HEADING_XPATH).text
    # The third widget container on the page is read as the location text.
    loc = driver.find_elements(By.XPATH, WIDGET_XPATH)[2].text

    # Click each of the first three tabs in turn and read its panel text.
    tab_list = driver.find_elements(By.XPATH, TAB_TITLE_XPATH)
    tab_texts = []
    for tab_index, content_id in enumerate(TAB_CONTENT_IDS):
        tab_list[tab_index].click()
        tab_texts.append(driver.find_element(By.ID, content_id).text)
    req, period, stats = tab_texts

    detail_row = [univ_id, name, loc, req, period, stats]

    course_rows = []
    for toggle in driver.find_elements(By.XPATH, COURSE_TOGGLE_XPATH):
        toggle.click()  # expand the course entry
        course_name = toggle.text
        # Second "active" panel on the page; presumably the first match is the
        # tab panel left open above - TODO confirm.
        detail_content = driver.find_elements(By.XPATH, ACTIVE_CONTENT_XPATH)[1]
        inner_detail = detail_content.get_attribute('innerHTML')
        # Replace every HTML tag with a single space.
        clean_detail = re.sub('<[^<]+?>', ' ', inner_detail)
        # NOTE(review): the literal below appears to be a plain space, which
        # strips *all* spaces from the text. If a non-breaking space / `&nbsp;`
        # was intended, confirm and adjust. Kept exactly as in the original.
        clean_detail = clean_detail.replace(" ", "")
        toggle.click()  # collapse again before moving to the next toggle
        sleep(TOGGLE_SETTLE_SECONDS)
        course_rows.append([univ_id, course_name, clean_detail])

    return detail_row, course_rows


# %%
# Rows are gathered in plain lists that are reset on every run of this cell, so
# re-running it never duplicates rows, and each frame is built once instead of
# being enlarged row by row.
detail_rows = []
course_rows = []
try:
    for univ_id, url in enumerate(link_list, start=1):
        detail_row, page_course_rows = scrape_university(driver, univ_id, url)
        detail_rows.append(detail_row)
        course_rows.extend(page_course_rows)
finally:
    # Built even when a page fails, so rows from completed pages stay available.
    uni_details = pd.DataFrame(detail_rows, columns=DETAIL_COLUMNS)
    uni_courses = pd.DataFrame(course_rows, columns=COURSE_COLUMNS)

# %%
uni_courses.to_excel(COURSES_OUT_PATH, index=False)
# NOTE(review): `uni_details` is collected but never written to disk - confirm
# whether it should be exported as well.

# %%
# End the WebDriver session so the Chrome process does not outlive the notebook.
driver.quit()