Vishal1806 committed on
Commit
404e92b
·
verified ·
1 Parent(s): 64aa0bc
Files changed (1) hide show
  1. scraper.py +41 -0
scraper.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import pandas as pd
4
+
5
_COURSES_URL = "https://courses.analyticsvidhya.com/pages/all-free-courses"
_REQUEST_TIMEOUT_SECONDS = 30
_HOURS_MARKER = " Hours"
_NOT_AVAILABLE = "N/A"
_COLUMNS = ["title", "duration", "lessons", "rating", "price"]


def _text_or_default(tag, default):
    """Return the stripped text of *tag*, or *default* when the tag is missing."""
    return tag.get_text(strip=True) if tag is not None else default


def _split_duration_lessons(text):
    """Split text such as "2 Hours 10 Lessons" into ``(duration, lessons)``."""
    head, marker, tail = text.partition(_HOURS_MARKER)
    if not marker:
        # Without the " Hours" marker there is no duration to report; keep
        # whatever text is present as the lessons part instead of crashing.
        return _NOT_AVAILABLE, text.strip() or _NOT_AVAILABLE
    return head + marker, tail.strip()


def scrape_courses(url=_COURSES_URL, timeout=_REQUEST_TIMEOUT_SECONDS):
    """Scrape the course listing page into a DataFrame.

    Args:
        url: Page that lists the courses. Defaults to the free-courses page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``pandas.DataFrame`` with one row per course card and the columns
        ``title``, ``duration``, ``lessons``, ``rating`` and ``price``. The
        columns are present even when no course card is found.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an empty CSV.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    courses = []
    for course_card in soup.find_all("a", class_="card-link"):
        title_tag = course_card.find("h2", class_="card-text")
        if title_tag is None:
            # A "card-link" anchor without a title heading cannot be
            # described as a course row, so skip it.
            continue

        # Duration and lessons share one paragraph; it may be absent.
        duration_lessons = _text_or_default(
            course_card.find("p", class_="fs-14"), ""
        )
        duration, lessons = _split_duration_lessons(duration_lessons)

        courses.append({
            "title": title_tag.get_text(strip=True),
            "duration": duration,
            "lessons": lessons,
            # Assumes the rating lives in a span with class "rating".
            "rating": _text_or_default(
                course_card.find("span", class_="rating"), "No rating"
            ),
            # Assumes the price lives in a span with class "price"; courses
            # without one are treated as free.
            "price": _text_or_default(
                course_card.find("span", class_="price"), "Free"
            ),
        })

    # Explicit columns keep the CSV header stable even for an empty result.
    return pd.DataFrame(courses, columns=_COLUMNS)
36
+
37
# Script entry point: scrape the listing, preview it, then persist it as CSV.
if __name__ == "__main__":
    courses_df = scrape_courses()
    # Show the scraped rows on stdout before writing them out.
    print(courses_df)
    courses_df.to_csv("courses_data.csv", index=False)