File size: 1,700 Bytes
404e92b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_courses():
    """Scrape the Analytics Vidhya free-courses listing page.

    Fetches the catalogue page, extracts one record per course card
    (title, duration, lessons, rating, price) and returns them as a
    pandas DataFrame with those five columns.

    Returns:
        pd.DataFrame: one row per course card found on the page.

    Raises:
        requests.HTTPError: if the page request returns an error status.
        requests.Timeout: if the server does not respond within 30s.
    """
    url = "https://courses.analyticsvidhya.com/pages/all-free-courses"  # Use the actual URL where the courses are listed
    # timeout prevents the script from hanging forever on a stalled server;
    # raise_for_status surfaces 4xx/5xx instead of silently parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    courses = []
    for course_card in soup.find_all("a", class_="card-link"):
        # Extract title; skip malformed cards rather than crash on None.
        title_tag = course_card.find("h2", class_="card-text")
        if title_tag is None:
            continue
        title = title_tag.get_text(strip=True)

        # Extract duration and lessons (if available). find() may return
        # None, and the text may not contain " Hours" at all — the old
        # split(" Hours")[1] raised IndexError in that case, so use
        # str.partition, which never raises.
        info_tag = course_card.find("p", class_="fs-14")
        duration_lessons = info_tag.get_text(strip=True) if info_tag else ""
        before, sep, after = duration_lessons.partition(" Hours")
        if sep:
            duration = before + " Hours"
            lessons = after.strip()
        else:
            duration = "Unknown"
            lessons = duration_lessons

        # Extract review rating (if available)
        rating = course_card.find("span", class_="rating")  # Assuming rating is in a span with class "rating"
        rating = rating.get_text(strip=True) if rating else "No rating"

        # Extract price (if available)
        price = course_card.find("span", class_="price")  # Assuming price is in a span with class "price"
        price = price.get_text(strip=True) if price else "Free"  # Assuming courses without price are free

        courses.append({
            'title': title,
            'duration': duration,
            'lessons': lessons,
            'rating': rating,
            'price': price
        })

    return pd.DataFrame(courses)

# Script entry point: scrape the course catalogue, preview it on stdout,
# and persist the result to disk as CSV.
if __name__ == "__main__":
    df = scrape_courses()
    print(df)  # quick sanity check of what was scraped
    df.to_csv("courses_data.csv", index=False)