"""Scraper that crawls a handful of SletcherSystems pages and saves the
extracted content to JSON."""

import json
import os
import time
from datetime import datetime

import requests
from bs4 import BeautifulSoup


class SletcherScraper:
    def __init__(self):
        self.base_url = "https://www.sletchersystems.com"
        self.pages = ["/", "/clients", "/solutions", "/services", "/about"]
        self.content = {
            "timestamp": datetime.now().isoformat(),
            "pages": {},
            "company_info": {},
            "services": [],
            "solutions": [],
            "clients": [],
        }

    def clean_text(self, text):
        """Collapse runs of whitespace; return "" for None or empty input."""
        if text:
            return " ".join(text.strip().split())
        return ""

    def scrape_page(self, url_path):
        full_url = self.base_url + url_path
        try:
            response = requests.get(full_url, timeout=10)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, "html.parser")
            page_data = {
                "url": full_url,
                "title": self.clean_text(soup.title.string) if soup.title else "",
                "sections": [],
            }

            # Extract main content sections: any <section> or <div> whose
            # class attribute contains "section".
            for section in soup.find_all(
                ["section", "div"],
                class_=lambda x: x and "section" in x.lower(),
            ):
                section_data = {"heading": "", "content": ""}

                # Get the section's first heading, if any.
                heading = section.find(["h1", "h2", "h3"])
                if heading:
                    section_data["heading"] = self.clean_text(heading.text)

                # Get content paragraphs, skipping empty ones.
                paragraphs = section.find_all("p")
                section_data["content"] = "\n".join(
                    self.clean_text(p.text)
                    for p in paragraphs
                    if self.clean_text(p.text)
                )

                if section_data["heading"] or section_data["content"]:
                    page_data["sections"].append(section_data)

            return page_data
        except Exception as e:
            print(f"Error scraping {full_url}: {e}")
            return None

    def extract_specific_content(self):
        # Extract services.
        if "/services" in self.content["pages"]:
            services_page = self.content["pages"]["/services"]
            for section in services_page["sections"]:
                if section["heading"] and section["content"]:
                    self.content["services"].append(
                        {"name": section["heading"], "description": section["content"]}
                    )

        # Extract solutions.
        if "/solutions" in self.content["pages"]:
            solutions_page = self.content["pages"]["/solutions"]
            for section in solutions_page["sections"]:
                if section["heading"] and section["content"]:
                    self.content["solutions"].append(
                        {"name": section["heading"], "description": section["content"]}
                    )

        # Extract company info from the about page.
        if "/about" in self.content["pages"]:
            about_page = self.content["pages"]["/about"]
            self.content["company_info"] = {
                "name": "SletcherSystems",
                "description": "\n".join(
                    section["content"]
                    for section in about_page["sections"]
                    if section["content"]
                ),
            }

    def scrape_all(self):
        # Scrape each page, pausing between requests.
        for page in self.pages:
            print(f"Scraping {self.base_url}{page}")
            page_data = self.scrape_page(page)
            if page_data:
                self.content["pages"][page] = page_data
            time.sleep(1)  # Be nice to the server.

        # Derive the structured fields from the raw page data.
        self.extract_specific_content()
        return self.content

    def save_to_json(self, filename="site_content.json"):
        # Create the output directory if it doesn't exist yet; otherwise
        # open() would fail for a path like "data/site_content.json".
        os.makedirs(os.path.dirname(filename) or ".", exist_ok=True)
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(self.content, f, indent=2, ensure_ascii=False)


def main():
    scraper = SletcherScraper()
    scraper.scrape_all()
    scraper.save_to_json("data/site_content.json")
    print("Scraping completed and saved to data/site_content.json")


if __name__ == "__main__":
    main()
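
# Consuming the saved output (a minimal sketch; it assumes the scrape
# succeeded and that data/site_content.json exists). Kept as comments so
# importing this module stays side-effect free:
#
#     import json
#     with open("data/site_content.json", encoding="utf-8") as f:
#         content = json.load(f)
#     for service in content["services"]:
#         print(service["name"], "-", service["description"][:80])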