SmokeyBandit committed
Create update_content.py
update_content.py +114 -0
update_content.py
ADDED
@@ -0,0 +1,114 @@
import requests
from bs4 import BeautifulSoup
import json
from datetime import datetime
from huggingface_hub import HfApi

def clean_text(text):
    # Collapse runs of whitespace into single spaces; tolerate None.
    if text:
        return " ".join(text.strip().split())
    return ""

def scrape_page(url):
    try:
        response = requests.get(url, timeout=30)  # timeout added so a dead page can't hang the run
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        sections = []
        # Treat any <section> or <div> whose class mentions "section" as a content block.
        for section in soup.find_all(['section', 'div'], class_=lambda x: x and 'section' in x.lower()):
            section_data = {"heading": "", "content": ""}

            # Get heading
            heading = section.find(['h1', 'h2', 'h3'])
            if heading:
                section_data["heading"] = clean_text(heading.text)

            # Get content
            paragraphs = section.find_all('p')
            content = "\n".join([clean_text(p.text) for p in paragraphs if clean_text(p.text)])
            if not content:  # If no paragraphs, get all text
                content = clean_text(section.get_text())
            section_data["content"] = content

            if section_data["heading"] or section_data["content"]:
                sections.append(section_data)

        return {
            "title": clean_text(soup.title.string) if soup.title else "",
            "sections": sections
        }
    except Exception as e:
        print(f"Error scraping {url}: {e}")
        return {"title": "", "sections": []}

def update_content():
    # Load existing structure
    try:
        with open("data/site_content.json", "r") as f:
            content = json.load(f)
    except Exception as e:
        print(f"Error loading JSON: {e}")
        return

    # Update timestamp
    content["timestamp"] = datetime.now().isoformat()

    # Scrape each page
    for path in content["pages"]:
        url = content["pages"][path]["url"]
        print(f"Scraping {url}")
        page_data = scrape_page(url)
        content["pages"][path].update(page_data)

    # Extract specific content
    services = []
    solutions = []
    clients = []  # reserved for parity with services/solutions; not populated yet

    # Process services
    if "/services" in content["pages"]:
        for section in content["pages"]["/services"]["sections"]:
            if section["heading"] or section["content"]:
                services.append({
                    "name": section["heading"],
                    "description": section["content"]
                })
        content["services"] = services

    # Process solutions
    if "/solutions" in content["pages"]:
        for section in content["pages"]["/solutions"]["sections"]:
            if section["heading"] or section["content"]:
                solutions.append({
                    "name": section["heading"],
                    "description": section["content"]
                })
        content["solutions"] = solutions

    # Process about page for company info
    if "/about" in content["pages"]:
        about_sections = content["pages"]["/about"]["sections"]
        content["company_info"] = {
            "name": "SletcherSystems",
            "description": "\n".join([
                section["content"] for section in about_sections
                if section["content"]
            ])
        }

    # Save updated content
    with open("data/site_content.json", "w") as f:
        json.dump(content, f, indent=2)

    # Upload to Hugging Face Space
    api = HfApi()
    api.upload_file(
        path_or_fileobj="data/site_content.json",
        path_in_repo="data/site_content.json",
        repo_id="SmokeyBandit/SletcherSystems",
        repo_type="space"
    )

if __name__ == "__main__":
    update_content()
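For context, update_content() only runs if data/site_content.json already exists with a "pages" map whose entries each carry a "url"; otherwise it prints an error and returns. Below is a minimal sketch of a seed-file writer, inferred from the fields the script reads. The page paths mirror the ones handled above, but the URLs are placeholders, not values taken from this commit:

import json
from pathlib import Path

# Minimal seed structure inferred from what update_content() reads:
# every entry under "pages" needs at least a "url"; the scraper fills
# in "title" and "sections" on the next run. URLs here are placeholders.
seed = {
    "timestamp": "",
    "pages": {
        "/": {"url": "https://example.com/"},
        "/services": {"url": "https://example.com/services"},
        "/solutions": {"url": "https://example.com/solutions"},
        "/about": {"url": "https://example.com/about"},
    },
}

Path("data").mkdir(exist_ok=True)
with open("data/site_content.json", "w") as f:
    json.dump(seed, f, indent=2)

Note that the final upload step needs write access to the Space: HfApi resolves a token from a prior huggingface-cli login or from the HF_TOKEN environment variable, so one of those must be in place before the script runs.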