|
from bs4 import BeautifulSoup |
|
import requests |
|
import numpy as np |
|
from datetime import datetime |
|
|
|
def _draw_problem_number(rng, mean, spread, lowest, highest):
    """Draw a rounded normal sample, retrying until it is in [lowest, highest]."""
    while True:
        candidate = int(np.round(rng.normal(mean, spread)))
        if lowest <= candidate <= highest:
            return candidate


def gen_link(rng=None):
    """Build the URL of a randomly chosen AoPS wiki problem page.

    With equal probability the link points at an AMC 12 problem (years
    2015-2022, contest A or B, problems 10-25) or at an AIME problem (years
    2005-2022, contest I or II, problems 1-15).  The problem number is a
    rounded normal sample restricted to the allowed range, centred on 18 with
    standard deviation 5 for the AMC 12 and on 6 with standard deviation 4
    for the AIME.

    Args:
        rng: Optional ``numpy.random.Generator`` to draw from.  When omitted,
            a new generator seeded from OS entropy is created for the call,
            so successive calls are independent and the process-global
            ``np.random`` state is never reseeded or consumed.

    Returns:
        The problem page URL as a string.
    """
    if rng is None:
        rng = np.random.default_rng()

    if rng.random() < 0.5:
        year = rng.integers(2015, 2023)  # upper bound is exclusive
        series = rng.choice(['A', 'B'])
        number = _draw_problem_number(rng, 18, 5, 10, 25)
        contest = f'{year}_AMC_12{series}'
    else:
        year = rng.integers(2005, 2023)  # upper bound is exclusive
        series = rng.choice(['I', 'II'])
        number = _draw_problem_number(rng, 6, 4, 1, 15)
        contest = f'{year}_AIME_{series}'

    return f'https://artofproblemsolving.com/wiki/index.php/{contest}_Problems/Problem_{number}'
|
|
|
def convert_to_renderable_html(text):
    """Give protocol-relative AoPS LaTeX image URLs an explicit https scheme.

    Occurrences of ``//latex.artofproblemsolving.com`` that are not already
    preceded by a scheme separator (``:``) are rewritten to
    ``https://latex.artofproblemsolving.com``.  URLs that already carry a
    scheme are left untouched, so applying the function twice yields the
    same result as applying it once.

    Args:
        text: HTML markup as a string.

    Returns:
        The markup with the LaTeX host references made absolute.
    """
    import re  # imported here so the function does not rely on a file-level import

    # The lookbehind skips "https://latex..." / "http://latex...", which a
    # plain str.replace would corrupt into "https:https://latex...".
    return re.sub(
        r'(?<!:)//latex\.artofproblemsolving\.com',
        'https://latex.artofproblemsolving.com',
        text,
    )
|
|
|
def get_problem(url, timeout=10):
    """Fetch a wiki page and return the HTML of its "Problem" section.

    The page is downloaded with a browser-like User-Agent header and parsed
    with BeautifulSoup.  The section starts at the
    ``<span class="mw-headline" id="Problem">`` headline; every ``<p>``
    sibling following the headline's parent element is collected until the
    next ``<h2>`` is reached.  Each paragraph is passed through
    ``convert_to_renderable_html`` and the results are joined with a single
    space.

    Args:
        url: Address of the problem page.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The problem statement as an HTML string (possibly empty when the
        section contains no ``<p>`` elements), or ``None`` when the page has
        no "Problem" headline; in that case "No problem found" is printed.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    problem_headline = soup.find('span', {'class': 'mw-headline', 'id': 'Problem'})

    if problem_headline is None:
        print("No problem found")
        return None

    problem_content = []
    for sibling in problem_headline.parent.find_next_siblings():
        if sibling.name == 'h2':
            # The next second-level heading marks the end of the problem text.
            break
        if sibling.name == 'p':
            problem_content.append(convert_to_renderable_html(str(sibling)))

    return " ".join(problem_content)
|
|
|
def gen_html(num):
    """Build a standalone HTML page containing up to ``num`` random problems.

    Links are produced by ``gen_link`` and fetched with ``get_problem``.  An
    attempt that raises, or for which ``get_problem`` returns ``None``, is
    skipped.  Fetching stops once ``num`` problems have been collected or
    after 21 attempts, whichever comes first, so the page may contain fewer
    than ``num`` problems.  When ``num`` is zero or negative no fetch is
    attempted and the page body is empty.

    Each generated link is printed, as is the message of any exception
    raised while fetching.

    Args:
        num: Number of problems wanted.

    Returns:
        The complete HTML document as a string.
    """
    max_attempts = 21
    sections = []

    for _ in range(max_attempts):
        # Checked before fetching so that num <= 0 performs no requests.
        if len(sections) >= num:
            break

        try:
            link = gen_link()
            print(link)
            qhtml = get_problem(link)
        except Exception as e:
            # Best effort: one failing page must not abort the whole document.
            print(f"Error: {e}")
            continue

        if qhtml is None:
            # The page had no problem section; try another link.
            continue

        hype = f'<a href="{link}" target="_blank">to AOPS</a>'
        sections.append(hype + qhtml + '<div class="spacer"></div>')

    all_q = "".join(sections)

    return f'''
    <html>
    <head>
    <style>
        body {{
            font-family: Arial, sans-serif;
            font-size: 12pt;
            margin: 150px; /* Add side margins */
        }}
        .spacer {{
            margin-top: 50px; /* Add spacing between questions */
        }}
        img.latex {{
            font-size: 12pt; /* Ensure math font size matches the body text size */
        }}
        a {{
            color: blue;
            text-decoration: none;
        }}
        a:hover {{
            text-decoration: underline;
        }}
    </style>
    </head>
    <body>
        {all_q}
    </body>
    </html>
    '''
|
|
|
def save_html_to_file(html_content, output_filename):
    """Write ``html_content`` to ``output_filename`` as UTF-8 text.

    The file is opened in write mode, so an existing file at that path is
    overwritten.

    Args:
        html_content: The text to store.
        output_filename: Destination path passed to ``open``.
    """
    with open(output_filename, mode='w', encoding='utf-8') as out_handle:
        out_handle.write(html_content)
|
|