# ymcmy's picture
# Update utils.py
# c9509de verified
import re
from datetime import datetime

import numpy as np
import requests
from bs4 import BeautifulSoup
def _sample_problem_number(rng, mean, spread, lowest, highest):
    """Draw one problem number from a rounded normal limited to [lowest, highest]."""
    draws = np.round(rng.normal(mean, spread, 1000))
    pool = draws[(draws >= lowest) & (draws <= highest)]
    if pool.size == 0:
        # Practically unreachable with 1000 draws, but choice() raises on an
        # empty pool, so fall back to the mean clamped into the valid range.
        return int(min(max(round(mean), lowest), highest))
    return int(rng.choice(pool))


def gen_link():
    """Return the AoPS wiki URL of a randomly chosen AMC 12 or AIME problem.

    With equal probability the link points to either:

    * an AMC 12A/12B problem from 2015-2022, numbered 10-25, drawn from a
      rounded normal distribution with mean 18 and standard deviation 5; or
    * an AIME I/II problem from 2005-2022, numbered 1-15, drawn from a
      rounded normal distribution with mean 6 and standard deviation 4.

    Returns:
        str: The full ``https://artofproblemsolving.com/wiki/...`` address.
    """
    # A private generator gets fresh OS entropy on every call without
    # reseeding NumPy's global RNG, so a seed set by the caller survives and
    # the contest choice below is as unpredictable as everything after it.
    # (np.random.default_rng requires NumPy >= 1.17.)
    rng = np.random.default_rng()

    if rng.random() < 0.5:
        # AMC
        year = int(rng.integers(2015, 2023))  # upper bound is exclusive
        AB = str(rng.choice(['A', 'B']))
        q = _sample_problem_number(rng, 18, 5, 10, 25)
        link = f'https://artofproblemsolving.com/wiki/index.php/{year}_AMC_12{AB}_Problems/Problem_{q}'
    else:
        # AIME
        year = int(rng.integers(2005, 2023))  # upper bound is exclusive
        I = str(rng.choice(['I', 'II']))
        q = _sample_problem_number(rng, 6, 4, 1, 15)
        link = f'https://artofproblemsolving.com/wiki/index.php/{year}_AIME_{I}_Problems/Problem_{q}'
    return link
def convert_to_renderable_html(text):
    """Make AoPS LaTeX image URLs in *text* absolute ``https://`` URLs.

    Protocol-relative references (``//latex.artofproblemsolving.com/...``)
    only resolve when the page itself is served over HTTP(S), so they are
    rewritten to carry an explicit ``https:`` scheme.

    Args:
        text: HTML markup as a string.

    Returns:
        str: *text* with every reference to the LaTeX host pointing at
        ``https://latex.artofproblemsolving.com``.
    """
    # The optional scheme is consumed by the pattern so that a URL which is
    # already absolute is normalised instead of turned into "https:https://...";
    # this also makes the function safe to apply more than once.
    return re.sub(
        r'(?:https?:)?//latex\.artofproblemsolving\.com',
        'https://latex.artofproblemsolving.com',
        text,
    )
def get_problem(url, timeout=10):
    """Download an AoPS wiki page and return its problem statement as HTML.

    Args:
        url: Address of the wiki page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        str | None: The ``<p>`` elements that follow the "Problem" heading,
        up to the next ``<h2>``, joined by single spaces and passed through
        ``convert_to_renderable_html``. ``None`` (after printing a notice)
        when the page has no "Problem" heading.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Browser-like User-Agent sent with the request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    problem_headline = soup.find('span', {'class': 'mw-headline', 'id': 'Problem'})
    if problem_headline is None:
        print("No problem found")
        return None

    # The headline span sits inside the heading element; the statement is
    # made of the paragraphs that follow that heading until the next section.
    problem_content = []
    for sibling in problem_headline.parent.find_next_siblings():
        if sibling.name == 'h2':
            break
        if sibling.name == 'p':
            problem_content.append(convert_to_renderable_html(str(sibling)))
    return " ".join(problem_content)
def gen_html(num, max_tries=21):
    """Build a standalone HTML page containing up to *num* random problems.

    Each problem is obtained by calling ``gen_link`` and ``get_problem``; it
    is preceded by a "to AOPS" link to its source page and followed by a
    spacer ``<div>``.

    Args:
        num: Number of problems wanted. Zero or a negative value yields a
            page with an empty body and performs no downloads.
        max_tries: Upper bound on download attempts, successful or not, so
            repeated failures cannot loop forever.

    Returns:
        str: The complete HTML document. It may contain fewer than *num*
        problems if ``max_tries`` is exhausted first.
    """
    sections = []
    attempts = 0
    while len(sections) < num and attempts < max_tries:
        attempts += 1
        try:
            link = gen_link()
            print(link)
            qhtml = get_problem(link)
        except Exception as e:
            # Best-effort: any failure while generating or fetching a problem
            # just costs one attempt; report it and try another link.
            print(f"Error: {e}")
            continue
        if qhtml is None:
            # get_problem already reported that the page had no problem.
            continue
        hype = f'<a href="{link}" target="_blank">to AOPS</a>'
        # Spacer div separates consecutive questions.
        sections.append(hype + qhtml + '<div class="spacer"></div>')

    all_q = "".join(sections)
    return f'''
<html>
<head>
<style>
body {{
font-family: Arial, sans-serif;
font-size: 12pt;
margin: 150px; /* Add side margins */
}}
.spacer {{
margin-top: 50px; /* Add spacing between questions */
}}
img.latex {{
font-size: 12pt; /* Ensure math font size matches the body text size */
}}
a {{
color: blue;
text-decoration: none;
}}
a:hover {{
text-decoration: underline;
}}
</style>
</head>
<body>
{all_q}
</body>
</html>
'''
def save_html_to_file(html_content, output_filename):
    """Write *html_content* to *output_filename* as UTF-8 text.

    The file is opened in write mode, so an existing file at that path is
    overwritten.

    Args:
        html_content: The text to store.
        output_filename: Path of the file to create or replace.
    """
    with open(output_filename, mode='w', encoding='utf-8') as sink:
        sink.write(html_content)