from typing import List, Tuple import math def split_text_into_chunks(text: str, chunk_size: int) -> List[str]: """ Splits the text into chunks of a specified maximum size. """ # Trim the text to remove leading/trailing whitespace and reduce multiple spaces to a single space cleaned_text = " ".join(text.split()) words = cleaned_text.split(" ") chunks = [] current_chunk = [] current_length = 0 for word in words: if current_length + len(word) + 1 > chunk_size: chunks.append(" ".join(current_chunk)) current_chunk = [word] current_length = len(word) else: current_chunk.append(word) current_length += len(word) + 1 if current_chunk: chunks.append(" ".join(current_chunk)) return chunks def distribute_questions_across_chunks(n_chunks: int, n_questions: int) -> List[int]: """ Distributes a specified number of questions across a specified number of chunks. """ # Initial allocation of at least one question to early chunks if possible questions_per_chunk = [1] * min(n_chunks, n_questions) remaining_questions = n_questions - len(questions_per_chunk) # Distribute remaining questions evenly across chunks if remaining_questions > 0: for i in range(len(questions_per_chunk)): if remaining_questions == 0: break questions_per_chunk[i] += 1 remaining_questions -= 1 # If chunks remain, add zeros to match the total chunks. while len(questions_per_chunk) < n_chunks: questions_per_chunk.append(0) return questions_per_chunk def generate_questions_for_text(text: str, chunk_size: int, n_questions: int) -> List[Tuple[str, int]]: """ Splits the text into chunks, distributes questions across them, and returns a list of (chunk, number of questions). """ chunks = split_text_into_chunks(text, chunk_size) n_chunks = len(chunks) questions_distribution = distribute_questions_across_chunks(n_chunks, n_questions) return list(zip(chunks, questions_distribution)) # Example usage text = ( "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin hendrerit urna " "vel erat bibendum, eget condimentum ipsum interdum. Nulla facilisi. Quisque dictum " "eros eu velit varius, eget faucibus mauris euismod. Etiam placerat nisi at urna maximus " "viverra. Integer ut odio nec justo volutpat varius ut quis quam. Suspendisse potenti. " "Donec vulputate quam quis metus sagittis, sed commodo justo ultricies. Nam ut velit " "finibus, venenatis eros vel, consectetur arcu. Praesent vulputate at ligula non elementum. " "Nulla varius condimentum justo, non placerat nisl ullamcorper eu." ) chunk_size = 100 # Max length of each chunk in characters n_questions = 5 # Total number of questions to be asked result = generate_questions_for_text(text, chunk_size, n_questions) for i, (chunk, num_questions) in enumerate(result): print(f"Chunk {i + 1} ({len(chunk.split())} words):") print(f"Questions: {num_questions}") print(chunk) print("-" * 40)