import os
import re

import pymupdf4llm

from .models import parse_message
from .pipeline import Pipeline

# Output formats that format_flashcards knows how to render.
_SUPPORTED_FORMATS = ("json", "csv")

# Matches any run of three or more consecutive newlines.
_EXCESS_NEWLINES = re.compile(r"\n{3,}")


def _normalize_format(output_format: str) -> str:
    """Return the lower-cased output format, or raise if it is unsupported.

    Raises:
        ValueError: If ``output_format`` is not one of ``_SUPPORTED_FORMATS``
            (compared case-insensitively).
    """
    fmt = output_format.lower()
    if fmt not in _SUPPORTED_FORMATS:
        raise ValueError(f"Unsupported output format: {output_format!r}")
    return fmt


def process_pdf(pdf_path: str) -> str:
    """Extract the contents of a PDF file as Markdown text via pymupdf4llm.

    Args:
        pdf_path: Path of the PDF file to convert.

    Returns:
        Whatever ``pymupdf4llm.to_markdown`` returns for the file.

    Raises:
        ValueError: If the conversion fails for any reason; the original
            exception is attached as ``__cause__``.
    """
    try:
        return pymupdf4llm.to_markdown(pdf_path)
    except Exception as e:
        raise ValueError(f"Error processing PDF: {e}") from e


def read_text_file(file_path: str) -> str:
    """Read the full contents of a UTF-8 text file (e.g. ``.txt`` or ``.md``).

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded file contents.

    Raises:
        ValueError: If the file cannot be opened or decoded; the original
            exception is attached as ``__cause__``.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()
    except Exception as e:
        raise ValueError(f"Error reading text file: {e}") from e


def process_file(file_obj, output_format: str, pipeline) -> str:
    """Extract flashcards from an uploaded file, dispatching on its extension.

    Args:
        file_obj: Object whose ``name`` attribute is the path of the file on
            disk.
        output_format: ``"json"`` or ``"csv"`` (case-insensitive).
        pipeline: Object providing ``extract_flashcards(text)``; when ``None``
            a new ``Pipeline()`` is created instead.

    Returns:
        The flashcards rendered in ``output_format``.

    Raises:
        ValueError: If the extension is not ``.pdf``, ``.txt`` or ``.md``, if
            the file cannot be read, or if ``output_format`` is unsupported.
    """
    file_path = file_obj.name
    file_ext = os.path.splitext(file_path)[1].lower()

    if file_ext == ".pdf":
        text = process_pdf(file_path)
    elif file_ext in (".txt", ".md"):
        text = read_text_file(file_path)
    else:
        raise ValueError("Unsupported file type.")

    # Hand the caller's pipeline on instead of silently ignoring it.
    return generate_flashcards(output_format, text, pipeline)


def reduce_newlines(text: str) -> str:
    """Collapse every run of three or more newlines down to exactly two."""
    return _EXCESS_NEWLINES.sub("\n\n", text)


def generate_flashcards(output_format: str, content: str, pipeline=None) -> str:
    """Generate flashcards from ``content`` and render them.

    Args:
        output_format: ``"json"`` or ``"csv"`` (case-insensitive).
        content: Source text to extract flashcards from.
        pipeline: Optional object providing ``extract_flashcards(text)``.
            Defaults to a freshly constructed ``Pipeline()``.

    Returns:
        The flashcards rendered in ``output_format``.

    Raises:
        ValueError: If ``output_format`` is unsupported.
    """
    # Validate first so a bad format fails before the extraction call runs.
    _normalize_format(output_format)

    content = reduce_newlines(content)
    if pipeline is None:
        pipeline = Pipeline()
    response = pipeline.extract_flashcards(content)
    return format_flashcards(output_format, response)


def process_text_input(input_text: str, output_format: str = "csv") -> str:
    """Extract flashcards from raw text supplied directly by the user.

    Args:
        input_text: Text to extract flashcards from.
        output_format: ``"json"`` or ``"csv"`` (case-insensitive).

    Returns:
        The flashcards rendered in ``output_format``.

    Raises:
        ValueError: If ``input_text`` is empty or whitespace-only, or if
            ``output_format`` is unsupported.
    """
    if not input_text.strip():
        raise ValueError("No text provided.")
    # generate_flashcards builds the Pipeline itself; no need to create one here.
    return generate_flashcards(output_format, input_text)


def format_flashcards(output_format: str, response: str) -> str:
    """Parse a pipeline response and render it in the requested format.

    Args:
        output_format: ``"json"`` or ``"csv"`` (case-insensitive).
        response: Raw response to hand to ``parse_message``.

    Returns:
        ``message.content_to_json()`` or ``message.content_to_csv()`` of the
        parsed message, depending on ``output_format``.

    Raises:
        ValueError: If ``output_format`` is unsupported.
        Exception: Anything raised by ``parse_message`` propagates unchanged.
    """
    fmt = _normalize_format(output_format)
    message = parse_message(response)
    if fmt == "json":
        return message.content_to_json()
    return message.content_to_csv()