Nathan Slaughter committed on
Commit
8428312
1 Parent(s): c7db8fe
.gitignore CHANGED
@@ -1 +1,3 @@
1
  __pycache__
 
 
 
1
  __pycache__
2
+ .coverage
3
+ *.log
app/interface.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  from .pipeline import Pipeline
3
  from .processing import process_file, process_text_input
4
 
 
1
  import gradio as gr
2
+
3
  from .pipeline import Pipeline
4
  from .processing import process_file, process_text_input
5
 
app/models.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import csv
3
+ from io import StringIO
4
+
5
+ from pydantic import BaseModel, validator, ValidationError
6
+
7
class Card(BaseModel):
    """A single flashcard: one question paired with its answer."""
    question: str
    answer: str
10
+
11
class Message(BaseModel):
    """A chat message whose content is a list of flashcards.

    ``content`` may be given either as a ready-made list of ``Card``
    objects or as a JSON string encoding such a list; the pre-validator
    decodes the string form before field validation runs.
    """
    role: str
    content: list[Card]

    @validator('content', pre=True)
    def parse_content(cls, v):
        # Non-string values are handed to pydantic untouched.
        if not isinstance(v, str):
            return v
        try:
            return json.loads(v)
        except json.JSONDecodeError as e:
            raise ValueError(f"Error decoding 'content' JSON: {e}") from e

    def content_to_json(self) -> str:
        """Render the cards as a pretty-printed JSON array."""
        card_dicts = [card.dict() for card in self.content]
        return json.dumps(card_dicts, indent=2)

    def content_to_csv(self) -> str:
        """
        Converts the content of the Message instance into a CSV string.
        """
        buffer = StringIO()
        # csv defaults to Windows-style "\r\n" terminators; force Unix-style.
        csv_writer = csv.writer(buffer, lineterminator='\n')
        csv_writer.writerow(["Question", "Answer"])
        csv_writer.writerows((card.question, card.answer) for card in self.content)
        rendered = buffer.getvalue()
        buffer.close()
        return rendered
41
+
42
class PydanticEncoder(json.JSONEncoder):
    """JSON encoder that serializes pydantic models via their ``.dict()`` form."""

    def default(self, obj):
        # Fall through to the stock encoder for anything that is not a model.
        if not isinstance(obj, BaseModel):
            return super().default(obj)
        return obj.dict()
app/pipeline.py CHANGED
@@ -1,48 +1,14 @@
1
  from io import StringIO
2
- import csv
3
  import json
4
  import logging
5
 
6
  import torch
7
  from transformers import pipeline
8
- from pydantic import BaseModel, ValidationError, validator
9
 
10
- logger = logging.getLogger(__name__)
11
-
12
- class Card(BaseModel):
13
- question: str
14
- answer: str
15
-
16
- class Message(BaseModel):
17
- role: str
18
- content: list[Card]
19
-
20
- @validator('content', pre=True)
21
- def parse_content(cls, v):
22
- if isinstance(v, str):
23
- try:
24
- content_list = json.loads(v)
25
- return content_list
26
- except json.JSONDecodeError as e:
27
- raise ValueError(f"Error decoding 'content' JSON: {e}") from e
28
- return v
29
-
30
- def content_to_json(self) -> str:
31
- return json.dumps([card.dict() for card in self.content], indent=2)
32
-
33
- def content_to_csv(self) -> str:
34
- output = StringIO()
35
- writer = csv.writer(output)
36
- writer.writerow(['Question', 'Answer']) # CSV Header
37
- for card in self.content:
38
- writer.writerow([card.question, card.answer])
39
- return output.getvalue()
40
 
41
- class PydanticEncoder(json.JSONEncoder):
42
- def default(self, obj):
43
- if isinstance(obj, BaseModel):
44
- return obj.dict()
45
- return super().default(obj)
46
 
47
  class Pipeline:
48
  def __init__(self, model_name: str = "Qwen/Qwen2.5-7B-Instruct"):
@@ -53,6 +19,7 @@ class Pipeline:
53
  device_map="auto"
54
  )
55
  self.device = self._determine_device()
 
56
  self.messages = [
57
  {"role": "system", "content": """You are an expert flashcard creator. You always include a single knowledge item per flashcard.
58
  - You ALWAYS include a single knowledge item per flashcard.
@@ -77,46 +44,9 @@ class Pipeline:
77
  )[0]["generated_text"][-1]
78
  return response_message
79
 
80
- def format_flashcards(self, output_format: str, response: str) -> str:
81
- output = ""
82
- try :
83
- message = parse_message(response)
84
- logger.debug("after parse_obj_as")
85
- except ValidationError as e:
86
- raise e
87
- if output_format.lower() == "json":
88
- output = message.content_to_json()
89
- elif output_format.lower() == "csv":
90
- output = message.content_to_csv()
91
- return output
92
-
93
  def generate_flashcards(self, output_format: str, content: str) -> str:
94
  response = self.extract_flashcards(content)
95
- return self.format_flashcards(output_format, response)
96
-
97
- def parse_message(self, input_dict: dict[str, any]) -> Message:
98
- try:
99
- # Extract the role
100
- role = input_dict['role']
101
-
102
- # Parse the content
103
- content = input_dict['content']
104
- # If content is a string, try to parse it as JSON
105
- if isinstance(content, str):
106
- content = content.strip()
107
- content = json.loads(content)
108
-
109
- # Create Card objects from the content
110
- cards = [Card(**item) for item in content]
111
-
112
- # Create and return the Message object
113
- return Message(role=role, content=cards)
114
- except json.JSONDecodeError as e:
115
- raise ValueError(f"Invalid JSON in content: {str(e)}")
116
- except ValidationError as e:
117
- raise ValueError(f"Validation error: {str(e)}")
118
- except KeyError as e:
119
- raise ValueError(f"Missing required key: {str(e)}")
120
 
121
  def _determine_device(self):
122
  if torch.cuda.is_available():
@@ -144,8 +74,11 @@ def parse_message(input_dict: dict[str, any]) -> Message:
144
  # Create and return the Message object
145
  return Message(role=role, content=cards)
146
  except json.JSONDecodeError as e:
 
147
  raise ValueError(f"Invalid JSON in content: {str(e)}")
148
  except ValidationError as e:
 
149
  raise ValueError(f"Validation error: {str(e)}")
150
  except KeyError as e:
 
151
  raise ValueError(f"Missing required key: {str(e)}")
 
1
  from io import StringIO
 
2
  import json
3
  import logging
4
 
5
  import torch
6
  from transformers import pipeline
 
7
 
8
+ from .models import Card, Message, ValidationError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
+ logger = logging.getLogger(__name__)
11
+ logging.basicConfig(filename="pipeline.log", level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S")
 
 
 
12
 
13
  class Pipeline:
14
  def __init__(self, model_name: str = "Qwen/Qwen2.5-7B-Instruct"):
 
19
  device_map="auto"
20
  )
21
  self.device = self._determine_device()
22
+ logger.info(f"device type: {self.device}")
23
  self.messages = [
24
  {"role": "system", "content": """You are an expert flashcard creator. You always include a single knowledge item per flashcard.
25
  - You ALWAYS include a single knowledge item per flashcard.
 
44
  )[0]["generated_text"][-1]
45
  return response_message
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  def generate_flashcards(self, output_format: str, content: str) -> str:
48
  response = self.extract_flashcards(content)
49
+ return format_flashcards(output_format, response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  def _determine_device(self):
52
  if torch.cuda.is_available():
 
74
  # Create and return the Message object
75
  return Message(role=role, content=cards)
76
  except json.JSONDecodeError as e:
77
+ logger.error(f"Invalid JSON in content: {str(e)}")
78
  raise ValueError(f"Invalid JSON in content: {str(e)}")
79
  except ValidationError as e:
80
+ logger.error(f"Validation error: {str(e)}")
81
  raise ValueError(f"Validation error: {str(e)}")
82
  except KeyError as e:
83
+ logger.error(f"Missing required key: {str(e)}")
84
  raise ValueError(f"Missing required key: {str(e)}")
app/processing.py CHANGED
@@ -22,47 +22,6 @@ def read_text_file(file_path: str) -> str:
22
  except Exception as e:
23
  raise ValueError(f"Error reading text file: {str(e)}")
24
 
25
- def format_prompt(output_format: str) -> str:
26
- """
27
- Formats the prompt based on the output type.
28
- """
29
- if output_format.lower() == "json":
30
- return """You only respond in JSON format. Follow the example below.
31
-
32
- EXAMPLE:
33
- [
34
- {"question": "What is AI?", "answer": "Artificial Intelligence."},
35
- {"question": "What is ML?", "answer": "Machine Learning."}
36
- ]
37
- """
38
- elif output_format.lower() == "csv":
39
- return """You only respond with cards in CSV format. Follow the example below.
40
-
41
- EXAMPLE:
42
- "What is AI?", "Artificial Intelligence."
43
- "What is ML?", "Machine Learning."
44
- """
45
-
46
- # def extract_flashcards(text: str, output_format: str, pipeline: str) -> str:
47
- # """
48
- # Extracts flashcards from the input text using the LLM and formats them in CSV or JSON.
49
- # """
50
- # prompt = f"""You are an expert flashcard creator. You always include a single knowledge item per flashcard.
51
-
52
- # {format_prompt(output_format)}
53
-
54
-
55
- # Extract flashcards from the user's text:
56
-
57
- # {text}
58
-
59
- # Do not include the prompt or any other unnecessary information in the flashcards.
60
- # Do not include triple ticks (```) or any other code blocks in the flashcards.
61
- # """
62
- # # TODO:
63
- # response = pipeline.generate_flashcards("json", prompt)
64
- # return response
65
-
66
  def process_file(file_obj, output_format: str, pipeline) -> str:
67
  """
68
  Processes the uploaded file based on its type and extracts flashcards.
@@ -89,3 +48,17 @@ def process_text_input(output_format: str, input_text: str) -> str:
89
 
90
  flashcards = pipeline.generate_flashcards(output_format, input_text)
91
  return flashcards
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  except Exception as e:
23
  raise ValueError(f"Error reading text file: {str(e)}")
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  def process_file(file_obj, output_format: str, pipeline) -> str:
26
  """
27
  Processes the uploaded file based on its type and extracts flashcards.
 
48
 
49
  flashcards = pipeline.generate_flashcards(output_format, input_text)
50
  return flashcards
51
+
52
+
53
def format_flashcards(output_format: str, response: dict) -> str:
    """Validate a model response and render its flashcards as JSON or CSV.

    Args:
        output_format: "json" or "csv" (case-insensitive); any other value
            yields an empty string.
        response: Raw message dict produced by the pipeline
            (keys ``role`` and ``content``).

    Returns:
        The formatted flashcard string.

    Raises:
        ValueError: If the response content is invalid JSON, fails model
            validation, or is missing a required key (raised by
            ``parse_message``).
    """
    # The previous version was a module-level function that still carried a
    # stray `self` parameter, so the two-argument call site in
    # Pipeline.generate_flashcards raised TypeError.  It also referenced
    # `parse_message` and `logger`, neither of which exists in this module.
    # Import locally to avoid a circular import with app.pipeline.
    from .pipeline import parse_message

    message = parse_message(response)
    if output_format.lower() == "json":
        return message.content_to_json()
    if output_format.lower() == "csv":
        return message.content_to_csv()
    return ""
tests/conftest.py CHANGED
@@ -1,6 +1,6 @@
1
  import pytest
2
  from unittest.mock import Mock
3
- from app.pipeline import LanguageModel
4
 
5
  @pytest.fixture
6
  def pipeline():
@@ -8,7 +8,7 @@ def pipeline():
8
  Fixture to provide a mocked LanguageModel instance.
9
  """
10
  # Create a mock instance of LanguageModel
11
- lm = Mock(spec=LanguageModel)
12
  # Mock the generate_flashcards method
13
  lm.generate_flashcards.return_value = '{"flashcards": []}'
14
  return lm
 
1
  import pytest
2
  from unittest.mock import Mock
3
+ from app.pipeline import Pipeline
4
 
5
@pytest.fixture
def pipeline():
    """
    Fixture to provide a mocked Pipeline instance.
    """
    # Create a mock instance of Pipeline (spec restricts the mock to the
    # real class's attribute surface)
    lm = Mock(spec=Pipeline)
    # Mock the generate_flashcards method to return an empty flashcard payload
    lm.generate_flashcards.return_value = '{"flashcards": []}'
    return lm
tests/test_models.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ from app.models import Card, Message
4
+
5
+ # Tests for Card and Message models
6
+ def test_card_model():
7
+ card = Card(question="What is Python?", answer="A programming language")
8
+ assert card.question == "What is Python?"
9
+ assert card.answer == "A programming language"
10
+
11
def test_message_model():
    """Message keeps its role and an ordered list of cards."""
    deck = [
        Card(question="What is AI?", answer="Artificial Intelligence"),
        Card(question="What is ML?", answer="Machine Learning"),
    ]
    msg = Message(role="assistant", content=deck)
    assert msg.role == "assistant"
    assert len(msg.content) == 2
    assert msg.content[0].question == "What is AI?"
20
+
21
def test_message_content_json_parsing():
    """A JSON-encoded content string is decoded into Card objects."""
    raw = '[{"question": "Q1", "answer": "A1"}, {"question": "Q2", "answer": "A2"}]'
    msg = Message(role="assistant", content=raw)
    assert len(msg.content) == 2
    assert msg.content[0].question == "Q1"
26
+
27
def test_message_content_to_json():
    """content_to_json emits the cards as a JSON array of dicts."""
    msg = Message(
        role="assistant",
        content=[Card(question="Q1", answer="A1"), Card(question="Q2", answer="A2")],
    )
    expected = [
        {"question": "Q1", "answer": "A1"},
        {"question": "Q2", "answer": "A2"},
    ]
    assert json.loads(msg.content_to_json()) == expected
35
+
36
def test_message_content_to_csv():
    """content_to_csv emits a header row plus one Unix-terminated row per card.

    The stale "# failed test" marker and the leftover debug print() are
    removed; the assertion itself is unchanged.
    """
    cards = [Card(question="Q1", answer="A1"), Card(question="Q2", answer="A2")]
    message = Message(role="assistant", content=cards)
    csv_output = message.content_to_csv()
    expected_output = "Question,Answer\nQ1,A1\nQ2,A2\n"  # Unix-style line endings
    assert csv_output == expected_output
tests/test_pipeline.py CHANGED
@@ -1,18 +1,67 @@
1
  import pytest
 
 
 
 
 
 
2
 
3
- def test_generate_flashcards(pipeline, mocker):
4
- """
5
- Test the generate_flashcards method of LanguageModel.
6
- """
7
- prompt = "Sample prompt for flashcard generation."
8
- expected_response = '{"flashcards": [{"Question": "What is AI?", "Answer": "Artificial Intelligence."}]}'
9
 
10
- # Configure the mock to return a specific response
11
- pipeline.generate_flashcards.return_value = expected_response
 
 
 
12
 
13
- # Call the method
14
- response = pipeline.generate_flashcards(prompt)
 
 
15
 
16
- # Assertions
17
- assert response == expected_response
18
- pipeline.generate_flashcards.assert_called_once_with(prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import pytest
2
+ from unittest.mock import Mock, patch
3
+ import json
4
+ from io import StringIO
5
+ from pydantic import ValidationError
6
+ from app.pipeline import Pipeline, Message, Card, parse_message
7
+ from app.models import PydanticEncoder
8
 
9
# Tests for Pipeline class
@pytest.fixture
def mock_pipeline():
    """Yield a Pipeline built against a mocked transformers factory.

    Patches the `pipeline` name inside app.pipeline (the transformers
    factory called by Pipeline.__init__) so constructing the fixture does
    not download or load a real model.  The previous target,
    patch('app.pipeline'), replaced the module attribute on the package
    but still let Pipeline("mock_model") run the real model load.
    """
    with patch('app.pipeline.pipeline') as mock_pipe:
        mock_pipe.return_value = Mock()
        yield Pipeline("mock_model")
15
 
16
+ # def test_extract_flashcards(mock_pipeline):
17
+ # mock_pipeline.torch_pipe.return_value = [{"generated_text": [{"role": "assistant", "content": '[{"question": "Q", "answer": "A"}]'}]}]
18
+ # response = mock_pipeline.extract_flashcards("Test content")
19
+ # assert isinstance(response, dict)
20
+ # assert "content" in response
21
 
22
+ # def test_format_flashcards_csv(mock_pipeline):
23
+ # response = {"role": "assistant", "content": '[{"question": "Q", "answer": "A"}]'}
24
+ # formatted = mock_pipeline.format_flashcards("csv", response)
25
+ # assert formatted.strip() == "Question,Answer\nQ,A"
26
 
27
+ # def test_generate_flashcards(mock_pipeline):
28
+ # mock_pipeline.extract_flashcards.return_value = {"role": "assistant", "content": '[{"question": "Q", "answer": "A"}]'}
29
+ # result = mock_pipeline.generate_flashcards("json", "Test content")
30
+ # assert json.loads(result) == [{"question": "Q", "answer": "A"}]
31
+
32
# Tests for parse_message function
def test_parse_message_valid_input():
    """A well-formed role/content dict parses into a Message."""
    payload = {
        "role": "assistant",
        "content": '[{"question": "Q1", "answer": "A1"}, {"question": "Q2", "answer": "A2"}]'
    }
    result = parse_message(payload)
    assert isinstance(result, Message)
    assert result.role == "assistant"
    assert len(result.content) == 2
42
+
43
def test_parse_message_invalid_json():
    """Content that is not valid JSON raises ValueError."""
    payload = {"role": "assistant", "content": 'Invalid JSON'}
    with pytest.raises(ValueError, match="Invalid JSON in content"):
        parse_message(payload)
50
+
51
def test_parse_message_missing_key():
    """A dict lacking the 'role' key raises ValueError."""
    payload = {"content": '[{"question": "Q", "answer": "A"}]'}
    with pytest.raises(ValueError, match="Missing required key"):
        parse_message(payload)
57
+
58
# Test for PydanticEncoder
def test_pydantic_encoder():
    """PydanticEncoder serializes a BaseModel via its dict form."""
    serialized = json.dumps(Card(question="Q", answer="A"), cls=PydanticEncoder)
    assert json.loads(serialized) == {"question": "Q", "answer": "A"}
63
+
64
# Test error cases
def test_message_invalid_content():
    """A non-JSON content string fails Message validation."""
    with pytest.raises(ValidationError):
        Message(role="assistant", content="Invalid content")
tests/test_processing.py CHANGED
@@ -1,71 +1,40 @@
1
  import pytest
2
- from app.processing import process_text_input, process_file
3
-
4
- def test_process_text_input_success(pipeline):
5
- """
6
- Test processing of valid text input.
7
- """
8
- input_text = "This is a sample text for flashcard extraction."
9
- output_format = "JSON"
10
- expected_output = '{"flashcards": []}'
11
-
12
- result = process_text_input(input_text, output_format, pipeline)
13
- assert result == expected_output
14
- pipeline.generate_flashcards.assert_called_once()
15
-
16
- def test_process_text_input_empty(pipeline):
17
- """
18
- Test processing of empty text input.
19
- """
20
- input_text = " "
21
- output_format = "JSON"
22
-
23
- with pytest.raises(ValueError) as excinfo:
24
- process_text_input(input_text, output_format, pipeline)
25
- assert "No text provided." in str(excinfo.value)
26
-
27
- def test_process_file_unsupported_type(pipeline, tmp_path):
28
- """
29
- Test processing of an unsupported file type.
30
- """
31
- # Create a dummy unsupported file
32
- dummy_file = tmp_path / "dummy.unsupported"
33
- dummy_file.write_text("Unsupported content")
34
-
35
- with pytest.raises(ValueError) as excinfo:
36
- process_file(dummy_file, "JSON", pipeline)
37
- assert "Unsupported file type." in str(excinfo.value)
38
-
39
- def test_process_file_pdf(pipeline, tmp_path, mocker):
40
- """
41
- Test processing of a PDF file.
42
- """
43
- # Mock the process_pdf function
44
- mocker.patch('app.processing.process_pdf', return_value="Extracted PDF text.")
45
-
46
- # Create a dummy PDF file
47
- dummy_file = tmp_path / "test.pdf"
48
- dummy_file.write_text("PDF content")
49
-
50
- expected_output = '{"flashcards": []}'
51
-
52
- result = process_file(dummy_file, "JSON", pipeline)
53
- assert result == expected_output
54
- pipeline.generate_flashcards.assert_called_once()
55
-
56
- def test_process_file_txt(pipeline, tmp_path, mocker):
57
- """
58
- Test processing of a TXT file.
59
- """
60
- # Mock the read_text_file function
61
- mocker.patch('app.processing.read_text_file', return_value="Extracted TXT text.")
62
-
63
- # Create a dummy TXT file
64
- dummy_file = tmp_path / "test.txt"
65
- dummy_file.write_text("TXT content")
66
-
67
- expected_output = '{"flashcards": []}'
68
-
69
- result = process_file(dummy_file, "JSON", pipeline)
70
- assert result == expected_output
71
- pipeline.generate_flashcards.assert_called_once()
 
1
  import pytest
2
+ from unittest.mock import patch, Mock
3
+ from app.processing import process_pdf, read_text_file, process_file, process_text_input
4
+
5
def test_read_text_file_error():
    """An IOError while opening the file surfaces as a ValueError."""
    with patch("builtins.open", side_effect=IOError("File read error")):
        with pytest.raises(ValueError, match="Error reading text file: File read error"):
            read_text_file("test.txt")
9
+
10
# Test for process_file function
def test_process_file_pdf(pipeline):
    """A .pdf upload is routed through process_pdf before flashcard generation."""
    fake_upload = Mock()
    fake_upload.name = "test.pdf"

    with patch('app.processing.process_pdf', return_value="PDF content"):
        output = process_file(fake_upload, "json", pipeline)
        pipeline.generate_flashcards.assert_called_once_with("json", "PDF content")
        assert output == '{"flashcards": []}'
19
+
20
def test_process_file_txt(pipeline):
    """A .txt upload is read via read_text_file before flashcard generation."""
    fake_upload = Mock()
    fake_upload.name = "test.txt"

    with patch('app.processing.read_text_file', return_value="Text content"):
        output = process_file(fake_upload, "json", pipeline)
        pipeline.generate_flashcards.assert_called_once_with("json", "Text content")
        assert output == '{"flashcards": []}'
28
+
29
def test_process_file_unsupported():
    """An unrecognized extension raises ValueError before the pipeline is used."""
    fake_upload = Mock()
    fake_upload.name = "test.unsupported"

    with pytest.raises(ValueError, match="Unsupported file type."):
        process_file(fake_upload, "json", None)
35
+
36
# Ensure the pipeline fixture is used in all tests that require it
@pytest.mark.usefixtures("pipeline")
class TestWithPipeline:
    def test_pipeline_usage(self, pipeline):
        """The shared fixture is preconfigured with the canned flashcard payload."""
        canned = pipeline.generate_flashcards.return_value
        assert canned == '{"flashcards": []}'