Spaces:

irmchek
/

mynotebooksummary

Sleeping

App Files Files Community

irmchek committed on Apr 15

Commit

3658694

1 Parent(s): 0318708

initial commit

Browse files

Files changed (3) hide show

.gitignore +174 -0
notebook_enhancer.py +122 -0
requirements.txt +29 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,174 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# Ruff stuff:
+.ruff_cache/
+# PyPI configuration file
+.pypirc

notebook_enhancer.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import nbformat
+import gradio as gr
+from transformers import pipeline
+class NotebookEnhancer:
+    def __init__(self):
+        # Initialize Hugging Face models
+        self.title_generator = pipeline(
+            "summarization", model="facebook/bart-large-cnn"
+        )
+        self.summary_generator = pipeline(
+            "summarization", model="sshleifer/distilbart-cnn-12-6"
+        )
+    def generate_title(self, code):
+        """Generate a concise title for a code cell"""
+        # Limit input length to match model constraints
+        max_length = 1024
+        truncated_code = code[:max_length] if len(code) > max_length else code
+        result = self.title_generator(
+            truncated_code, max_length=10, min_length=3, do_sample=False
+        )
+        title = result[0]["summary_text"].strip()
+        # Format as a markdown title
+        return f"## {title.capitalize()}"
+    def generate_summary(self, code):
+        """Generate a detailed summary for a code cell"""
+        # Limit input length to match model constraints
+        max_length = 1024
+        truncated_code = code[:max_length] if len(code) > max_length else code
+        result = self.summary_generator(
+            truncated_code, max_length=100, min_length=30, do_sample=True
+        )
+        return result[0]["summary_text"].strip()
+    def enhance_notebook(self, notebook_content):
+        """Add title and summary markdown cells before each code cell"""
+        # Load the notebook
+        notebook = nbformat.reads(notebook_content, as_version=4)
+        # Create a new notebook
+        enhanced_notebook = nbformat.v4.new_notebook()
+        enhanced_notebook.metadata = notebook.metadata
+        # Process each cell
+        i = 0
+        while i < len(notebook.cells):
+            cell = notebook.cells[i]
+            # For code cells, add title and summary markdown cells
+            if cell.cell_type == "code" and cell.source.strip():
+                # Generate title
+                title = self.generate_title(cell.source)
+                title_cell = nbformat.v4.new_markdown_cell(title)
+                enhanced_notebook.cells.append(title_cell)
+                # Generate summary
+                summary = self.generate_summary(cell.source)
+                summary_cell = nbformat.v4.new_markdown_cell(summary)
+                enhanced_notebook.cells.append(summary_cell)
+            # Add the original cell
+            enhanced_notebook.cells.append(cell)
+            i += 1
+        # Convert back to string
+        return nbformat.writes(enhanced_notebook)
+def process_notebook(file):
+    """Process an uploaded notebook file"""
+    enhancer = NotebookEnhancer()
+    # Read uploaded file
+    notebook_content = file.decode("utf-8")
+    # Process the notebook
+    enhanced_notebook = enhancer.enhance_notebook(notebook_content)
+    # Save to temp file
+    output_path = "enhanced_notebook.ipynb"
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.write(enhanced_notebook)
+    return output_path
+def build_gradio_interface():
+    """Create and launch the Gradio interface"""
+    with gr.Blocks(title="Notebook Enhancer") as demo:
+        gr.Markdown("# Jupyter Notebook Enhancer")
+        gr.Markdown(
+            """
+        Upload a Jupyter notebook to enhance it with automatically generated titles and summaries for each code cell.
+        This tool uses Hugging Face models to:
+        1. Generate concise titles for code cells
+        2. Create explanatory summaries of what the code does
+        """
+        )
+        with gr.Row():
+            with gr.Column():
+                file_input = gr.File(label="Upload Jupyter Notebook (.ipynb)")
+                process_btn = gr.Button("Enhance Notebook")
+            with gr.Column():
+                output = gr.File(label="Enhanced Notebook")
+        process_btn.click(fn=process_notebook, inputs=file_input, outputs=output)
+    return demo
+# Entry point when this file is executed as a script (not when it is imported):
+# build the interface, then launch it.
+if __name__ == "__main__":
+    demo = build_gradio_interface()
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,29 @@

+# Core dependencies
+nbformat>=5.1.3
+gradio>=3.32.0
+transformers>=4.26.0
+torch>=1.13.1
+accelerate>=0.16.0
+# NLP model dependencies
+sentencepiece>=0.1.97
+protobuf>=3.20.0
+# Notebook dependencies
+jupyterlab>=3.5.0
+ipykernel>=6.21.0
+# Visualization (for example notebooks)
+matplotlib>=3.6.0
+seaborn>=0.12.0
+pandas>=1.5.0
+numpy>=1.23.0
+# ML components (for example notebooks)
+scikit-learn>=1.2.0
+# Development tools
+pytest>=7.2.0
+black>=23.1.0
+flake8>=6.0.0
+isort>=5.12.0