SmolLM2-135M-Instruct-GGUF

All rights reserved to the original owners of the model. For more details, refer to the original model card: https://huggingface.co/HuggingFaceTB

Introduction

SmolLM2 is a family of compact language models available in three sizes: 135M, 360M, and 1.7B parameters. They are capable of solving a wide range of tasks while being lightweight enough to run on-device.

Quickstart

SmolLM2-135M-Instruct-GGUF can be loaded and used via llama.cpp. Below is an example program with a GUI; install the dependencies first, then run the script.

pip install PyQt5 llama-cpp-python pymupdf
import sys
import os
from PyQt5.QtWidgets import (QApplication, QWidget, QLabel, QPushButton,
                             QLineEdit, QTextEdit, QVBoxLayout, QHBoxLayout,
                             QFileDialog, QProgressBar, QMessageBox, QMenu)
from PyQt5.QtCore import Qt, QThread, pyqtSignal
from llama_cpp import Llama
import fitz  # For PDF processing

class Worker(QThread):
    """Background thread that streams one chat completion from the model.

    Signals:
        finished(str): emitted once per run with the complete response text,
            or with an "Error generating response: ..." message if generation
            raises.
        progress(int, int): emitted after every streamed piece of text with
            (number of pieces received so far, max_tokens).
    """

    # NOTE: this class-level signal shadows QThread's built-in no-argument
    # ``finished`` signal; callers connect to this str-carrying version.
    finished = pyqtSignal(str)
    progress = pyqtSignal(int, int)

    def __init__(self, model, messages, max_tokens):
        """Store the model, the chat history to send, and the token budget.

        Args:
            model: object exposing ``create_chat_completion`` (a llama_cpp
                ``Llama`` instance in this program).
            messages: list of ``{"role": ..., "content": ...}`` dicts.
            max_tokens: upper bound passed to the completion call and used
                as the progress total.
        """
        super().__init__()
        self.model = model
        self.messages = messages
        self.max_tokens = max_tokens

    def run(self):
        """Stream the completion, reporting progress, then emit the result."""
        try:
            stream = self.model.create_chat_completion(
                messages=self.messages,
                max_tokens=self.max_tokens,
                temperature=0.7,
                stream=True
            )

            pieces = []
            for chunk in stream:
                # Skip chunks without a usable choice instead of raising and
                # throwing away everything generated so far.
                choices = chunk.get("choices")
                if not choices:
                    continue
                delta = choices[0].get("delta") or {}
                content = delta.get("content")
                # Role-only and finish chunks carry no text (content missing,
                # None, or empty); they must not advance the progress count.
                if not content:
                    continue
                pieces.append(content)
                self.progress.emit(len(pieces), self.max_tokens)
            self.finished.emit("".join(pieces))
        except Exception as e:
            # Top-level boundary of the thread: report the failure through
            # the same signal so the GUI always gets exactly one result.
            self.finished.emit(f"Error generating response: {str(e)}")

class ChatbotGUI(QWidget):
    """Chat window for a local GGUF model with optional PDF context.

    The window lets the user load a GGUF model, optionally load a PDF whose
    text is added to the conversation, change the thread count, exchange
    messages with the model, and save generated Python code to a file.
    """

    # System prompt that starts every (new or cleared) conversation.
    _SYSTEM_PROMPT = "You are a helpful AI assistant."
    # Token budget for one assistant reply; also the progress bar maximum.
    _MAX_TOKENS = 1000
    # Markdown fences used to detect and unwrap a Python-only reply.
    _FENCE_OPEN = "```python"
    _FENCE_CLOSE = "```"

    def __init__(self):
        """Initialise the conversation state and build the widgets."""
        super().__init__()
        self.setWindowTitle("Chatbot GUI")
        self.resize(800, 600)

        self.model = None
        self.messages = self._initial_messages()
        self.thread_count = 12
        self.pdf_content = ""
        # True once the currently loaded PDF text has been added to
        # self.messages, so it is not appended again on every send.
        self._pdf_in_history = False
        # Currently running (or last finished) generation thread.
        self.worker = None

        self.initUI()

    @classmethod
    def _initial_messages(cls):
        """Return a fresh message list containing only the system prompt."""
        return [{"role": "system", "content": cls._SYSTEM_PROMPT}]

    def initUI(self):
        """Create all widgets and arrange them in the main vertical layout."""
        # Model loading section. The labels are kept as attributes so other
        # methods can update them directly instead of digging through the
        # layout by index.
        self.model_label = QLabel("Model: No model loaded")
        load_button = QPushButton("Load GGUF Model")
        load_button.clicked.connect(self.load_model)

        model_layout = QHBoxLayout()
        model_layout.addWidget(self.model_label)
        model_layout.addWidget(load_button)

        # PDF upload section
        self.pdf_label = QLabel("PDF: No PDF loaded")
        upload_pdf_button = QPushButton("Upload PDF")
        upload_pdf_button.clicked.connect(self.upload_pdf)

        pdf_layout = QHBoxLayout()
        pdf_layout.addWidget(self.pdf_label)
        pdf_layout.addWidget(upload_pdf_button)

        # Thread count section
        self.thread_label = QLabel(f"Thread Count: {self.thread_count}")
        self.thread_input = QLineEdit()
        self.thread_input.setPlaceholderText("Enter new thread count")
        update_thread_button = QPushButton("Update Threads")
        update_thread_button.clicked.connect(self.update_thread_count)

        thread_layout = QHBoxLayout()
        thread_layout.addWidget(self.thread_label)
        thread_layout.addWidget(self.thread_input)
        thread_layout.addWidget(update_thread_button)

        # Chat display
        self.chat_display = QTextEdit()
        self.chat_display.setReadOnly(True)
        self.chat_display.setContextMenuPolicy(Qt.CustomContextMenu)
        self.chat_display.customContextMenuRequested.connect(self.show_context_menu)

        # User input
        self.user_input = QLineEdit()
        self.user_input.returnPressed.connect(self.send_message)
        send_button = QPushButton("Send")
        send_button.clicked.connect(self.send_message)

        input_layout = QHBoxLayout()
        input_layout.addWidget(self.user_input)
        input_layout.addWidget(send_button)

        # Progress bar (only visible while a reply is being generated)
        self.progress_bar = QProgressBar()
        self.progress_bar.hide()

        # Clear conversation button
        clear_button = QPushButton("Clear Conversation")
        clear_button.clicked.connect(self.clear_conversation)

        # Main layout: model, PDF, threads, chat, progress, input, clear
        main_layout = QVBoxLayout()
        main_layout.addLayout(model_layout)
        main_layout.addLayout(pdf_layout)
        main_layout.addLayout(thread_layout)
        main_layout.addWidget(self.chat_display)
        main_layout.addWidget(self.progress_bar)
        main_layout.addLayout(input_layout)
        main_layout.addWidget(clear_button)

        self.setLayout(main_layout)

    def load_model(self):
        """Ask for a .gguf file and load it with the current thread count."""
        model_path, _ = QFileDialog.getOpenFileName(self, "Load GGUF Model", "", "GGUF Files (*.gguf)")
        if not model_path:
            return
        try:
            self.model = Llama(model_path=model_path, n_ctx=2048, n_gpu_layers=-1, n_threads=self.thread_count)
        except Exception as e:
            error_message = f"Error loading model: {str(e)}"
            QMessageBox.critical(self, "Error", error_message)
            return
        model_name = os.path.basename(model_path)
        self.model_label.setText(f"Model: {model_name}")
        QMessageBox.information(self, "Success", "Model loaded successfully!")

    def update_thread_count(self):
        """Validate the thread-count field and store the new value.

        Shows a warning and changes nothing when the field does not contain
        a positive integer.
        """
        try:
            new_thread_count = int(self.thread_input.text().strip())
        except ValueError:
            new_thread_count = 0  # fall through to the shared warning below
        if new_thread_count <= 0:
            QMessageBox.warning(self, "Invalid Input", "Thread count must be a positive integer")
            return

        self.thread_count = new_thread_count
        self.thread_label.setText(f"Thread Count: {self.thread_count}")
        self.thread_input.clear()

        message = f"Thread count updated to {self.thread_count}"
        if self.model is not None and not self._apply_thread_count_to_model():
            # The loaded model could not be reconfigured in place; the value
            # is still passed as n_threads the next time load_model runs.
            message += ". It will take effect the next time a model is loaded."
        QMessageBox.information(self, "Success", message)

    def _apply_thread_count_to_model(self):
        """Try to push the thread count to the loaded model; return success.

        The setter is looked up dynamically because it may not exist on the
        loaded model object. Calling it unconditionally raised an
        AttributeError that the ValueError handler did not catch.
        """
        setter = getattr(self.model, "set_thread_count", None)
        if not callable(setter):
            return False
        setter(self.thread_count)
        return True

    def upload_pdf(self):
        """Ask for a PDF file and store the text of all its pages."""
        pdf_path, _ = QFileDialog.getOpenFileName(self, "Upload PDF", "", "PDF Files (*.pdf)")
        if not pdf_path:
            return
        try:
            # The context manager closes the document even if a page fails.
            with fitz.open(pdf_path) as doc:
                extracted_text = "".join(page.get_text() for page in doc)
        except Exception as e:
            QMessageBox.critical(self, "Error", f"Error loading PDF: {str(e)}")
            return
        # Only replace the stored text once extraction fully succeeded.
        self.pdf_content = extracted_text
        self._pdf_in_history = False
        self.pdf_label.setText(f"PDF: {os.path.basename(pdf_path)}")
        QMessageBox.information(self, "Success", "PDF loaded successfully!")

    def send_message(self):
        """Send the typed message to the model on a background thread.

        Does nothing when the input is empty, no model is loaded, or a
        previous reply is still being generated.
        """
        user_message = self.user_input.text()
        if not user_message or self.model is None:
            return
        # Replacing a running Worker would destroy a live QThread and let two
        # generations use the same model at once, so ignore the request and
        # leave the typed text in place for the user to resend.
        if self.worker is not None and self.worker.isRunning():
            return

        self.messages.append({"role": "user", "content": user_message})
        self.update_chat_display(f"You: {user_message}")
        self.user_input.clear()

        self.progress_bar.show()
        self.progress_bar.setRange(0, self._MAX_TOKENS)
        self.progress_bar.setValue(0)

        # Add the PDF text once per loaded PDF; it then stays in the history
        # for later turns instead of being duplicated on every send.
        if self.pdf_content and not self._pdf_in_history:
            self.messages.append({"role": "user", "content": self.pdf_content})
            self._pdf_in_history = True

        self.worker = Worker(self.model, self.messages, self._MAX_TOKENS)
        self.worker.finished.connect(self.on_response_finished)
        self.worker.progress.connect(self.on_response_progress)
        self.worker.start()

    def on_response_finished(self, assistant_message):
        """Record and display the reply; offer a download for pure code."""
        self.progress_bar.hide()
        self.messages.append({"role": "assistant", "content": assistant_message})
        self.update_chat_display(f"Assistant: {assistant_message}")

        # Python Code Download: only when the whole reply is one fenced
        # block. Surrounding whitespace is ignored for the check.
        trimmed = assistant_message.strip()
        if trimmed.startswith(self._FENCE_OPEN) and trimmed.endswith(self._FENCE_CLOSE):
            self.offer_code_download(trimmed)

    def on_response_progress(self, current_tokens, total_tokens):
        """Advance the progress bar; the range was set in send_message."""
        self.progress_bar.setValue(current_tokens)

    @classmethod
    def _extract_python_code(cls, text):
        """Return the code inside a ```python ... ``` fenced block.

        Removes the exact fence strings. ``str.strip("```python")`` treats
        its argument as a set of characters and could also remove leading or
        trailing code characters such as ``p``, ``t`` or ``n``.
        """
        body = text.strip()
        if body.startswith(cls._FENCE_OPEN):
            body = body[len(cls._FENCE_OPEN):]
        if body.endswith(cls._FENCE_CLOSE):
            body = body[:-len(cls._FENCE_CLOSE)]
        # Drop the blank lines left by the fences, keep one final newline.
        return body.strip("\n") + "\n"

    def offer_code_download(self, code):
        """Ask whether to save the generated code and write it if confirmed.

        Args:
            code: assistant reply consisting of one fenced Python block.
        """
        reply = QMessageBox.question(self, "Download Code",
                                     "The assistant generated Python code. Do you want to download it?",
                                     QMessageBox.Yes | QMessageBox.No)
        if reply != QMessageBox.Yes:
            return
        file_path, _ = QFileDialog.getSaveFileName(self, "Save Python Code", "code.py", "Python Files (*.py)")
        if not file_path:
            return
        try:
            # Explicit encoding so non-ASCII characters in the code do not
            # depend on the platform default.
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(self._extract_python_code(code))
        except Exception as e:
            QMessageBox.critical(self, "Error", f"Error saving code: {str(e)}")
            return
        QMessageBox.information(self, "Success", "Code saved successfully!")

    def update_chat_display(self, message):
        """Append a message to the chat view and scroll to the bottom."""
        self.chat_display.append(message + "\n")
        scroll_bar = self.chat_display.verticalScrollBar()
        scroll_bar.setValue(scroll_bar.maximum())

    def clear_conversation(self):
        """Reset the history, the chat view and the loaded PDF text."""
        self.messages = self._initial_messages()
        self.chat_display.clear()
        self.pdf_content = ""  # Clear PDF content
        self._pdf_in_history = False
        self.pdf_label.setText("PDF: No PDF loaded")

    def show_context_menu(self, point):
        """Show a context menu with a Copy action at the clicked position."""
        menu = QMenu(self)
        copy_action = menu.addAction("Copy")
        copy_action.triggered.connect(self.copy_text)
        menu.exec_(self.chat_display.mapToGlobal(point))

    def copy_text(self):
        """Copy the selected chat text to the clipboard."""
        cursor = self.chat_display.textCursor()
        if cursor.hasSelection():
            # selectedText() marks line breaks with U+2029 (paragraph
            # separator); convert them so pasted text has real newlines.
            text = cursor.selectedText().replace("\u2029", "\n")
            QApplication.clipboard().setText(text)


def main():
    """Create the Qt application, show the chat window, run the event loop.

    Returns the exit code produced by the Qt event loop.
    """
    application = QApplication(sys.argv)
    window = ChatbotGUI()
    window.show()
    return application.exec_()


if __name__ == "__main__":
    sys.exit(main())
Downloads last month
15
GGUF
Model size
135M params
Architecture
llama

8-bit

32-bit

Inference API
Unable to determine this model’s pipeline type. Check the docs.

Model tree for LMLK/SmolLM2-135M-Instruct-GGUF

Quantized
(23)
this model