import os
import logging
import json
from datetime import datetime
import gradio as gr
from huggingface_hub import InferenceClient
from logging.handlers import RotatingFileHandler

# Logging setup: everything from DEBUG upward is written to a size-rotated
# file (10 MiB per file, 5 rotated backups kept).
log_file = 'app_debug.log'
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
# The encoding is pinned to UTF-8 because log records contain Cyrillic text
# (error messages and user prompts); relying on the platform default encoding
# can garble them or fail on write.
file_handler = RotatingFileHandler(
    log_file,
    maxBytes=10 * 1024 * 1024,
    backupCount=5,
    encoding="utf-8",
)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logger.addHandler(file_handler)
logger.debug("Application started")

# Default system prompt: the bot starts with it and the "system prompt" text
# area in the UI is pre-filled with it.
DEFAULT_SYSTEM_PROMPT = "For every question I ask, I want you to think through the problem step by step using Chain Of Thought."

class FrikadelchikBot:
    """Chat bot that streams replies from a hosted Mistral-7B-Instruct model.

    Attributes:
        client: ``InferenceClient`` bound to the model.
        system_prompt: Instructions used when a call does not pass its own.
    """

    def __init__(self):
        """Create the inference client and start from the default system prompt."""
        self.client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3")
        self.system_prompt = DEFAULT_SYSTEM_PROMPT
        logger.debug("Bot initialized")

    def format_prompt(self, message, history, system_prompt=None):
        """Build the instruction prompt for the current message.

        Only the most recent exchange from ``history`` is included, which keeps
        the prompt short. The system prompt is prepended to the first
        instruction so that every ``[INST]`` has a matching ``[/INST]``.

        Args:
            message: Current user message.
            history: Sequence of ``(user, bot)`` pairs; may be empty or None.
            system_prompt: Optional override for ``self.system_prompt``.

        Returns:
            The prompt string, ending with the current instruction.
        """
        active_prompt = self.system_prompt if system_prompt is None else system_prompt
        system_text = (active_prompt or "").strip()

        # (user_text, bot_text) pairs; bot_text is None for the open turn.
        turns = []
        if history:
            last_user_msg, last_bot_msg = history[-1]
            # A pending exchange can carry None instead of text.
            turns.append(((last_user_msg or "").strip(), (last_bot_msg or "").strip()))
        turns.append(((message or "").strip(), None))

        # The begin-of-sequence marker appears once, before the first instruction.
        parts = ["<s>"]
        for index, (user_text, bot_text) in enumerate(turns):
            if index == 0 and system_text:
                user_text = f"{system_text}\n\n{user_text}"
            parts.append(f"[INST] {user_text} [/INST]")
            if bot_text is not None:
                parts.append(f" {bot_text}</s>")
        prompt = "".join(parts)

        # This is a whitespace word count, not a model token count.
        logger.debug("Formatted prompt length (words): %d", len(prompt.split()))
        return prompt

    def generate(self, message, history, temperature=0.7, max_new_tokens=256,
                 top_p=0.95, repetition_penalty=1.2, system_prompt=None):
        """Stream the model's reply to ``message``.

        Args:
            message: Current user message.
            history: Chat history as a list of ``[user, bot]`` pairs, or None.
            temperature: Sampling temperature; raised to at least 0.01.
            max_new_tokens: Generation limit; raised to at least 1.
            top_p: Nucleus-sampling threshold.
            repetition_penalty: Penalty applied to repeated tokens.
            system_prompt: Optional override for ``self.system_prompt``.

        Yields:
            ``(updated_history, "")`` tuples: the history with the reply so
            far appended, and an empty string that clears the input box.
            Failures are reported as a chat message instead of being raised.
        """
        history = history or []

        # Nothing to ask: leave the chat untouched and just clear the input.
        if not message or not message.strip():
            yield history, ""
            return

        try:
            formatted_prompt = self.format_prompt(message, history, system_prompt)
            logger.debug("Formatted prompt:\n%s", formatted_prompt)

            # Generation parameters
            generation_args = {
                "temperature": max(float(temperature), 1e-2),
                # A request for zero new tokens cannot produce a reply.
                "max_new_tokens": max(int(max_new_tokens), 1),
                "top_p": float(top_p),
                "repetition_penalty": float(repetition_penalty),
                "do_sample": True,
                "seed": 42,
                "stream": True,
                "details": True,
                "return_full_text": False
            }

            stream = self.client.text_generation(
                formatted_prompt,
                **generation_args
            )

            response = ""
            for chunk in stream:
                # Special tokens (e.g. the end-of-sequence marker) are not
                # part of the visible reply.
                if getattr(chunk.token, "special", False):
                    continue
                response += chunk.token.text
                yield history + [[message, response]], ""

            if not response:
                # Still emit one update so the message is shown and the
                # input box is cleared.
                yield history + [[message, ""]], ""

            logger.debug("Generated response length: %d", len(response))

        except Exception as e:
            # UI boundary: report the failure in the chat and keep the
            # traceback in the log.
            error_msg = f"Ошибка при генерации ответа: {str(e)}"
            logger.exception(error_msg)
            yield history + [[message, f"Извините, произошла ошибка: {error_msg}"]], ""

    def clear_chat_history(self):
        """Return an empty history, which resets the chat component."""
        logger.info("Chat history cleared")
        return []

# One bot instance serves the whole app, so its system prompt is shared state.
bot = FrikadelchikBot()


def _respond(message, history, prompt_text, temperature, max_new_tokens, top_p, repetition_penalty):
    """Apply the system prompt from the UI, then stream the bot's reply.

    The text area value is read on every submit, so the prompt used for a
    request is always the one currently shown in the UI.
    """
    bot.system_prompt = prompt_text or ""
    yield from bot.generate(message, history, temperature, max_new_tokens, top_p, repetition_penalty)


with gr.Blocks() as app:
    gr.Markdown("# Фрикадельчик v0.3")

    with gr.Accordion("Системный промпт", open=False):
        system_prompt = gr.TextArea(
            value=DEFAULT_SYSTEM_PROMPT,
            label="Системный промпт",
            lines=4,
            interactive=True,
            info="Определите поведение и личность бота"
        )

    chatbot = gr.Chatbot()

    msg = gr.Textbox(
        placeholder="Введите сообщение...",
        label="Ввод"
    )

    with gr.Accordion("Параметры генерации", open=False):
        temperature = gr.Slider(
            label="Температура",
            value=0.7,
            minimum=0.0,
            maximum=1.0,
            step=0.05,
            interactive=True,
            info="Меньшие значения делают ответы более сфокусированными"
        )
        max_new_tokens = gr.Slider(
            label="Максимальное количество новых токенов",
            value=256,
            # Start at one step: a request for 0 new tokens cannot produce a reply.
            minimum=64,
            maximum=1024,
            step=64,
            interactive=True,
            info="Максимальное количество генерируемых токенов"
        )
        top_p = gr.Slider(
            label="Top-p (ядровая выборка)",
            value=0.95,
            minimum=0.0,
            maximum=1.0,
            step=0.05,
            interactive=True,
            info="Более высокие значения учитывают больше вероятных токенов"
        )
        repetition_penalty = gr.Slider(
            label="Штраф за повторения",
            value=1.2,
            minimum=1.0,
            maximum=2.0,
            step=0.05,
            interactive=True,
            info="Штраф за повторяющиеся токены"
        )

    clear = gr.Button("Очистить чат")

    # Event handlers
    # Submitting a message streams the reply into the chat and clears the
    # input box; the system prompt text area is passed along with it.
    msg.submit(
        _respond,
        inputs=[msg, chatbot, system_prompt, temperature, max_new_tokens, top_p, repetition_penalty],
        outputs=[chatbot, msg]
    )

    clear.click(
        bot.clear_chat_history,
        outputs=[chatbot]
    )

if __name__ == "__main__":
    # launch() blocks the main thread while the server runs, so the log entry
    # is written first; placed after the call it would only appear at shutdown.
    logger.debug("Chat interface initialized, launching")
    app.launch(show_api=False, debug=True)