aidoskanapyanov committed
Commit • 60ebe5d
Parent(s): c6f0328

add code

Files changed:
- app.py +118 -0
- requirements.txt +9 -0
- samples/out_olga2.mp3 +0 -0
app.py
ADDED
@@ -0,0 +1,118 @@
+# type: ignore
+from typing import List, Tuple
+
+import gradio as gr
+import pandas as pd
+import torch
+from langchain_community.llms import CTransformers
+from langchain_core.output_parsers import PydanticOutputParser
+from langchain_core.prompts import PromptTemplate
+from langchain_core.pydantic_v1 import BaseModel, Field
+from loguru import logger
+from transformers import pipeline
+
+logger.add("logs/file_{time}.log")
+
+
+# ASR model: Whisper transcribes the call recording, forced to Russian
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+logger.info(f"Device: {device}")
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model="openai/whisper-medium",
+    chunk_length_s=30,
+    # device=device,
+    generate_kwargs={"language": "russian"},
+)
+
+
+# QA model: Pydantic schema for the question-answer pairs extracted by the LLM
+class Result(BaseModel):
+    """Извлечь вопрос и ответ из аудио записи колл-центра"""
+
+    question: str = Field(..., description="Вопрос клиента")
+    answer: str = Field(..., description="Ответ оператора")
+
+
+class Results(BaseModel):
+    results: List[Result] = Field(..., description="Пары вопрос-ответ")
+
+
+config = {
+    "max_new_tokens": 1000,
+    "context_length": 3000,
+    "temperature": 0,
+    # "gpu_layers": 50,
+}
+llm = CTransformers(
+    model="TheBloke/saiga_mistral_7b-GGUF",
+    config=config,
+)
+
+# accelerator = Accelerator()
+# llm, config = accelerator.prepare(llm, config)
+
+
+def asr(audio_file) -> str:
+    transcribed_text = pipe(audio_file, batch_size=16)
+    logger.info(f"Transcribed text: {transcribed_text}")
+
+    return transcribed_text["text"]
+    # return "Здравствуйте, меня зовут Александр, чем могу помочь? До скольки вы работаете? До 20:00. Спасибо, до свидания!"
+
+
+def qa(transcribed_text: str) -> Tuple[str, pd.DataFrame]:
+    parser = PydanticOutputParser(pydantic_object=Results)
+    prompt = PromptTemplate(
+        template="На основе транскрипции звонка из колл-центра определите пары вопросов и ответов, выделив конкретные вопросы, которые задал клиент, и ответы, которые предоставил оператор.\n{format_instructions}\nТекст аудио записи: {transcribed_text}\n",
+        # template="Какой вопрос задал клиент? Какой ответ дал оператор?\n{format_instructions}\nТекст аудио записи: {transcribed_text}\n",
+        input_variables=["transcribed_text"],
+        partial_variables={"format_instructions": parser.get_format_instructions()},
+    )
+    prompt_and_model = prompt | llm
+
+    output = prompt_and_model.invoke({"transcribed_text": transcribed_text})
+    logger.info(f"Output: {output}")
+
+    # Parse the raw LLM output into the Results schema
+    results = parser.invoke(output)
+    logger.info(f"Result: {results}")
+    logger.info(f"Dict: {results.dict()}")
+
+    # Expand the parsed pairs into a two-column DataFrame for display
+    results = (
+        pd.DataFrame(results.dict())
+        .results.apply(pd.Series)
+        .rename({"question": "Вопрос", "answer": "Ответ"}, axis=1)
+    )
+
+    return transcribed_text, results
+
+
+@logger.catch
+def inference(audio_file):
+    transcribed_text = asr(audio_file)
+    return qa(transcribed_text)
+
+
+demo = gr.Interface(
+    fn=inference,
+    inputs=[
+        gr.Audio(
+            label="Аудио запись для обработки",
+            sources="upload",
+            type="filepath",
+        )
+    ],
+    outputs=[
+        gr.components.Textbox(label="Транскрибированный текст"),
+        gr.DataFrame(headers=["Вопрос", "Ответ"], label="Вопросы и ответы"),
+    ],
+    submit_btn="Обработать",
+    clear_btn="Очистить",
+    allow_flagging="never",
+    title="Обработчик аудиозаписей колл-центра",
+    description="Распознавание речи и определение вопроса клиента и ответа оператора.",
+    css="footer {visibility: hidden}",
+    examples=["samples/out_olga2.mp3"],
+)
+
+demo.launch()
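For reference, a minimal standalone sketch of the PydanticOutputParser round trip that qa() relies on, assuming the LLM emits well-formed JSON. The schema mirrors app.py (field descriptions translated to English here), and the raw string is a hypothetical model output, not real data:

from typing import List

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field


class Result(BaseModel):
    question: str = Field(..., description="Client question")
    answer: str = Field(..., description="Operator answer")


class Results(BaseModel):
    results: List[Result] = Field(..., description="Question-answer pairs")


parser = PydanticOutputParser(pydantic_object=Results)

# get_format_instructions() produces the JSON-schema text that app.py injects
# into the prompt via the {format_instructions} partial variable.
print(parser.get_format_instructions())

# invoke() parses and validates raw LLM text against the schema.
# Hypothetical model output:
raw = '{"results": [{"question": "Until what time are you open?", "answer": "Until 20:00."}]}'
parsed = parser.invoke(raw)
print(parsed.results[0].answer)  # Until 20:00.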
requirements.txt
ADDED
@@ -0,0 +1,9 @@
+ctransformers
+gradio
+langchain
+langchain-community
+langchain-core
+langchain-huggingface
+librosa
+loguru
+transformers
samples/out_olga2.mp3
ADDED
Binary file (178 kB)