Update app.py
app.py
CHANGED
@@ -17,8 +17,6 @@ from langchain.chains.conversation.memory import ConversationalBufferWindowMemory
 
 MODEL_NAME = "mn40_as"
 
-session_token = os.environ["SESSION_TOKEN"]
-
 device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
 model = get_mobilenet(width_mult=NAME_TO_WIDTH(MODEL_NAME), pretrained_name=MODEL_NAME)
 model.to(device)
@@ -28,7 +26,8 @@ cached_audio_class = "c"
 template = None
 prompt = None
 chain = None
-
+formatted_classname = "tree"
+chain =
 
 def format_classname(classname):
     return classname.capitalize()
@@ -62,60 +61,60 @@ def audio_tag(
     # Print audio tagging top probabilities
 
     label = labels[sorted_indexes[0]]
-    formatted_classname = format_classname(audio_class)
-    if cached_audio_class != formatted_classname:
-        cached_audio_class = formatted_classname
-    template
-    return
-    btn = gr.Button("Run")
-    btn.click(fn=audio_tag, inputs=[aud, inp], outputs=out)
-demo.launch()
+    formatted_classname = label
+    chain = construct_langchain(formatted_classname)
+    return formatted_classname
+
+def construct_langchain(audio_class):
+    if cached_audio_class != audio_class:
+        cached_audio_class = audio_class
+    prefix = f"""You are going to act as a magical tool that allows for humans to communicate with non-human entities like
+rocks, crackling fire, trees, animals, and the wind. In order to do this, we're going to provide you the human's text input for the conversation.
+The goal is for you to embody that non-human entity and converse with the human.
+
+Examples:
+
+Non-human Entity: Tree
+Human Input: Hello tree
+Tree: Hello human, I am a tree
+
+Let's begin:
+Non-human Entity: {audio_class}"""
+
+    suffix = f'''Source: {audio_class}
+Length of Audio in Seconds: 2 seconds
+Human Input: {userText}
+{audio_class} Response:'''
+    template = prefix + suffix
+
+    prompt = PromptTemplate(
+        input_variables=["history", "human_input"],
+        template=template
+    )
+
+    chatgpt_chain = LLMChain(
+        llm=OpenAI(temperature=.5, openai_api_key=session_token),
+        prompt=prompt,
+        verbose=True,
+        memory=ConversationalBufferWindowMemory(k=2, ai_prefix=audio_class),
+    )
+
+    return chatgpt_chain
+
+def predict(input, history=[]):
+    formatted_message = chain.predict(human_input=input)
+    history.append(formatted_message)
+    return formatted_message, history
+
+demo = gr.Interface(
+    audio_tag,
+    [
+        gr.Audio(source="upload", type="filepath", label="Your audio"),
+        gr.Textbox(),
+    ],
+    fn=predict,
+    inputs=["text", "state"],
+    outputs=["chatbot", "state"],
+    title="AnyChat",
+    description="Non-Human entities have many things to say, listen to them!",
+).launch(debug=True)
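For reference, the chain that construct_langchain() assembles can be exercised on its own. The sketch below is not part of the commit: it assumes the early-LangChain import layout matching the ConversationalBufferWindowMemory import shown in the first hunk header, uses a hypothetical OPENAI_API_KEY environment variable in place of the session_token module global that this commit removes (the new construct_langchain still references session_token), and collapses the persona prompt to a few lines while keeping literal {history} and {human_input} placeholders so PromptTemplate's input_variables match the template.

import os

from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.chains.conversation.memory import ConversationalBufferWindowMemory

# Persona for the non-human entity; in app.py this comes from the audio tagger.
audio_class = "Tree"

# Keep {history} and {human_input} as literal placeholders for PromptTemplate;
# only the entity name is baked in up front.
template = (
    "You are going to act as a magical tool that allows humans to communicate "
    "with non-human entities. Embody the entity and converse with the human.\n\n"
    f"Non-human Entity: {audio_class}\n"
    "{history}\n"
    "Human Input: {human_input}\n"
    f"{audio_class} Response:"
)

prompt = PromptTemplate(
    input_variables=["history", "human_input"],
    template=template,
)

# Same construction as construct_langchain(): an OpenAI LLM with a two-turn
# sliding window of conversation memory, answering in the entity's voice.
chain = LLMChain(
    llm=OpenAI(temperature=0.5, openai_api_key=os.environ["OPENAI_API_KEY"]),  # assumed env var
    prompt=prompt,
    verbose=True,
    memory=ConversationalBufferWindowMemory(k=2, ai_prefix=audio_class),
)

print(chain.predict(human_input="Hello tree"))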
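On the UI side, the new gr.Interface(...) call passes audio_tag and a component list positionally while also supplying fn=, inputs= and outputs= keywords, so the intended wiring of the declared "state" and "chatbot" shortcuts is worth spelling out. The following is a minimal, self-contained sketch of that plumbing in the Gradio 3-style API the diff already uses (gr.Audio(source="upload", ...)); echo_bot is only a stand-in for the app's audio_tag/predict pair, not code from the commit.

import gradio as gr

def echo_bot(user_text, history):
    # The "state" input arrives as None on the first call.
    history = history or []
    reply = f"You said: {user_text}"
    # The "chatbot" output renders a list of (user message, bot message) pairs.
    history.append((user_text, reply))
    # Return one value per declared output: the chat log and the carried state.
    return history, history

gr.Interface(
    fn=echo_bot,
    inputs=[gr.Textbox(), "state"],
    outputs=["chatbot", "state"],
    title="AnyChat",
    description="Non-Human entities have many things to say, listen to them!",
).launch(debug=True)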