y5shen committed on
Commit 81463e4 · verified · 1 Parent(s): 7fe52bd

Upload folder using huggingface_hub

Files changed (9)
  1. README.md +3 -9
  2. cert.pem +35 -0
  3. demo.sh +14 -0
  4. key.pem +52 -0
  5. my_code.py +1 -0
  6. requirements_web_demo.txt +2 -0
  7. test_audio.py +295 -0
  8. text_clasi.py +42 -0
  9. web_demo_audio.py +164 -0
README.md CHANGED
@@ -1,12 +1,6 @@
  ---
- title: RoboAssist Demo
- emoji: 🏢
- colorFrom: green
- colorTo: gray
+ title: roboAssist_demo
+ app_file: web_demo_audio.py
  sdk: gradio
- sdk_version: 4.44.0
- app_file: app.py
- pinned: false
+ sdk_version: 4.31.3
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
cert.pem ADDED
@@ -0,0 +1,35 @@
+ -----BEGIN CERTIFICATE-----
+ MIIGCTCCA/GgAwIBAgIUJaGhsmKaUMWDxgOv85icnxiTXcwwDQYJKoZIhvcNAQEL
+ BQAwgZMxCzAJBgNVBAYTAkNOMREwDwYDVQQIDAhaaGVqaWFuZzERMA8GA1UEBwwI
+ SGFuZ3pob3UxFDASBgNVBAoMC1poaWNoZW5nIEFJMRQwEgYDVQQLDAtBSSByb2Jv
+ dGljczEPMA0GA1UEAwwGWXVxaW5nMSEwHwYJKoZIhvcNAQkBFhJzeXE0MTBAaG90
+ bWFpbC5jb20wHhcNMjQwOTE0MDYwOTEwWhcNMjUwOTE0MDYwOTEwWjCBkzELMAkG
+ A1UEBhMCQ04xETAPBgNVBAgMCFpoZWppYW5nMREwDwYDVQQHDAhIYW5nemhvdTEU
+ MBIGA1UECgwLWmhpY2hlbmcgQUkxFDASBgNVBAsMC0FJIHJvYm90aWNzMQ8wDQYD
+ VQQDDAZZdXFpbmcxITAfBgkqhkiG9w0BCQEWEnN5cTQxMEBob3RtYWlsLmNvbTCC
+ AiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAIUdNs067FfWUjuwFqk4OOWp
+ meR1aMKdkFYrDRAmI7vPqiUJbE1hQWMGs2HQKEzL/PNnTN+TAc34Iuv7ayvwe56l
+ p5VeRrZvUOaw0epMi2OLEh0WaiC9jJt+zSaqEYzJGcwork2Gk9eUoQGhe2plqFEl
+ 1ZNNjvJnLfSuajSbPW8YrDkkoq0M2LcFBn6A16ekBMKCFf6ykSCCS9+KWCIMev7B
+ 9zRcVLDedGBrhStRmZZMQncxa4oY279GRLliwJhNVEQiU6FU0tCZGIqcYao8VyNh
+ 0HY1/n3WkIR8vSQTd4oxBt5buoce1bErvGiqmPw+uWRc+ENHizL+Djyo/ZcC5hBB
+ 6r7M825LH/HL30VjPsLsVUqay78KdKwgGTdZhqO6WB8Py8WOsbWKltnJXPJZwnc1
+ 1fs97ZrD1H/1ku6cz0XalnqX/doO52yAK7l8/V5mYBSAR/8r6WAs4epKgEZGln9S
+ wn/l/RcecA+gnF419hxUEhftunrSK+fhDOxFFSFXMBrGX555WPhN86QiJ3UPBfy1
+ 9RO5nhuseXtnGBSA6Cx1Ca4fsQ+gc1JVfOtkjt+H7ovBTZjaexNq4/5rVUvG7zTC
+ 23LdMh7QXjlNPUYMNAotFMvaLBb4vbj5s2+IUfXVWP98dZ5bWAaSAsLgx90demUw
+ rMYp0dATWbIY1C7m344dAgMBAAGjUzBRMB0GA1UdDgQWBBQpWpVDljIwqV5BDH1r
+ G19X3Z7/ADAfBgNVHSMEGDAWgBQpWpVDljIwqV5BDH1rG19X3Z7/ADAPBgNVHRMB
+ Af8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4ICAQBgTBYppmEO/geMU6VpjnMLoZbB
+ SBvrt15Bz8S0Yd37gu4v4pWOXhSJXvrn90AT4iWWVPSpCE6T9DXLBHFG7euUZ3Fu
+ HqlRQXH1MXuRdh3aJ68nm6AWY5Jh9DwqkzNNPStB9t7p/gLvEIltyISTLoDDP3Ds
+ bSuBIqIl2C/si6jkWkxSIett7zx9Yjaim9GQ9ZchubUy5MiIr1uT3Veoz00HmWKr
+ eT1S86gpPCAXIf2zJvX87i2j1ad61k3cJUf3tZCMm+qpcW+w/sT7M5EmQdOPN6+n
+ NMlQRQ0L7WDGCm8Y7IofGSUMzTL5l9GFTcTNPQACox1KcLpZU4/4d1L0n8J2WXrZ
+ 6Q2rOOqeL0zr2018JJntrpB+yuhtQZbKJWSFtwZF4jrCJvcsUb3GyAO+imMF3Fj/
+ Walvf0c/QPe+uoVBwx9kQ3VjTURgEGnwUQSFmim+rH+yBLrvwg8TeBIJ3z/Pvnbp
+ gEpuSc1qArV8jR8u8c07wp41/YTlwbZpe11ghmtsM/aCXUXW/jN2laKmB3esRavK
+ q1BDzJ5kMzayazL5vsVOKnaEanXA9Y45GZhmPrOAPUqCnzt+HyfLL+DpFrNdtDPu
+ OFuFBtWKLlEU1T72+Bgy3a33LPJN646P69yjgf4jYsQI1tu+IbskP68PQbCvIC6w
+ onan9gKqEZrTOYa1YQ==
+ -----END CERTIFICATE-----
demo.sh ADDED
@@ -0,0 +1,14 @@
+ echo $CUDA_VISIBLE_DEVICES
+ SERVER_PORT=9001
+ MASTER_ADDR=localhost
+ MASTER_PORT="3${SERVER_PORT}"
+ NNODES=${WORLD_SIZE:-1}
+ NODE_RANK=${RANK:-0}
+ GPUS_PER_NODE=1
+ python -m torch.distributed.launch --use_env \
+     --nproc_per_node $GPUS_PER_NODE --nnodes $NNODES \
+     --node_rank $NODE_RANK \
+     --master_addr=${MASTER_ADDR:-127.0.0.1} \
+     --master_port=$MASTER_PORT \
+     web_demo_audio.py \
+     --server-port ${SERVER_PORT}
key.pem ADDED
@@ -0,0 +1,52 @@
+ -----BEGIN PRIVATE KEY-----
+ MIIJQQIBADANBgkqhkiG9w0BAQEFAASCCSswggknAgEAAoICAQCFHTbNOuxX1lI7
+ sBapODjlqZnkdWjCnZBWKw0QJiO7z6olCWxNYUFjBrNh0ChMy/zzZ0zfkwHN+CLr
+ +2sr8HuepaeVXka2b1DmsNHqTItjixIdFmogvYybfs0mqhGMyRnMKK5NhpPXlKEB
+ oXtqZahRJdWTTY7yZy30rmo0mz1vGKw5JKKtDNi3BQZ+gNenpATCghX+spEggkvf
+ ilgiDHr+wfc0XFSw3nRga4UrUZmWTEJ3MWuKGNu/RkS5YsCYTVREIlOhVNLQmRiK
+ nGGqPFcjYdB2Nf591pCEfL0kE3eKMQbeW7qHHtWxK7xoqpj8PrlkXPhDR4sy/g48
+ qP2XAuYQQeq+zPNuSx/xy99FYz7C7FVKmsu/CnSsIBk3WYajulgfD8vFjrG1ipbZ
+ yVzyWcJ3NdX7Pe2aw9R/9ZLunM9F2pZ6l/3aDudsgCu5fP1eZmAUgEf/K+lgLOHq
+ SoBGRpZ/UsJ/5f0XHnAPoJxeNfYcVBIX7bp60ivn4QzsRRUhVzAaxl+eeVj4TfOk
+ Iid1DwX8tfUTuZ4brHl7ZxgUgOgsdQmuH7EPoHNSVXzrZI7fh+6LwU2Y2nsTauP+
+ a1VLxu80wtty3TIe0F45TT1GDDQKLRTL2iwW+L24+bNviFH11Vj/fHWeW1gGkgLC
+ 4MfdHXplMKzGKdHQE1myGNQu5t+OHQIDAQABAoICAAYdXe+WhdXRHAzu3VOfqAlw
+ vYd/lzTdbQxwRNTZk6mkc/HomWeCkLDeVW4oVDmW40NBGwhoJdySyfPiJD2r+dnD
+ TUvwvTnTKjm2pKXpJzwG3umkJnLjkipwbDca99xqPnug4MTh6jesdarwAo3UJqO3
+ o7R1Cxx6feNwAaPx/6PzU7H6S1i8rtXVbsNv4rgqMifHNwBwrnb6VUbrAXpq7Vl3
+ ioYGucM4LMtjFQZ05KPBVApKpGB/HNUQAscrUKKOZtEMbKAMF/UKG1T+swPRasVO
+ 0Hclqs+YBCJ54mxeUo7wCps7Rnm/glx1oF2EfKuVrAl+tc7NYgkRH5FMx/1gXjYP
+ AsIaEp+mPLYSH5apArbOu+RUjdoxHkYf4syZ4dNpf9EUDp7pkwIjNfz6P035Gp55
+ 4RwGrmSiL2VQXdTVqbBB7tuI0nmMmofqX0197YMysi9cstTn5ApfjQV28IjyWPFg
+ AzDpPbeT4v2TZfS958LXQx1i4zdlmrpWniXu1jE+M/x3aaQVip645mw86ys3+Dgr
+ wfKRwUT9Bj+4XwZvgwA3Yuv5BCzrBIDJ4t5p1d0gcgzNzymde3ws8H16dYvtZuPq
+ st3V1Tk1FXdCCUbVb8VRslBPIVN5U3hKMmPNIMfcXsIk1ilZdGbmO2SbX50ow7kK
+ w2WrKybXPOdnT4lJaRjhAoIBAQC7zZqypJGDA6HXU1qtOXiwNTntbU5SA9a47/RI
+ Ufy3H14lFbr+uPmJ+T5ZnF2hJS0BL4QfxgJZpuLbAhzf19G82VzZR4a0+w0CDMn+
+ mLrXlA51guvGO85kq663fA1S3Z2Z3MVe3wi8lCzJ8eFdae6Jl/ZilNiaEjvSO5Py
+ 6DNFGq4gw6dH5ZAndWufRR8iwEOh3v+qcNr29b4EznLDsYxODaI3r/GHSXJowdah
+ 8Fllb7ZqyHRVMrEHGAn7ZynivXYFk4i7KlmoDz0OpYu6mntJzYZ3RnJqjGIn1RSb
+ JSAOjmZQXHxcgm4Ew5hG4KOemv0V+MnoHFTVHW+uiUF3QZ/NAoIBAQC1c6fAnmT6
+ a8tmDkChlIxP4QxYUVLq/1Uys1xvWK65ItmsRyhECp3LzDu1dSKyL7bsiF7mgXOL
+ jV0tWKLXaScw541BB1VCBX/bYu7g/dzPxjGXGtNNyZvZ3Z/KzMnyqywMGWTx9DEG
+ hlfmT1FG4ZOIv4zQ3hgtzOPE9yOZoypTy/tNi6543QqRHZDgeU4LnQAK71EZAxjN
+ Y7P6LtyqbQdwD53gFVyVR6dXxwDNT2bF1YGMGG5RvhpUF/D0f64/SHcL9RY/33W1
+ PZEDqPVmgn3iKWFpDPaEVVeHRuggcdetUCVNk4agFUzQ6Qn3ouVHLy/E8yFTn8vo
+ qy0uOFbB2zeRAoIBAHgQJ/hRcZ2astrgNTZNabTtMk1x9gJE5f9xmHCrya6g5SU8
+ UUILq6zjpVLQvqMJC06YvBU8F8CNCrcNELyiUzoylENskNgfL9/BoVmc9sZfpybF
+ PkqLbZW6lIZFTpdNizYUiDHw0LsO19t7QaLg2Fkqx5+anSdSxN/QqvM+M5jYR742
+ lzlFr/UqAlXecNtco2BXGogygiokTQjxmzk0xwgXez+ywezFrdj/I2KAMAbDdpGp
+ +YWGVNq+KZOKDqMgkNVbzqwplQv2QgidGUmA/ZTfSjny1a78dLwWY4gUIfxsWUjC
+ 99aEpCVzjmSxtKjvWSG03hAAHNPqb73bjfjaAXkCggEAImxTic5C1+M1TiIsDrtQ
+ tyK5pdOfyGcyMZEnWxRW6VZ8Nxu0vtvIvc38Edur/mPg7xSb9Fzgn7vszlp/YiBX
+ yAZVnN2YRhMnA2bMbLCcgGpYJBN5vMsfnwBVfTLGqRviGlWC+gfaeSNpb47im3DV
+ /V5hh4pqyd2R5F8CSPZUZFtRTI/QRKuigo86WeuJa9iYFnr6RqV4whZFIZRcVpSi
+ kZbU25sR2lwimhttwoVquONdVUeZPjazZfq4wpmqY+UKeKFBXFK3UO/iH/CgxZbB
+ 7PLuwPAtoENUfJxuEyGlygJnYUU4iAwia4uaUXl8x7li9ZkuXF/3XieDRb54bwR4
+ kQKCAQAqnt9BD1AWWBsEA+N8maYbrPT2hfxNPjeKgKmjIxUwd874pz+xwTNhpsBU
+ z9/aST0CV3xOUEMV3HrpPMt2n23xuyxeT9vUYTlCjOD8gCXGFCciAGc4kAYXzg/d
+ Gy5s8OwqS0YBw/X+kdowqgEabuGojF6o5w+PYBoRwPqUBFRVt3ZYAYQ0icF/fvRs
+ xeoayt09ojFRjD2McLtHOR7WKOdbWEirAUWuby3clfsy4eHc8LSXk6HOddu7qsIp
+ r2wDxW66buupMW5Lr6lNQ9T54T/AKK1pa2ceAevlP7d4R8/d1VH80zbZSh/FYGqy
+ 3wpZb1NHygBaGIGz82CqM7GbMG49
+ -----END PRIVATE KEY-----
my_code.py ADDED
@@ -0,0 +1 @@
+ from langchain_community.document_loaders import JSONLoader
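my_code.py only imports JSONLoader and never uses it. For context, a minimal sketch of how that loader is typically driven; the sample file and jq expression here are hypothetical placeholders (not part of this commit), and JSONLoader additionally requires the jq package to be installed:

import json
from langchain_community.document_loaders import JSONLoader

# Hypothetical sample data, written only so the sketch is runnable.
with open("example.json", "w") as f:
    json.dump({"messages": [{"content": "hello"}, {"content": "world"}]}, f)

loader = JSONLoader(
    file_path="example.json",         # placeholder path
    jq_schema=".messages[].content",  # jq expression selecting the text to load
    text_content=False,               # serialize non-string matches instead of raising
)
docs = loader.load()  # one Document per jq match
print(len(docs), docs[0].page_content)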
requirements_web_demo.txt ADDED
@@ -0,0 +1,2 @@
+ gradio==4.31.3
+ modelscope-studio
test_audio.py ADDED
@@ -0,0 +1,295 @@
+ import gradio as gr
+ import modelscope_studio as mgr
+ import librosa
+ from transformers import AutoProcessor, Qwen2AudioForConditionalGeneration
+ from argparse import ArgumentParser
+ import requests
+ import os
+
+ # Default model checkpoint path
+ DEFAULT_CKPT_PATH = 'Qwen/Qwen2-Audio-7B-Instruct'
+
+ def text_to_speech(text2):
+     data = {
+         "text": text2,
+         "text_language": "zh",
+     }
+     # Send the text to the local TTS service
+     response = requests.post('http://127.0.0.1:8000', json=data)
+
+     if response.status_code == 200:
+         audio_file_path = "/root/project/Qwen2-Audio/demo/output.mp3"
+         with open(audio_file_path, "wb") as f:
+             f.write(response.content)
+         return audio_file_path
+     else:
+         print(f"Error: request failed with status code {response.status_code}")
+         return None
+
+ def _get_args():
+     """
+     Parse command-line arguments for the demo.
+
+     Returns:
+         argparse.Namespace: namespace holding the parsed arguments.
+     """
+     parser = ArgumentParser()
+     parser.add_argument("-c", "--checkpoint-path", type=str, default=DEFAULT_CKPT_PATH,
+                         help="Checkpoint name or path, default to %(default)r")  # model checkpoint path
+     parser.add_argument("--cpu-only", action="store_true", help="Run demo with CPU only")
+     parser.add_argument("--inbrowser", action="store_true", default=False,
+                         help="Automatically launch the interface in a new tab on the default browser.")
+     parser.add_argument("--server-port", type=int, default=15110,
+                         help="Demo server port.")
+     parser.add_argument("--server-name", type=str, default="0.0.0.0",
+                         help="Demo server name.")
+
+     args = parser.parse_args()
+     return args
+
+ def add_text(chatbot, task_history, input):
+     """
+     Append the user's input to the chat history and refresh the chatbot UI.
+
+     Args:
+         chatbot (gr.components.Chatbot): the chatbot component.
+         task_history (list): the task history.
+         input (gr.inputs): the user's input.
+
+     Returns:
+         tuple: the updated chatbot and task history, plus a reset input box.
+     """
+     text_content = input.text  # text part of the input
+     content = []
+     if len(input.files) > 0:  # the user uploaded audio files
+         for i in input.files:
+             content.append({'type': 'audio', 'audio_url': i.path})  # add each audio file
+     if text_content:  # the user typed some text
+         content.append({'type': 'text', 'text': text_content})  # add the text content
+     task_history.append({"role": "user", "content": content})  # record it in the task history
+
+     # Show the user's input in the chatbot UI
+     chatbot.append([{
+         "text": input.text,
+         "files": input.files,
+     }, None])
+     return chatbot, task_history, None
+ '''
+ def add_file(chatbot, task_history, audio_file_path):
+     """
+     Append an audio file to the chat history.
+
+     Args:
+         chatbot (gr.components.Chatbot): the chatbot component.
+         task_history (list): the task history.
+         audio_file_path (str): path to the audio file.
+
+     Returns:
+         tuple: the updated chatbot and task history.
+     """
+     # Make sure the audio entry in the task history has the expected format
+     task_history.append({"role": "user", "content": [{"type": "audio", "audio_url": audio_file_path}]})
+
+     # Update the chat log using audio_file_path directly rather than a gr.Audio component
+     chatbot.append((None, {"type": "audio", "audio_url": audio_file_path}))
+
+     return chatbot, task_history
+ '''
+
+ def add_file(chatbot, task_history, audio_path):
+     if not os.path.isfile(audio_path):
+         print(f"Error: The file {audio_path} does not exist.")
+         return chatbot, task_history
+
+     # Record the audio file in the task history
+     task_history.append({
+         "role": "user",
+         "content": [{"type": "audio", "audio_url": audio_path}]
+     })
+
+     # Assumes the chatbot component accepts dict-shaped entries
+     chatbot_state = [{
+         "text": f"[Audio file: {os.path.basename(audio_path)}]",
+         "files": [audio_path]  # use the file path directly instead of gr.File
+     }, None]
+     chatbot.append(chatbot_state)  # update the chatbot state
+
+     return chatbot, task_history
+
+ def reset_user_input():
+     """
+     Reset the user input field.
+
+     Returns:
+         gr.update: resets the textbox value to empty.
+     """
+     return gr.update(value='')
+
+ def reset_state(task_history):
+     """
+     Reset the chat log and the task history.
+
+     Args:
+         task_history (list): the current task history.
+
+     Returns:
+         tuple: an empty chat log and an empty task history.
+     """
+     return [], []
+
+ def regenerate(chatbot, task_history):
+     """
+     Regenerate the last bot response.
+
+     Args:
+         chatbot (gr.components.Chatbot): the chatbot component.
+         task_history (list): the task history.
+
+     Returns:
+         tuple: the updated chatbot and task history.
+     """
+     # Drop the last message if it was generated by the assistant
+     if task_history and task_history[-1]['role'] == 'assistant':
+         task_history.pop()
+         chatbot.pop()
+     # If anything remains in the history, regenerate a response
+     if task_history:
+         chatbot, task_history = predict(chatbot, task_history)
+     return chatbot, task_history
+
+ def predict(chatbot, task_history):
+     """
+     Generate a model response from the current task history, convert it to
+     speech, and append both to the chat log.
+
+     Args:
+         chatbot (gr.components.Chatbot): the chatbot component.
+         task_history (list): the task history.
+
+     Returns:
+         tuple: the updated chatbot and task history.
+     """
+     print(f"{task_history=}")
+     print(f"{chatbot=}")
+
+     # Format the task history into model input with the processor
+     text = processor.apply_chat_template(task_history, add_generation_prompt=True, tokenize=False)
+     audios = []
+
+     # Walk the task history and load any audio content
+     for message in task_history:
+         if isinstance(message["content"], list):
+             for ele in message["content"]:
+                 if ele["type"] == "audio":
+                     audios.append(
+                         librosa.load(ele['audio_url'], sr=processor.feature_extractor.sampling_rate)[0]
+                     )
+
+     if len(audios) == 0:  # no audio in the history
+         audios = None
+     print(f"{text=}")
+     print(f"{audios=}")
+
+     # Build the model inputs with the processor
+     inputs = processor(text=text, audios=audios, return_tensors="pt", padding=True)
+     if not _get_args().cpu_only:  # move the input ids to the CUDA device when a GPU is used
+         inputs["input_ids"] = inputs.input_ids.to("cuda")
+
+     # Generate the response
+     generate_ids = model.generate(**inputs, max_length=256)
+     generate_ids = generate_ids[:, inputs.input_ids.size(1):]
+
+     # Decode the generated text response
+     response = processor.batch_decode(generate_ids, skip_special_tokens=True)[0]
+     task_history.append({'role': 'assistant', 'content': response})
+     chatbot.append((None, response))  # append the text response
+
+     # Convert the text response to speech
+     audio_file_path = text_to_speech(response)
+     if audio_file_path:
+         chatbot, task_history = add_file(chatbot, task_history, audio_file_path)
+
+     return chatbot, task_history
+
+ def _launch_demo(args):
+     """
+     Launch the Gradio web UI for chatting with the Qwen2-Audio-Instruct model.
+
+     Args:
+         args (argparse.Namespace): arguments parsed from the command line.
+     """
+     with gr.Blocks() as demo:
+         # Page title and description
+         gr.Markdown(
+             """<p align="center"><img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/assets/blog/qwenaudio/qwen2audio_logo.png" style="height: 80px"/><p>""")
+         gr.Markdown("""<center><font size=8>Qwen2-Audio-Instruct Bot</center>""")
+         gr.Markdown(
+             """\
+ <center><font size=3>This WebUI is based on Qwen2-Audio-Instruct, developed by Alibaba Cloud. \
+ (本WebUI基于Qwen2-Audio-Instruct打造,实现聊天机器人功能。)</center>""")
+         gr.Markdown("""\
+ <center><font size=4>Qwen2-Audio <a href="https://modelscope.cn/models/qwen/Qwen2-Audio-7B">🤖 </a>
+ | <a href="https://huggingface.co/Qwen/Qwen2-Audio-7B">🤗</a>&nbsp |
+ Qwen2-Audio-Instruct <a href="https://modelscope.cn/models/qwen/Qwen2-Audio-7B-Instruct">🤖 </a> |
+ <a href="https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct">🤗</a>&nbsp |
+ &nbsp<a href="https://github.com/QwenLM/Qwen2-Audio">Github</a></center>""")
+
+         # Chatbot component
+         chatbot = mgr.Chatbot(label='Qwen2-Audio-7B-Instruct', elem_classes="control-height", height=750)
+
+         # User input component supporting text, microphone, and file upload
+         user_input = mgr.MultimodalInput(
+             interactive=True,
+             sources=['microphone', 'upload'],
+             submit_button_props=dict(value="🚀 Submit (发送)"),
+             upload_button_props=dict(value="📁 Upload (上传文件)", show_progress=True),
+         )
+         task_history = gr.State([])  # initialize the task-history state
+
+         with gr.Row():  # buttons for clearing history and retrying
+             empty_bin = gr.Button("🧹 Clear History (清除历史)")
+             regen_btn = gr.Button("🤔️ Regenerate (重试)")
+
+         # On submit, call add_text, then call predict to generate a response
+         user_input.submit(fn=add_text,
+                           inputs=[chatbot, task_history, user_input],
+                           outputs=[chatbot, task_history, user_input]).then(
+             predict, [chatbot, task_history], [chatbot, task_history], show_progress=True
+         )
+         # Clear-history button: reset the chat log and task history
+         empty_bin.click(reset_state, outputs=[chatbot, task_history], show_progress=True)
+         # Regenerate button: regenerate the last response
+         regen_btn.click(regenerate, [chatbot, task_history], [chatbot, task_history], show_progress=True)
+
+         # Launch the Gradio app
+         demo.queue().launch(
+             share=False,  # do not create a public share URL
+             inbrowser=args.inbrowser,  # open in the browser automatically if requested
+             server_port=args.server_port,  # server port
+             server_name=args.server_name,  # server name
+             ssl_certfile="/root/project/cert.pem",
+             ssl_keyfile="/root/project/key.pem",
+             ssl_verify=False
+         )
+
+ if __name__ == "__main__":
+     args = _get_args()  # parse command-line arguments
+     if args.cpu_only:
+         device_map = "cpu"  # CPU-only execution
+     else:
+         device_map = "auto"  # pick devices automatically
+
+     # Load the model
+     model = Qwen2AudioForConditionalGeneration.from_pretrained(
+         args.checkpoint_path,
+         torch_dtype="auto",  # pick the dtype automatically
+         device_map=device_map,  # device placement
+         resume_download=True,  # resume interrupted downloads
+     ).eval()
+     model.generation_config.max_new_tokens = 2048  # maximum generated tokens, for long chats
+     print("generation_config", model.generation_config)
+     processor = AutoProcessor.from_pretrained(args.checkpoint_path, resume_download=True)  # load the processor
+     _launch_demo(args)  # launch the demo UI
+
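text_to_speech() above assumes a TTS service listening on http://127.0.0.1:8000 that accepts {"text", "text_language"} as JSON and returns raw audio bytes in the response body. A minimal sketch of a compatible endpoint, with synthesize() as a hypothetical stand-in for a real TTS backend:

from flask import Flask, Response, request

app = Flask(__name__)

def synthesize(text: str, language: str) -> bytes:
    # Placeholder: a real implementation would run a TTS model here.
    raise NotImplementedError

@app.route("/", methods=["POST"])
def tts():
    payload = request.get_json()
    audio = synthesize(payload["text"], payload["text_language"])
    return Response(audio, mimetype="audio/mpeg")

if __name__ == "__main__":
    app.run(host="127.0.0.1", port=8000)

Note that web_demo_audio.py's --server-port also defaults to 8000, so the demo and the TTS service cannot both use that default on one machine.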
text_clasi.py ADDED
@@ -0,0 +1,42 @@
+ import sys
+ import os
+ path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
+ sys.path.append(os.path.join(path_root, "pytorch_textclassification"))
+ print(path_root)
+ # classification imports from pytorch_textclassification
+ from tcTools import get_current_time
+ from tcRun import TextClassification
+ from tcConfig import model_config
+
+ evaluate_steps = 320  # evaluation interval, in steps
+ save_steps = 320  # checkpoint interval, in steps
+ # pretrained PyTorch model name or directory, required
+ pretrained_model_name_or_path = "bert-base-chinese"
+ # train/dev corpus paths; the train path alone is enough
+ path_corpus = os.path.join(path_root, "corpus", "text_classification", "school")
+ path_train = os.path.join(path_corpus, "train.json")
+ path_dev = os.path.join(path_corpus, "dev.json")
+
+
+ if __name__ == "__main__":
+
+     model_config["evaluate_steps"] = evaluate_steps  # evaluation interval in steps
+     model_config["save_steps"] = save_steps  # checkpoint interval in steps
+     model_config["path_train"] = path_train  # training corpus, required
+     model_config["path_dev"] = path_dev  # validation corpus, may be None
+     model_config["path_tet"] = None  # test corpus, may be None
+     # loss function type:
+     # multi-class: None (BCE), BCE, BCE_LOGITS, MSE, FOCAL_LOSS, DICE_LOSS, LABEL_SMOOTH
+     # multi-label: SOFT_MARGIN_LOSS, PRIOR_MARGIN_LOSS, FOCAL_LOSS, CIRCLE_LOSS, DICE_LOSS, etc.
+     model_config["loss_type"] = "FOCAL_LOSS"
+     os.environ["CUDA_VISIBLE_DEVICES"] = str(model_config["CUDA_VISIBLE_DEVICES"])
+
+     model_config["pretrained_model_name_or_path"] = pretrained_model_name_or_path
+     model_config["model_type"] = "BERT"
+     model_config["model_save_path"] = "../output/text_classification/model_{}".format(model_config["model_type"])
+     # main
+     lc = TextClassification(model_config)
+     lc.process()
+     lc.train()
web_demo_audio.py ADDED
@@ -0,0 +1,164 @@
+ import gradio as gr
+ import modelscope_studio as mgr
+ import librosa
+ from transformers import AutoProcessor, Qwen2AudioForConditionalGeneration
+ from argparse import ArgumentParser
+
+ DEFAULT_CKPT_PATH = 'Qwen/Qwen2-Audio-7B-Instruct'
+
+
+ def _get_args():
+     parser = ArgumentParser()
+     parser.add_argument("-c", "--checkpoint-path", type=str, default=DEFAULT_CKPT_PATH,
+                         help="Checkpoint name or path, default to %(default)r")
+     parser.add_argument("--cpu-only", action="store_true", help="Run demo with CPU only")
+     parser.add_argument("--inbrowser", action="store_true", default=False,
+                         help="Automatically launch the interface in a new tab on the default browser.")
+     parser.add_argument("--server-port", type=int, default=8000,
+                         help="Demo server port.")
+     parser.add_argument("--server-name", type=str, default="127.0.0.1",
+                         help="Demo server name.")
+
+     args = parser.parse_args()
+     return args
+
+
+ def add_text(chatbot, task_history, input):
+     """Add the user's text and any uploaded audio to the chat history."""
+     text_content = input.text
+     content = []
+     if len(input.files) > 0:
+         for i in input.files:
+             content.append({'type': 'audio', 'audio_url': i.path})
+     if text_content:
+         content.append({'type': 'text', 'text': text_content})
+     task_history.append({"role": "user", "content": content})
+
+     chatbot.append([{
+         "text": input.text,
+         "files": input.files,
+     }, None])
+     return chatbot, task_history, None
+
+
+ def add_file(chatbot, task_history, audio_file):
+     """Add an audio file to the chat history."""
+     task_history.append({"role": "user", "content": [{"audio": audio_file.name}]})
+     chatbot.append((f"[Audio file: {audio_file.name}]", None))
+     return chatbot, task_history
+
+
+ def reset_user_input():
+     """Reset the user input field."""
+     return gr.update(value='')
+
+
+ def reset_state(task_history):
+     """Reset the chat history."""
+     return [], []
+
+
+ def regenerate(chatbot, task_history):
+     """Regenerate the last bot response."""
+     if task_history and task_history[-1]['role'] == 'assistant':
+         task_history.pop()
+         chatbot.pop()
+     if task_history:
+         chatbot, task_history = predict(chatbot, task_history)
+     return chatbot, task_history
+
+
+ def predict(chatbot, task_history):
+     """Generate a response from the model."""
+     print(f"{task_history=}")
+     print(f"{chatbot=}")
+     text = processor.apply_chat_template(task_history, add_generation_prompt=True, tokenize=False)
+     audios = []
+     for message in task_history:
+         if isinstance(message["content"], list):
+             for ele in message["content"]:
+                 if ele["type"] == "audio":
+                     audios.append(
+                         librosa.load(ele['audio_url'], sr=processor.feature_extractor.sampling_rate)[0]
+                     )
+
+     if len(audios) == 0:
+         audios = None
+     print(f"{text=}")
+     print(f"{audios=}")
+     inputs = processor(text=text, audios=audios, return_tensors="pt", padding=True)
+     if not _get_args().cpu_only:
+         inputs["input_ids"] = inputs.input_ids.to("cuda")
+
+     generate_ids = model.generate(**inputs, max_length=256)
+     generate_ids = generate_ids[:, inputs.input_ids.size(1):]
+
+     response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+     print(f"{response=}")
+     task_history.append({'role': 'assistant',
+                          'content': response})
+     chatbot.append((None, response))  # Add the response to chatbot
+     return chatbot, task_history
+
+
+ def _launch_demo(args):
+     with gr.Blocks() as demo:
+         gr.Markdown(
+             """<p align="center"><img src="https://qianwen-res.oss-cn-beijing.aliyuncs.com/assets/blog/qwenaudio/qwen2audio_logo.png" style="height: 80px"/><p>""")
+         gr.Markdown("""<center><font size=8>Qwen2-Audio-Instruct Bot</center>""")
+         gr.Markdown(
+             """\
+ <center><font size=3>This WebUI is based on Qwen2-Audio-Instruct, developed by Alibaba Cloud. \
+ (本WebUI基于Qwen2-Audio-Instruct打造,实现聊天机器人功能。)</center>""")
+         gr.Markdown("""\
+ <center><font size=4>Qwen2-Audio <a href="https://modelscope.cn/models/qwen/Qwen2-Audio-7B">🤖 </a>
+ | <a href="https://huggingface.co/Qwen/Qwen2-Audio-7B">🤗</a>&nbsp |
+ Qwen2-Audio-Instruct <a href="https://modelscope.cn/models/qwen/Qwen2-Audio-7B-Instruct">🤖 </a> |
+ <a href="https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct">🤗</a>&nbsp |
+ &nbsp<a href="https://github.com/QwenLM/Qwen2-Audio">Github</a></center>""")
+         chatbot = mgr.Chatbot(label='Qwen2-Audio-7B-Instruct', elem_classes="control-height", height=750)
+
+         user_input = mgr.MultimodalInput(
+             interactive=True,
+             sources=['microphone', 'upload'],
+             submit_button_props=dict(value="🚀 Submit (发送)"),
+             upload_button_props=dict(value="📁 Upload (上传文件)", show_progress=True),
+         )
+         task_history = gr.State([])
+
+         with gr.Row():
+             empty_bin = gr.Button("🧹 Clear History (清除历史)")
+             regen_btn = gr.Button("🤔️ Regenerate (重试)")
+
+         user_input.submit(fn=add_text,
+                           inputs=[chatbot, task_history, user_input],
+                           outputs=[chatbot, task_history, user_input]).then(
+             predict, [chatbot, task_history], [chatbot, task_history], show_progress=True
+         )
+         empty_bin.click(reset_state, outputs=[chatbot, task_history], show_progress=True)
+         regen_btn.click(regenerate, [chatbot, task_history], [chatbot, task_history], show_progress=True)
+
+         demo.queue().launch(
+             share=True,
+             inbrowser=args.inbrowser,
+             server_port=args.server_port,
+             server_name=args.server_name,
+         )
+
+
+ if __name__ == "__main__":
+     args = _get_args()
+     if args.cpu_only:
+         device_map = "cpu"
+     else:
+         device_map = "auto"
+
+     model = Qwen2AudioForConditionalGeneration.from_pretrained(
+         args.checkpoint_path,
+         torch_dtype="auto",
+         device_map=device_map,
+         resume_download=True,
+     ).eval()
+     model.generation_config.max_new_tokens = 2048  # For chat.
+     print("generation_config", model.generation_config)
+     processor = AutoProcessor.from_pretrained(args.checkpoint_path, resume_download=True)
+     _launch_demo(args)
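Both demos funnel inference through processor.apply_chat_template and model.generate. A minimal sketch of the same path without the Gradio UI, using the message schema that predict() builds; sample.wav is a placeholder audio file, not part of this commit:

import librosa
from transformers import AutoProcessor, Qwen2AudioForConditionalGeneration

processor = AutoProcessor.from_pretrained("Qwen/Qwen2-Audio-7B-Instruct")
model = Qwen2AudioForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-Audio-7B-Instruct", torch_dtype="auto", device_map="auto"
).eval()

# One user turn mixing audio and text, in the same format predict() appends.
conversation = [{"role": "user", "content": [
    {"type": "audio", "audio_url": "sample.wav"},  # placeholder file
    {"type": "text", "text": "What can you hear in this clip?"},
]}]
text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
audio = librosa.load("sample.wav", sr=processor.feature_extractor.sampling_rate)[0]
inputs = processor(text=text, audios=[audio], return_tensors="pt", padding=True)
inputs["input_ids"] = inputs.input_ids.to(model.device)

generate_ids = model.generate(**inputs, max_length=256)
generate_ids = generate_ids[:, inputs.input_ids.size(1):]  # keep only the new tokens
print(processor.batch_decode(generate_ids, skip_special_tokens=True)[0])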