README.md CHANGED
@@ -1,299 +1,6 @@
1
  ---
2
- license_link: https://freedevproject.org/faipl-1.0-sd/
3
- tags:
4
- - tag
5
- - '1234'
6
- dd: 1
7
- language:
8
- - en
9
- - cn
10
- - zh
11
- base_model: stepfun-ai/GOT-OCR2_0
12
- pipeline_tag: image-to-text
13
- library_name: diffusers
14
- metrics:
15
- - accuracy
16
  ---
17
- <style>
18
- @import url('https://fonts.googleapis.com/css2?family=Montserrat&family=Playwrite+DE+Grund:[email protected]&display=swap');
19
- .title-container {
20
- display: flex;
21
- justify-content: center;
22
- align-items: center;
23
- height: 20vh;
24
- }
25
- /* Title Base Styling */
26
- .title {
27
- text-align: center;
28
- letter-spacing: -0.02em;
29
- line-height: 1.2;
30
- padding: 0.5em 0;
31
- }
32
- .playwrite-de-grund-title {
33
- font-size: 40px;
34
- font-style: normal; /* You can change to italic if needed */
35
- color: black;
36
- }
37
- @keyframes titlePulse {
38
- 0% { transform: scale(1); }
39
- 100% { transform: scale(1.05); }
40
- }
41
- .custom-table {
42
- table-layout: fixed;
43
- width: 100%;
44
- border-collapse: separate;
45
- border-spacing: 1em;
46
- margin-top: 2em;
47
- }
48
- .custom-table td {
49
- width: 33.333%;
50
- vertical-align: top;
51
- padding: 0;
52
- }
53
- .custom-image-container {
54
- position: relative;
55
- width: 100%;
56
- height: 100%;
57
- margin-bottom: 1em;
58
- overflow: hidden;
59
- align-items: center;
60
- border-radius: 15px;
61
- box-shadow: 0 10px 20px rgba(0, 0, 0, 0.3);
62
- transition: all 0.3s ease;
63
- }
64
- .custom-image-container:hover {
65
- transform: translateY(-10px);
66
- box-shadow: 0 15px 30px rgba(0, 0, 0, 0.4);
67
- }
68
- .custom-image {
69
- width: 100%;
70
- height: auto;
71
- object-fit: cover;
72
- transition: transform 0.5s;
73
- }
74
- .last-image-container {
75
- display: grid;
76
- grid-template-columns: 1fr; /* One column for vertical layout */
77
- gap: 0px; /* Remove space between images */
78
- width: 80%; /* Adjust as needed */
79
- height: 100%; /* Set full height */
80
- }
81
- .last-image-container img {
82
- width: 100%; /* Full width for each image */
83
- height: auto; /* Maintain aspect ratio */
84
- }
85
- .custom-image-container:hover .custom-image {
86
- transform: scale(1.1);
87
- }
88
- .playwrite-de-grund-title .company-name {
89
- font-size: 40px;
90
- }
91
- .nsfw-filter {
92
- filter: blur(10px);
93
- transition: filter 0.3s ease;
94
- }
95
- .custom-image-container:hover .nsfw-filter {
96
- filter: blur(5px);
97
- }
98
- .overlay {
99
- position: absolute;
100
- top: 0;
101
- left: 0;
102
- right: 0;
103
- bottom: 0;
104
- background: rgba(0, 0, 0, 0.7);
105
- display: flex;
106
- flex-direction: column;
107
- justify-content: center;
108
- align-items: center;
109
- opacity: 0;
110
- transition: opacity 0.3s;
111
- }
112
- .custom-image-container:hover .overlay {
113
- opacity: 1;
114
- }
115
- .overlay-text {
116
- font-size: 1.5em;
117
- font-weight: bold;
118
- color: #FFFFFF;
119
- text-align: center;
120
- padding: 0.5em;
121
- background: linear-gradient(45deg, #E74C3C, #C0392B);
122
- -webkit-background-clip: text;
123
- -webkit-text-fill-color: transparent;
124
- text-shadow: 3px 3px 6px rgba(0, 0, 0, 0.7);
125
- }
126
- .overlay-subtext {
127
- font-size: 0.85em;
128
- color: #F0F0F0;
129
- margin-top: 0.5em;
130
- font-style: italic;
131
- text-shadow: 3px 3px 6px rgba(0, 0, 0, 0.5);
132
- }
133
- .model-info {
134
- font-weight: bold;
135
- }
136
- @media (max-width: 768px) {
137
- .title {
138
- font-size: 3rem;
139
- }
140
- .custom-table td {
141
- display: block;
142
- width: 70%;
143
- }
144
- }
145
- .playwrite-de-grund-title .trained-by {
146
- font-size: 32px; /* Smaller font size for "trained by" part */
147
- }
148
- </style>
149
- <head>
150
- <link
151
- rel="stylesheet"
152
- href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.8.2/css/all.min.css"
153
- />
154
- </head>
155
- <body>
156
- <div class="title-container">
157
- <h1 class="title">
158
- <i class="fa-thin fa-palette"></i>
159
- <span class="playwrite-de-grund-title"><b>Illustrious XL v0.1</b><br> <span class="trained-by">trained by</span> <a rel="nofollow" href="https://onomaai.com/"><b><span class="company-name">Onoma AI</span></b></a></span>
160
- </h1>
161
- </div>
162
- <table class="custom-table">
163
- <tr>
164
- <td>
165
- <div class="custom-image-container">
166
- <img class="custom-image" src="https://cdn-uploads.huggingface.co/production/uploads/65eea2d62cc24ebc6dbe16c0/dXvGxUKjcsqzt_gDWc9FU.png" alt="s00">
167
- </div>
168
- <div class="custom-image-container">
169
- <img class="custom-image" src="https://cdn-uploads.huggingface.co/production/uploads/65eea2d62cc24ebc6dbe16c0/TjfHgNIgpfhX1Josy-a1h.png" alt="s01">
170
- </div>
171
- <div class="custom-image-container">
172
- <img class="custom-image" src="https://cdn-uploads.huggingface.co/production/uploads/65eea2d62cc24ebc6dbe16c0/YMxjs05WcbuS5sIjeqOJr.png" alt="s02">
173
- </div>
174
- </td>
175
- <td>
176
- <div class="custom-image-container">
177
- <img class="custom-image" src="https://cdn-uploads.huggingface.co/production/uploads/65eea2d62cc24ebc6dbe16c0/ChTQ2UKphqbFsyKF9ddNY.png" alt="s10">
178
- </div>
179
- <div class="custom-image-container">
180
- <img class="custom-image" src="https://cdn-uploads.huggingface.co/production/uploads/65eea2d62cc24ebc6dbe16c0/PO3_B7AeUVq59OWHidEas.png" alt="s11">
181
- </div>
182
- <div class="custom-image-container">
183
- <img class="custom-image" src="https://cdn-uploads.huggingface.co/production/uploads/65eea2d62cc24ebc6dbe16c0/hLR6af7AluIYQPB6GXQYh.png" alt="s12">
184
- </div>
185
- </td>
186
- <td>
187
- <div class="custom-image-container">
188
- <img class="custom-image" src="https://cdn-uploads.huggingface.co/production/uploads/65eea2d62cc24ebc6dbe16c0/4kdzhZAGp_VLEqat6T5Yv.png" alt="s20">
189
- </div>
190
- <div class="custom-image-container">
191
- <img class="custom-image" src="https://cdn-uploads.huggingface.co/production/uploads/65eea2d62cc24ebc6dbe16c0/05bgqY-9S2dNxtpa6WmNV.png" alt="s21">
192
- </div>
193
- <div class="custom-image-container">
194
- <img class="custom-image" src="https://cdn-uploads.huggingface.co/production/uploads/65eea2d62cc24ebc6dbe16c0/yAYxcQ1IK_dytlPGObMe4.png" alt="s22">
195
- </div>
196
- </td>
197
- </tr>
198
- </table>
199
-
200
- <div>
201
- <p>
- Illustrious XL is an illustration-focused Stable Diffusion XL model continued from Kohaku XL Beta 5, trained by the OnomaAI Research Team.
- The model focuses on utilizing the large-scale annotated dataset <a href="https://huggingface.co/datasets/nyanko7/danbooru2023">Danbooru2023</a>.
- We release the v0.1 and v0.1-GUIDED models here under the Fair AI Public License; however, we discourage use of the model for monetization or any closed-source purposes.
- For full technical details, please refer to our technical report.
- </p>
207
- <p>
208
- <strong>Model Information:</strong>
209
- </p>
210
- <ul style="margin-left: 20px;">
211
- <li><strong>Name:</strong> Illustrious-XL-v0.1</li>
212
- <li><strong>Model Type:</strong> Stable Diffusion XL Model</li>
213
- <li><strong>Dataset:</strong> Fine-tuned on Danbooru2023 Dataset</li>
214
- </ul>
215
- <p>
216
- <strong>Description</strong>:
217
- </p>
218
- <ul style="margin-left: 20px;">
219
- <li><strong>Illustrious-XL</strong> is a powerful generative model series, fine-tuned on the comprehensive Danbooru2023 dataset and its variants. It includes a wide variety of character designs, styles, and artistic knowledge derived from the dataset, making it suitable for creative and artistic AI generation tasks.</li>
220
- <li><strong>Illustrious-XL-v0.1</strong> is an untuned BASE model, intended to serve as the base for all future model variants. LoRAs / adapters can be trained on top of it, ensuring future use cases. The model is for research purposes only, as it is not tuned for aesthetics / preferences.</li>
221
- <li><strong>Illustrious-XL-v0.1-GUIDED</strong> is a minimally safety-controlled model, which is the better option for typical use cases.</li>
222
- </ul>
- We plan to release several aesthetic-finetuned model variants in the near future.
224
- <p>
225
- <strong>Technical Details:</strong>
226
- </p>
227
- <ul style="margin-left: 20px;">
228
- <li> <a href="https://arxiv.org/abs/2409.19946" target="_blank">https://arxiv.org/abs/2409.19946</a> </li>
229
- </ul>
230
- <p>
231
- <strong>Terms and Conditions:</strong>
232
- </p>
233
- <ul style="margin-left: 20px;">
234
- <li>We recommend using the official repositories, to prevent malicious attacks.</li>
235
- <li>Users must agree to the LICENSE to use the model. As mentioned in the LICENSE, we take NO action regarding generated results or possible variants.</li>
236
- <li> <strong>As mentioned in LICENSE, users must NOT use the generated result for any prohibited purposes, including but not limited to:</strong></li>
237
- <ul style="margin-left: 20px;">
238
- <li><strong>Harmful or malicious activities</strong>: This includes harassment, threats, spreading misinformation, or any use intended to harm individuals or groups.</li>
239
- <li><strong>Illegal activities</strong>: Using generated content to violate any applicable laws or regulations.</li>
240
- <li><strong>Unethical, offensive content generation</strong>: Generating offensive, defamatory, or controversial content that violates ethical guidelines.</li>
241
- </ul>
242
- </ul>
243
- By using this model, users agree to comply with the conditions outlined in the LICENSE and acknowledge responsibility for how they utilize the generated content.
244
- <p>
245
- <strong>Safety Control Recommendation:</strong>
246
- </p>
247
- <ul style="margin-left: 20px;">
248
- <li>Generative models can occasionally produce unintended or harmful outputs.</li>
249
- <li>To minimize this risk, it is strongly recommended to use the GUIDED model variant, which incorporates additional safety mechanisms for responsible content generation.</li>
250
- <li>By choosing this variant, users can significantly reduce the likelihood of generating harmful or unintended content.</li>
251
- <li>We plan to update the GUIDED model variants and their methodologies through extensive research.</li>
252
- </ul>
253
- <p>
254
- <strong>Training/Merging Policy:</strong><br>
255
- You may fine-tune, merge, or train LoRA based on this model. However, to foster an open-source community, you are required to:
256
- </p>
257
- <ul style="margin-left: 20px;">
258
- <li>Openly share details of any derived models, including references to the original model licensed under the fair-ai-public-license.</li>
259
- <li>Provide information on datasets and "merge recipes" used for fine-tuning or training.</li>
260
- <li>Adhere to the <strong>fair-ai-public-license</strong>, ensuring that any derivative works are also open source.</li>
261
- </ul>
262
- <p>
- <strong>Uploading / Generation Policy:</strong><br>
- We do not restrict the upload or distribution of generated results, as we do not hold any rights to the generated materials. This includes personally trained models, fine-tuned models, and trained LoRA-related results. However, we kindly ask you to share your generation details, to foster open-source communities and research.
- </p>
266
- <p>
267
- <strong>Monetization Prohibition:</strong>
268
- <ul style="margin-left: 20px;">
269
- <li>You are prohibited from monetizing any <strong>closed-source fine-tuned / merged model that prevents the public from accessing the model's source code / weights and their usage.</strong></li>
270
- <li>As per the license, you must openly publish any derivative models and variants. This model is intended for open-source use, and all derivatives must follow the same principles.</li>
271
- </ul>
272
- </p>
273
- <p>
- <strong>Usage:</strong><br>
- We do not recommend overusing critical composition tags such as 'close-up', 'upside-down', or 'cowboy shot', as they can conflict with one another and confuse the model, affecting results.<br>
- Recommended sampling method: Euler a, Sampling Steps: 20–28, CFG: 5–7.5 (may vary based on use case).<br>
- We suggest using suitable composition tags like "upper body," "cowboy shot," "portrait," or "full body" depending on your use case.<br>
- The model supports quality tags such as: "worst quality," "bad quality," "average quality," "good quality," "best quality," and "masterpiece (quality)."<br>
- Note: The model does not have any default style. This is intended behavior for the base model.
- </p>
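For reference, a minimal text-to-image sketch with the settings above might look like the following. This is not an official example: it assumes the weights are published as an SDXL checkpoint loadable by diffusers, and the repository id used here is a placeholder.

```python
# Minimal sketch (assumptions: diffusers-loadable SDXL weights; placeholder repo id).
# Uses the recommendations above: Euler a sampler, ~28 steps, CFG around 6,
# plus composition and quality tags in the prompt.
import torch
from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler

repo_id = "OnomaAIResearch/Illustrious-xl-v0.1"  # placeholder repository id

pipe = StableDiffusionXLPipeline.from_pretrained(repo_id, torch_dtype=torch.float16)
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)  # "Euler a"
pipe.to("cuda")

image = pipe(
    prompt="1girl, upper body, looking at viewer, best quality, masterpiece",
    negative_prompt="worst quality, bad quality, lowres, bad anatomy, bad hands",
    num_inference_steps=28,   # recommended range: 20-28
    guidance_scale=6.0,       # recommended CFG: 5-7.5
).images[0]
image.save("sample.png")
```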
281
- <div class="last-image-container">
282
- <img src="https://cdn-uploads.huggingface.co/production/uploads/651d27e3a00c49c5e50c0653/RiStls1S26meeu8UV8wKj.png" alt="s23">
283
- <p><strong>Prompt:</strong><br>
284
- 1boy, holding knife, blue eyes, jewelry, jacket, shirt, open mouth, hand up, simple background, hair between eyes, vest, knife, tongue, holding weapon, grey vest, upper body, necktie, solo, looking at viewer, smile, pink blood, weapon, dagger, open clothes, collared shirt, blood on face, tongue out, blonde hair, holding dagger, red necktie, white shirt, blood, short hair, holding, earrings, long sleeves, black jacket, dark theme
285
- </p>
286
- <p><strong>Negative Prompt:</strong><br>
287
- worst quality, comic, multiple views, bad quality, low quality, lowres, displeasing, very displeasing, bad anatomy, bad hands, scan artifacts, monochrome, greyscale, signature, twitter username, jpeg artifacts, 2koma, 4koma, guro, extra digits, fewer digits
288
- </p>
289
- <img src="https://cdn-uploads.huggingface.co/production/uploads/63398de08f27255b6b50081a/2QgPFOXbu0W6XjAMvLryY.png" alt="s24">
290
- <p><strong>Prompt:</strong><br>
291
- 1girl, extremely dark, black theme, silhouette, rim lighting, black, looking at viewer, low contrast, masterpiece
292
- </p>
293
- <p><strong>Negative Prompt:</strong><br>
294
- worst quality, comic, multiple views, bad quality, low quality, lowres, displeasing, very displeasing, bad anatomy, bad hands, scan artifacts, monochrome, greyscale, twitter username, jpeg artifacts, 2koma, 4koma, guro, extra digits, fewer digits, jaggy lines, unclear
295
- </p>
296
- </div>
297
-
298
- </div>
299
- </body>
 
1
  ---
2
+ license: apache-2.0
3
+ tag: 123
4
+ ts: 123
5
+ 123: 456
6
  ---
testMd DELETED
@@ -1,153 +0,0 @@
1
- # Table of Contents
- - [📖 Model Introduction](#model-introduction)
- - [⚙️ Quick Start](#quick-start)
- - [📊 Benchmark Evaluation](#benchmark-results)
- - [📜 Statement and License](#statement-and-license)
-
7
- # Model Introduction
-
- Baichuan 2 is the **new generation of open-source large language models** launched by [百川智能], trained on **2.6 trillion** tokens of high-quality corpora.
- Baichuan 2 achieves the **best** results for its size on several authoritative Chinese, English, and multilingual general and domain-specific benchmarks.
- This release includes **Base** and **Chat** versions of the **7B** and **13B** models, and provides a **4-bit quantized** version of the Chat models.
- All versions are fully open for academic research. In addition, developers may use the models **free of charge for commercial purposes** after applying by email and obtaining an official commercial license; see the [License](#license) section.
- For more information, please read our technical report [Baichuan 2: Open Large-scale Language Models].
-
- The released versions and download links are listed in the table below:
16
-
17
- | | Base Model | Aligned (Chat) Model | 4-bit Quantized Aligned Model |
18
- |:---:|:--------------------:|:--------------------:|:--------------------------:|
19
- | 7B | [Baichuan2-7B-Base] | [Baichuan2-7B-Chat] | [Baichuan2-7B-Chat-4bits] |
20
- | 13B | [Baichuan2-13B-Base] | [Baichuan2-13B-Chat] | [Baichuan2-13B-Chat-4bits] |
21
-
22
- # Quick Start
23
-
24
- ```python
25
- import torch
26
- from modelscope import snapshot_download, AutoModelForCausalLM, AutoTokenizer,GenerationConfig
27
- model_dir = snapshot_download("baichuan-inc/Baichuan2-13B-Chat", revision='v1.0.1')
28
- tokenizer = AutoTokenizer.from_pretrained(model_dir, device_map="auto",
29
- trust_remote_code=True, torch_dtype=torch.float16)
30
- model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto",
31
- trust_remote_code=True, torch_dtype=torch.float16)
32
- model.generation_config = GenerationConfig.from_pretrained(model_dir)
33
- messages = []
34
- messages.append({"role": "user", "content": "讲解一下“温故而知新”"})
35
- response = model.chat(tokenizer, messages)
36
- print(response)
37
- messages.append({'role': 'assistant', 'content': response})
38
- messages.append({"role": "user", "content": "背诵一下将进酒"})
39
- response = model.chat(tokenizer, messages)
40
- print(response)
41
- ```
42
- On the free compute available in the ModelScope community, the 13B chat model can also be used with quantization:
43
- ```python
44
- import torch
45
- from modelscope import snapshot_download, AutoModelForCausalLM, AutoTokenizer,GenerationConfig
46
- from transformers import BitsAndBytesConfig
47
-
48
- quantization_config = BitsAndBytesConfig(
49
- load_in_8bit=False,
50
- load_in_4bit=True,
51
- bnb_4bit_compute_dtype=torch.bfloat16,
52
- bnb_4bit_quant_type='nf4',
53
- bnb_4bit_use_double_quant=True)
54
- model_dir = snapshot_download("baichuan-inc/Baichuan2-13B-Chat", revision='v1.0.1')
55
- tokenizer = AutoTokenizer.from_pretrained(model_dir, device_map="auto",
56
- trust_remote_code=True, torch_dtype=torch.float16)
57
- model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto",
58
- trust_remote_code=True, torch_dtype=torch.float16,
59
- quantization_config=quantization_config)
60
- model.generation_config = GenerationConfig.from_pretrained(model_dir)
61
- messages = []
62
- messages.append({"role": "user", "content": "讲解一下“温故而知新”"})
63
- response = model.chat(tokenizer, messages)
64
- print(response)
65
- messages.append({'role': 'assistant', 'content': response})
66
- messages.append({"role": "user", "content": "背诵一下将进酒"})
67
- response = model.chat(tokenizer, messages)
68
- print(response)
69
- ```
70
- # Benchmark Results
71
-
72
- We have extensively evaluated the model on authoritative Chinese and English datasets across six domains: [General][通用], [Legal][法律], [Medical][医疗], [Mathematics][数学], [Code][代码], and [Multilingual Translation][多语言翻译]. More detailed evaluation results are available on [GitHub].
73
-
74
- ### 7B Model Results
75
-
76
- | | **C-Eval** | **MMLU** | **CMMLU** | **Gaokao** | **AGIEval** | **BBH** |
77
- |:-----------------------:|:----------:|:--------:|:---------:|:----------:|:-----------:|:-------:|
78
- | | 5-shot | 5-shot | 5-shot | 5-shot | 5-shot | 3-shot |
79
- | **GPT-4** | 68.40 | 83.93 | 70.33 | 66.15 | 63.27 | 75.12 |
80
- | **GPT-3.5 Turbo** | 51.10 | 68.54 | 54.06 | 47.07 | 46.13 | 61.59 |
81
- | **LLaMA-7B** | 27.10 | 35.10 | 26.75 | 27.81 | 28.17 | 32.38 |
82
- | **LLaMA2-7B** | 28.90 | 45.73 | 31.38 | 25.97 | 26.53 | 39.16 |
83
- | **MPT-7B** | 27.15 | 27.93 | 26.00 | 26.54 | 24.83 | 35.20 |
84
- | **Falcon-7B** | 24.23 | 26.03 | 25.66 | 24.24 | 24.10 | 28.77 |
85
- | **ChatGLM2-6B** | 50.20 | 45.90 | 49.00 | 49.44 | 45.28 | 31.65 |
86
- | **[Baichuan-7B]** | 42.80 | 42.30 | 44.02 | 36.34 | 34.44 | 32.48 |
87
- | **[Baichuan2-7B-Base]** | 54.00 | 54.16 | 57.07 | 47.47 | 42.73 | 41.56 |
88
-
89
- ### 13B Model Results
90
-
91
- | | **C-Eval** | **MMLU** | **CMMLU** | **Gaokao** | **AGIEval** | **BBH** |
92
- |:---------------------------:|:----------:|:--------:|:---------:|:----------:|:-----------:|:-------:|
93
- | | 5-shot | 5-shot | 5-shot | 5-shot | 5-shot | 3-shot |
94
- | **GPT-4** | 68.40 | 83.93 | 70.33 | 66.15 | 63.27 | 75.12 |
95
- | **GPT-3.5 Turbo** | 51.10 | 68.54 | 54.06 | 47.07 | 46.13 | 61.59 |
96
- | **LLaMA-13B** | 28.50 | 46.30 | 31.15 | 28.23 | 28.22 | 37.89 |
97
- | **LLaMA2-13B** | 35.80 | 55.09 | 37.99 | 30.83 | 32.29 | 46.98 |
98
- | **Vicuna-13B** | 32.80 | 52.00 | 36.28 | 30.11 | 31.55 | 43.04 |
99
- | **Chinese-Alpaca-Plus-13B** | 38.80 | 43.90 | 33.43 | 34.78 | 35.46 | 28.94 |
100
- | **XVERSE-13B** | 53.70 | 55.21 | 58.44 | 44.69 | 42.54 | 38.06 |
101
- | **[Baichuan-13B-Base]** | 52.40 | 51.60 | 55.30 | 49.69 | 43.20 | 43.01 |
102
- | **[Baichuan2-13B-Base]** | 58.10 | 59.17 | 61.97 | 54.33 | 48.17 | 48.78 |
103
-
104
-
105
- ## Intermediate Checkpoints
106
-
107
- In addition to the [Baichuan2-7B-Base] model trained on 2.6 trillion tokens, we also provide 11 earlier intermediate checkpoints (trained on roughly 0.2 to 2.4 trillion tokens, respectively) for community research ([Intermediate Checkpoints Download]). The figure below shows how these checkpoints improve on the C-Eval, MMLU, and CMMLU benchmarks:
108
-
109
- ![checkpoint](https://modelscope.cn/api/v1/models/baichuan-inc/Baichuan2-7B-Base/repo?Revision=master&FilePath=media/checkpoints.jpeg&View=true)
110
-
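As a rough sketch (not part of the original card), one way to fetch one of these intermediate checkpoints is via `huggingface_hub`; the revision name below is a placeholder, since the actual branch names must be looked up on the checkpoint repository page.

```python
# Sketch only: download an intermediate checkpoint from the repository linked above.
# The revision/branch name is a placeholder -- check the repository for the actual
# list of intermediate-checkpoint revisions.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="baichuan-inc/Baichuan2-7B-Intermediate-Checkpoints",
    revision="main",  # replace with the desired intermediate-checkpoint branch
)
print("Checkpoint files downloaded to:", local_dir)
```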
111
- # Statement and License
112
-
113
- ## Statement
114
-
115
- We hereby declare that our development team has not developed any applications based on the Baichuan 2 model, whether for iOS, Android, the web, or any other platform. We strongly urge all users not to use the Baichuan 2 model for any activities that endanger national or social security or that are unlawful. We also ask users not to use the Baichuan 2 model for internet services that have not undergone appropriate security review and filing. We hope all users abide by these principles, so that technological development can proceed in a regulated and lawful environment.
118
-
119
- We have done our best to ensure the compliance of the data used in the model training process. However, despite our considerable efforts, unforeseen issues may still arise due to the complexity of the model and data. Therefore, we assume no liability for any problems arising from the use of the Baichuan 2 open-source model, including but not limited to data security issues, public opinion risks, or any risks and problems caused by the model being misled, misused, disseminated, or improperly exploited.
121
-
122
- ## License
123
-
124
- * Community use of the Baichuan 2 models must follow the [《Baichuan 2 模型社区许可协议》] (Community License Agreement for the Baichuan 2 Model).
125
- * Baichuan 2 supports commercial use. If you intend to use the Baichuan 2 models or their derivatives for commercial purposes, please contact the licensor as follows to register and apply for written authorization: email [[email protected]].
126
-
127
- [GitHub]:https://github.com/baichuan-inc/Baichuan2
128
- [Baichuan2]:https://github.com/baichuan-inc/Baichuan2
129
-
130
- [Baichuan-7B]:https://modelscope.cn/models/baichuan-inc/baichuan-7B/summary
131
- [Baichuan2-7B-Base]:https://modelscope.cn/models/baichuan-inc/Baichuan2-7B-Base/summary
132
- [Baichuan2-7B-Chat]:https://modelscope.cn/models/baichuan-inc/Baichuan2-7B-Chat/summary
133
- [Baichuan2-7B-Chat-4bits]:https://modelscope.cn/models/baichuan-inc/Baichuan2-7B-Chat-4bits/summary
134
- [Baichuan-13B-Base]:https://modelscope.cn/models/baichuan-inc/Baichuan-13B-Base/summary
135
- [Baichuan2-13B-Base]:https://modelscope.cn/models/baichuan-inc/Baichuan2-13B-Base/summary
136
- [Baichuan2-13B-Chat]:https://modelscope.cn/models/baichuan-inc/Baichuan2-13B-Chat/summary
137
- [Baichuan2-13B-Chat-4bits]:https://modelscope.cn/models/baichuan-inc/Baichuan2-13B-Chat-4bits/summary
138
-
139
- [通用]:https://github.com/baichuan-inc/Baichuan2#%E9%80%9A%E7%94%A8%E9%A2%86%E5%9F%9F
140
- [法律]:https://github.com/baichuan-inc/Baichuan2#%E6%B3%95%E5%BE%8B%E5%8C%BB%E7%96%97
141
- [医疗]:https://github.com/baichuan-inc/Baichuan2#%E6%B3%95%E5%BE%8B%E5%8C%BB%E7%96%97
142
- [数学]:https://github.com/baichuan-inc/Baichuan2#%E6%95%B0%E5%AD%A6%E4%BB%A3%E7%A0%81
143
- [代码]:https://github.com/baichuan-inc/Baichuan2#%E6%95%B0%E5%AD%A6%E4%BB%A3%E7%A0%81
144
- [多语言翻译]:https://github.com/baichuan-inc/Baichuan2#%E5%A4%9A%E8%AF%AD%E8%A8%80%E7%BF%BB%E8%AF%91
145
-
146
- [《Baichuan 2 模型社区许可协议》]:https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Baichuan2%20%E6%A8%A1%E5%9E%8B%E7%A4%BE%E5%8C%BA%E8%AE%B8%E5%8F%AF%E5%8D%8F%E8%AE%AE.pdf
147
-
148
- [邮件申请]: mailto:[email protected]
149
- [Email]: mailto:[email protected]
150
151
- [Intermediate Checkpoints Download]: https://huggingface.co/baichuan-inc/Baichuan2-7B-Intermediate-Checkpoints
152
- [百川智能]: https://www.baichuan-ai.com
153
- [Baichuan 2: Open Large-scale Language Models]:https://cdn.baichuan-ai.com/paper/Baichuan2-technical-report.pdf
undraw_Code_review_re_woeb DELETED
Binary file (21.7 kB)
 
undraw_chatting_re_j55r DELETED
@@ -1 +0,0 @@
1
- <svg> … (unDraw "chatting" illustration, 891.29 × 745.19 viewBox; full vector path data omitted)
 
 
undraw_chatting_re_j55r.svg DELETED