eos token for each line
- app.py (+3 -3)
- test.ipynb (+227 -0)
app.py
CHANGED
@@ -15,10 +15,10 @@ def query(message, chat_history, max_turn=4):
     if len(chat_history) > max_turn:
         chat_history = chat_history[-max_turn:]
     for user, bot in chat_history:
-        prompt.append(f"0 : {user}")
-        prompt.append(f"1 : {bot}")
+        prompt.append(f"0 : {user}</s>")
+        prompt.append(f"1 : {bot}</s>")
 
-    prompt.append(f"0 : {message}")
+    prompt.append(f"0 : {message}</s>")
     prompt = "\n".join(prompt) + "\n1 :"
 
     output = generator(
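
For reference, a minimal standalone sketch of the prompt format this change produces. The sample history, the incoming message, and the `prompt = []` initialization are assumptions, since the hunk above shows only the middle of query():

# Hypothetical reproduction of the prompt-building logic in app.py.
# Speaker "0" is the user, "1" is the bot; after this commit every
# utterance is terminated with </s> so each turn ends on an eos token.
chat_history = [("hi", "hello!")]   # assumed sample history
message = "how are you?"            # assumed incoming message

prompt = []                         # assumed to be initialized before the hunk
for user, bot in chat_history:
    prompt.append(f"0 : {user}</s>")
    prompt.append(f"1 : {bot}</s>")
prompt.append(f"0 : {message}</s>")
prompt = "\n".join(prompt) + "\n1 :"

print(prompt)
# 0 : hi</s>
# 1 : hello!</s>
# 0 : how are you?</s>
# 1 :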
test.ipynb
CHANGED
@@ -0,0 +1,227 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/anaconda3/lib/python3.9/site-packages/huggingface_hub/utils/_hf_folder.py:92: UserWarning: A token has been found in `/Users/casa/.huggingface/token`. This is the old path where tokens were stored. The new location is `/Users/casa/.cache/huggingface/token` which is configurable using `HF_HOME` environment variable. Your token has been copied to this new location. You can now safely delete the old token file manually or use `huggingface-cli logout`.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e42b34cf3f07417592f26316fea86e1a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading (…)lve/main/config.json:   0%|          | 0.00/944 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "4f89d76d6b7e4cf59a9dd631bd739221",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading pytorch_model.bin:   0%|          | 0.00/1.66G [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a690f8b53a204d489f4d53a937068ac6",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading (…)neration_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "14302bef459f485a998d908b131f43ec",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading (…)okenizer_config.json:   0%|          | 0.00/771 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "33826da838e1402581f62fafd3657b90",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.27M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3ebc87d16a79449998bcb21e33d2ec0b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading (…)olve/main/merges.txt:   0%|          | 0.00/925k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d70c4a2755d04e0d995686f9425b49f8",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading (…)/main/tokenizer.json:   0%|          | 0.00/3.07M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "cd341cbb7ff445daa312695cc9be1a13",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading (…)cial_tokens_map.json:   0%|          | 0.00/96.0 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "import random\n",
+    "import time\n",
+    "from transformers import pipeline\n",
+    "\n",
+    "generator = pipeline(\n",
+    "    'text-generation',\n",
+    "    model=\"heegyu/bluechat-v0\",\n",
+    "    device=\"cuda:0\" if torch.cuda.is_available() else 'cpu'\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "def query(prompt, max_turn=4):\n",
+    "    output = generator(\n",
+    "        prompt.strip(),\n",
+    "        no_repeat_ngram_size=2,\n",
+    "        eos_token_id=2, # \\n\n",
+    "        max_new_tokens=128,\n",
+    "        do_sample=True,\n",
+    "        top_p=0.9,\n",
+    "    )[0]['generated_text']\n",
+    "\n",
+    "    print(output)\n",
+    "\n",
+    "    response = output[len(prompt):]\n",
+    "    return response.strip()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/anaconda3/lib/python3.9/site-packages/transformers/generation/utils.py:1186: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation)\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0 : 안녕하세요\n",
+      "1 : μ€λ λ μ¨ μ’μλμ? λ μ¨κ° λ§μ΄ μΆμ°λκΉ λ‘±ν¨λ© μ κ³ λμμ΄μ~2 : μμ¦ λ무 μΆμμ λ°λ»νκ² μ μΌμΈμ! ν¨λ©λ§κ³ μ½νΈλ λνΈμ~^^3 : μ λ λ‘±ν¨λ© μ’μνλλ° μ΄μ μΆμμ μ’ μ¬νλ€μ. 겨μΈμ΄λΌμ λμ΄ μμ£Ό μ€λ€μ~ μ€λμ λ‘±ν¨λ©λ³΄λ€λ μμ ν¨λ©μ λ μ¬μΌκ² μ΄μ.(μ½νΈλ μ¬κ³ μΆμλ° μ¬μ΄μ¦ λλ¬Έμ λ§μ€μ¬μ§λ€μ~~^^4)ν¨λ©μ μλ λ€ λκΊΌμ΄ κ±° μλλ° μμ¦μ μ‘°κΈ μμ κ±Έ μ°Ύκ³ μμ΄μ~~ 5: μ λ μ½νΈλ ν¨λ© λͺ¨λ μ’μν΄μ~~(ν¨λ©μ΄ μ λ§ μ’μμ.^^ ν¨λ©μ κ·Έλ₯ μ½νΈμ£ ...))6 : μ 겨μΈμ μ½νΈ μ μ¬μ~! 겨μΈμ·λ λ§μλ° μ·μ μ΄ λ κ³ λ―Ό μμ΄μ~? 7,8μμ μ’ μΆ₯κ² λ€μ~ 6: μ λ μ¬ν΄ λ΄\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'λ μ¨ μ’μλμ? λ μ¨κ° λ§μ΄ μΆμ°λκΉ λ‘±ν¨λ© μ κ³ λμμ΄μ~2 : μμ¦ λ무 μΆμμ λ°λ»νκ² μ μΌμΈμ! ν¨λ©λ§κ³ μ½νΈλ λνΈμ~^^3 : μ λ λ‘±ν¨λ© μ’μνλλ° μ΄μ μΆμμ μ’ μ¬νλ€μ. 겨μΈμ΄λΌμ λμ΄ μμ£Ό μ€λ€μ~ μ€λμ λ‘±ν¨λ©λ³΄λ€λ μμ ν¨λ©μ λ μ¬μΌκ² μ΄μ.(μ½νΈλ μ¬κ³ μΆμλ° μ¬μ΄μ¦ λλ¬Έμ λ§μ€μ¬μ§λ€μ~~^^4)ν¨λ©μ μλ λ€ λκΊΌμ΄ κ±° μλλ° μμ¦μ μ‘°κΈ μμ κ±Έ μ°Ύκ³ μμ΄μ~~ 5: μ λ μ½νΈλ ν¨λ© λͺ¨λ μ’μν΄μ~~(ν¨λ©μ΄ μ λ§ μ’μμ.^^ ν¨λ©μ κ·Έλ₯ μ½νΈμ£ ...))6 : μ 겨μΈμ μ½νΈ μ μ¬μ~! 겨μΈμ·λ λ§μλ° μ·μ μ΄ λ κ³ λ―Ό μμ΄μ~? 7,8μμ μ’ μΆ₯κ² λ€μ~ 6: μ λ μ¬ν΄ λ΄'"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "query(\"\"\"\n",
+    "0 : 안녕하세요</s>\n",
+    "1 : \n",
+    "\"\"\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
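
The notebook's query() stops generation at eos_token_id=2, which its inline comment labels as the newline token. Since this commit also terminates every prompt line with </s>, a quick sanity check of what id 2 actually decodes to may help; this is a hedged sketch assuming the heegyu/bluechat-v0 tokenizer is reachable on the Hub:

from transformers import AutoTokenizer

# Load the same tokenizer used by the pipeline in test.ipynb.
tok = AutoTokenizer.from_pretrained("heegyu/bluechat-v0")

# Print the surface form of token id 2 and the tokenizer's declared
# eos token; if they come out as "\n" or "</s>", stopping on id 2
# lines up with the per-line </s> terminators added in app.py.
print(repr(tok.convert_ids_to_tokens(2)))
print(repr(tok.eos_token), tok.eos_token_id)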