Spaces:
Runtime error
Runtime error
Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +3 -0
- .gitignore +5 -0
- LICENSE +201 -0
- README.md +185 -7
- __init__.py +0 -0
- __pycache__/global_vars.cpython-39.pyc +0 -0
- __pycache__/utils.cpython-39.pyc +0 -0
- app.py +1109 -0
- assets/guimode_preview.gif +3 -0
- assets/preview.gif +3 -0
- assets/preview.png +3 -0
- channels.txt +10 -0
- chats/__init__.py +0 -0
- chats/__pycache__/__init__.cpython-39.pyc +0 -0
- chats/__pycache__/alpaca.cpython-39.pyc +0 -0
- chats/__pycache__/alpaca_gpt4.cpython-39.pyc +0 -0
- chats/__pycache__/alpacoom.cpython-39.pyc +0 -0
- chats/__pycache__/baize.cpython-39.pyc +0 -0
- chats/__pycache__/central.cpython-39.pyc +0 -0
- chats/__pycache__/custom.cpython-39.pyc +0 -0
- chats/__pycache__/falcon.cpython-39.pyc +0 -0
- chats/__pycache__/flan_alpaca.cpython-39.pyc +0 -0
- chats/__pycache__/freewilly.cpython-39.pyc +0 -0
- chats/__pycache__/guanaco.cpython-39.pyc +0 -0
- chats/__pycache__/koalpaca.cpython-39.pyc +0 -0
- chats/__pycache__/llama2.cpython-39.pyc +0 -0
- chats/__pycache__/mpt.cpython-39.pyc +0 -0
- chats/__pycache__/os_stablelm.cpython-39.pyc +0 -0
- chats/__pycache__/post.cpython-39.pyc +0 -0
- chats/__pycache__/pre.cpython-39.pyc +0 -0
- chats/__pycache__/redpajama.cpython-39.pyc +0 -0
- chats/__pycache__/stable_vicuna.cpython-39.pyc +0 -0
- chats/__pycache__/stablelm.cpython-39.pyc +0 -0
- chats/__pycache__/starchat.cpython-39.pyc +0 -0
- chats/__pycache__/utils.cpython-39.pyc +0 -0
- chats/__pycache__/vicuna.cpython-39.pyc +0 -0
- chats/__pycache__/wizard_coder.cpython-39.pyc +0 -0
- chats/__pycache__/wizard_falcon.cpython-39.pyc +0 -0
- chats/__pycache__/xgen.cpython-39.pyc +0 -0
- chats/alpaca.py +51 -0
- chats/alpaca_gpt4.py +51 -0
- chats/alpacoom.py +51 -0
- chats/baize.py +68 -0
- chats/central.py +380 -0
- chats/custom.py +65 -0
- chats/falcon.py +62 -0
- chats/flan_alpaca.py +51 -0
- chats/freewilly.py +74 -0
- chats/guanaco.py +63 -0
- chats/koalpaca.py +51 -0
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
assets/guimode_preview.gif filter=lfs diff=lfs merge=lfs -text
|
37 |
+
assets/preview.gif filter=lfs diff=lfs merge=lfs -text
|
38 |
+
assets/preview.png filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.ipynb_checkpoints
|
2 |
+
__pycache__
|
3 |
+
nohup.out
|
4 |
+
test.py
|
5 |
+
.dstack
|
LICENSE
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Apache License
|
2 |
+
Version 2.0, January 2004
|
3 |
+
http://www.apache.org/licenses/
|
4 |
+
|
5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6 |
+
|
7 |
+
1. Definitions.
|
8 |
+
|
9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
11 |
+
|
12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
13 |
+
the copyright owner that is granting the License.
|
14 |
+
|
15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
16 |
+
other entities that control, are controlled by, or are under common
|
17 |
+
control with that entity. For the purposes of this definition,
|
18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
19 |
+
direction or management of such entity, whether by contract or
|
20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
22 |
+
|
23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
24 |
+
exercising permissions granted by this License.
|
25 |
+
|
26 |
+
"Source" form shall mean the preferred form for making modifications,
|
27 |
+
including but not limited to software source code, documentation
|
28 |
+
source, and configuration files.
|
29 |
+
|
30 |
+
"Object" form shall mean any form resulting from mechanical
|
31 |
+
transformation or translation of a Source form, including but
|
32 |
+
not limited to compiled object code, generated documentation,
|
33 |
+
and conversions to other media types.
|
34 |
+
|
35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
36 |
+
Object form, made available under the License, as indicated by a
|
37 |
+
copyright notice that is included in or attached to the work
|
38 |
+
(an example is provided in the Appendix below).
|
39 |
+
|
40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
41 |
+
form, that is based on (or derived from) the Work and for which the
|
42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
44 |
+
of this License, Derivative Works shall not include works that remain
|
45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
46 |
+
the Work and Derivative Works thereof.
|
47 |
+
|
48 |
+
"Contribution" shall mean any work of authorship, including
|
49 |
+
the original version of the Work and any modifications or additions
|
50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
54 |
+
means any form of electronic, verbal, or written communication sent
|
55 |
+
to the Licensor or its representatives, including but not limited to
|
56 |
+
communication on electronic mailing lists, source code control systems,
|
57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
59 |
+
excluding communication that is conspicuously marked or otherwise
|
60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
61 |
+
|
62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
64 |
+
subsequently incorporated within the Work.
|
65 |
+
|
66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
71 |
+
Work and such Derivative Works in Source or Object form.
|
72 |
+
|
73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
76 |
+
(except as stated in this section) patent license to make, have made,
|
77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
78 |
+
where such license applies only to those patent claims licensable
|
79 |
+
by such Contributor that are necessarily infringed by their
|
80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
82 |
+
institute patent litigation against any entity (including a
|
83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
84 |
+
or a Contribution incorporated within the Work constitutes direct
|
85 |
+
or contributory patent infringement, then any patent licenses
|
86 |
+
granted to You under this License for that Work shall terminate
|
87 |
+
as of the date such litigation is filed.
|
88 |
+
|
89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
90 |
+
Work or Derivative Works thereof in any medium, with or without
|
91 |
+
modifications, and in Source or Object form, provided that You
|
92 |
+
meet the following conditions:
|
93 |
+
|
94 |
+
(a) You must give any other recipients of the Work or
|
95 |
+
Derivative Works a copy of this License; and
|
96 |
+
|
97 |
+
(b) You must cause any modified files to carry prominent notices
|
98 |
+
stating that You changed the files; and
|
99 |
+
|
100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
101 |
+
that You distribute, all copyright, patent, trademark, and
|
102 |
+
attribution notices from the Source form of the Work,
|
103 |
+
excluding those notices that do not pertain to any part of
|
104 |
+
the Derivative Works; and
|
105 |
+
|
106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
107 |
+
distribution, then any Derivative Works that You distribute must
|
108 |
+
include a readable copy of the attribution notices contained
|
109 |
+
within such NOTICE file, excluding those notices that do not
|
110 |
+
pertain to any part of the Derivative Works, in at least one
|
111 |
+
of the following places: within a NOTICE text file distributed
|
112 |
+
as part of the Derivative Works; within the Source form or
|
113 |
+
documentation, if provided along with the Derivative Works; or,
|
114 |
+
within a display generated by the Derivative Works, if and
|
115 |
+
wherever such third-party notices normally appear. The contents
|
116 |
+
of the NOTICE file are for informational purposes only and
|
117 |
+
do not modify the License. You may add Your own attribution
|
118 |
+
notices within Derivative Works that You distribute, alongside
|
119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
120 |
+
that such additional attribution notices cannot be construed
|
121 |
+
as modifying the License.
|
122 |
+
|
123 |
+
You may add Your own copyright statement to Your modifications and
|
124 |
+
may provide additional or different license terms and conditions
|
125 |
+
for use, reproduction, or distribution of Your modifications, or
|
126 |
+
for any such Derivative Works as a whole, provided Your use,
|
127 |
+
reproduction, and distribution of the Work otherwise complies with
|
128 |
+
the conditions stated in this License.
|
129 |
+
|
130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
132 |
+
by You to the Licensor shall be under the terms and conditions of
|
133 |
+
this License, without any additional terms or conditions.
|
134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135 |
+
the terms of any separate license agreement you may have executed
|
136 |
+
with Licensor regarding such Contributions.
|
137 |
+
|
138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
140 |
+
except as required for reasonable and customary use in describing the
|
141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
142 |
+
|
143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144 |
+
agreed to in writing, Licensor provides the Work (and each
|
145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147 |
+
implied, including, without limitation, any warranties or conditions
|
148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150 |
+
appropriateness of using or redistributing the Work and assume any
|
151 |
+
risks associated with Your exercise of permissions under this License.
|
152 |
+
|
153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
154 |
+
whether in tort (including negligence), contract, or otherwise,
|
155 |
+
unless required by applicable law (such as deliberate and grossly
|
156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157 |
+
liable to You for damages, including any direct, indirect, special,
|
158 |
+
incidental, or consequential damages of any character arising as a
|
159 |
+
result of this License or out of the use or inability to use the
|
160 |
+
Work (including but not limited to damages for loss of goodwill,
|
161 |
+
work stoppage, computer failure or malfunction, or any and all
|
162 |
+
other commercial damages or losses), even if such Contributor
|
163 |
+
has been advised of the possibility of such damages.
|
164 |
+
|
165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168 |
+
or other liability obligations and/or rights consistent with this
|
169 |
+
License. However, in accepting such obligations, You may act only
|
170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171 |
+
of any other Contributor, and only if You agree to indemnify,
|
172 |
+
defend, and hold each Contributor harmless for any liability
|
173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
174 |
+
of your accepting any such warranty or additional liability.
|
175 |
+
|
176 |
+
END OF TERMS AND CONDITIONS
|
177 |
+
|
178 |
+
APPENDIX: How to apply the Apache License to your work.
|
179 |
+
|
180 |
+
To apply the Apache License to your work, attach the following
|
181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
182 |
+
replaced with your own identifying information. (Don't include
|
183 |
+
the brackets!) The text should be enclosed in the appropriate
|
184 |
+
comment syntax for the file format. We also recommend that a
|
185 |
+
file or class name and description of purpose be included on the
|
186 |
+
same "printed page" as the copyright notice for easier
|
187 |
+
identification within third-party archives.
|
188 |
+
|
189 |
+
Copyright [yyyy] [name of copyright owner]
|
190 |
+
|
191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
192 |
+
you may not use this file except in compliance with the License.
|
193 |
+
You may obtain a copy of the License at
|
194 |
+
|
195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
196 |
+
|
197 |
+
Unless required by applicable law or agreed to in writing, software
|
198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200 |
+
See the License for the specific language governing permissions and
|
201 |
+
limitations under the License.
|
README.md
CHANGED
@@ -1,12 +1,190 @@
|
|
1 |
---
|
2 |
-
title: LLM
|
3 |
-
|
4 |
-
colorFrom: red
|
5 |
-
colorTo: pink
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.38.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
-
|
|
|
|
1 |
---
|
2 |
+
title: LLM-As-Chatbot
|
3 |
+
app_file: app.py
|
|
|
|
|
4 |
sdk: gradio
|
5 |
sdk_version: 3.38.0
|
|
|
|
|
6 |
---
|
7 |
+
## UPDATE
|
8 |
+
- **Internet search support**: you can enable **internet search** capability in Gradio application and Discord bot. For gradio, there is a `internet mode` option in the control panel. For discord, you need to specify `--internet` option in your prompt. For both cases, you need a Serper API Key which you can get one from [serper.dev](https://serper.dev/). By signing up, you will get free 2,500 free google searches which is pretty much sufficient for a long-term test.
|
9 |
+
- **Discord Bot support**: you can serve any model from the model zoo as Discord Bot. Find how to do this in the instruction section below.
|
10 |
+
|
11 |
+
# 💬🚀 LLM as a Chatbot Service
|
12 |
+
|
13 |
+
The purpose of this repository is to let people to use lots of open sourced instruction-following fine-tuned LLM models as a Chatbot service. Because different models behave differently, and different models require differently formmated prompts, I made a very simple library [`Ping Pong`](https://github.com/deep-diver/PingPong) for model agnostic conversation and context managements.
|
14 |
+
|
15 |
+
Also, I made [`GradioChat`](https://github.com/deep-diver/gradio-chat) UI that has a similar shape to [HuggingChat](https://huggingface.co/chat/) but entirely built in Gradio. Those two projects are fully integrated to power this project.
|
16 |
+
|
17 |
+
## Easiest way to try out ( ✅ Gradio, 🚧 Discord Bot )
|
18 |
+
|
19 |
+
### Jarvislabs.ai
|
20 |
+
|
21 |
+
This project has become the one of the default framework at [jarvislabs.ai](https://jarvislabs.ai/). Jarvislabs.ai is one of the cloud GPU VM provider with the cheapest GPU prices. Furthermore, all the weights of the supported popular open source LLMs are pre-downloaded. You don't need to waste of your money and time to wait until download hundreds of GBs to try out a collection of LLMs. In less than 10 minutes, you can try out any model.
|
22 |
+
- for further instruction how to run Gradio application, please follow the [official documentation](https://jarvislabs.ai/docs/llmchat) on the `llmchat` framework.
|
23 |
+
|
24 |
+
### dstack
|
25 |
+
|
26 |
+
[`dstack`](https://dstack.ai) is an open-source tool that allows to run LLM-based apps in a a cloud of your choice via single command. `dstack` supports AWS, GCP, Azure, Lambda Cloud, etc.
|
27 |
+
|
28 |
+
Use the `gradio.dstack.yml` and `discord.dstack.yml` configurations to run the Gradio app and Discord bot via `dstack`.
|
29 |
+
- for more details on how to run this repo with `dstack`, read the [official documentation](https://dstack.ai/examples/llmchat) by `dstack`.
|
30 |
+
|
31 |
+
## Instructions
|
32 |
+
|
33 |
+
### Standalone Gradio app
|
34 |
+
|
35 |
+
![](https://i.ibb.co/gW7yKj9/2023-05-26-3-31-06.png)
|
36 |
+
|
37 |
+
0. Prerequisites
|
38 |
+
|
39 |
+
Note that the code only works `Python >= 3.9` and `gradio >= 3.32.0`
|
40 |
+
|
41 |
+
```console
|
42 |
+
$ conda create -n llm-serve python=3.9
|
43 |
+
$ conda activate llm-serve
|
44 |
+
```
|
45 |
+
|
46 |
+
1. Install dependencies.
|
47 |
+
```console
|
48 |
+
$ cd LLM-As-Chatbot
|
49 |
+
$ pip install -r requirements.txt
|
50 |
+
```
|
51 |
+
|
52 |
+
2. Run Gradio application
|
53 |
+
|
54 |
+
There is no required parameter to run the Gradio application. However, there are some small details worth being noted. When `--local-files-only` is set, application won't try to look up the Hugging Face Hub(remote). Instead, it will only use the files already downloaded and cached.
|
55 |
+
|
56 |
+
Hugging Face libraries stores downloaded contents under `~/.cache` by default, and this application assumes so. However, if you downloaded weights in different location for some reasons, you can set `HF_HOME` environment variable. Find more about the [environment variables here](https://huggingface.co/docs/huggingface_hub/package_reference/environment_variables)
|
57 |
+
|
58 |
+
In order to leverage **internet search** capability, you need Serper API Key. You can set it manually in the control panel or in CLI. When specifying the Serper API Key in CLI, it will be injected into the corresponding UI control. If you don't have it yet, please get one from [serper.dev](https://serper.dev/). By signing up, you will get free 2,500 free google searches which is pretty much sufficient for a long-term test.
|
59 |
+
|
60 |
+
```console
|
61 |
+
$ python app.py --root-path "" \
|
62 |
+
--local-files-only \
|
63 |
+
--share \
|
64 |
+
--debug \
|
65 |
+
--serper-api-key "YOUR SERPER API KEY"
|
66 |
+
```
|
67 |
+
|
68 |
+
### Discord Bot
|
69 |
+
|
70 |
+
![](https://i.ibb.co/cJ3yDWh/2023-07-14-1-42-23.png)
|
71 |
+
|
72 |
+
0. Prerequisites
|
73 |
+
|
74 |
+
Note that the code only works `Python >= 3.9`
|
75 |
+
|
76 |
+
```console
|
77 |
+
$ conda create -n llm-serve python=3.9
|
78 |
+
$ conda activate llm-serve
|
79 |
+
```
|
80 |
+
|
81 |
+
1. Install dependencies.
|
82 |
+
```console
|
83 |
+
$ cd LLM-As-Chatbot
|
84 |
+
$ pip install -r requirements.txt
|
85 |
+
```
|
86 |
+
|
87 |
+
2. Run Discord Bot application. Choose one of the modes in `--mode-[cpu|mps|8bit|4bit|full-gpu]`. `full-gpu` will be choseon by default(`full` means `half` - consider this as a typo to be fixed later).
|
88 |
+
|
89 |
+
The `--token` is a required parameter, and you can get it from [Discord Developer Portal](https://discord.com/developers/docs/intro). If you have not setup Discord Bot from the Discord Developer Portal yet, please follow [How to Create a Discord Bot Account](https://www.freecodecamp.org/news/create-a-discord-bot-with-python/) section of the tutorial from [freeCodeCamp](https://www.freecodecamp.org/) to get the token.
|
90 |
+
|
91 |
+
The `--model-name` is a required parameter, and you can look around the list of supported models from [`model_cards.json`](https://github.com/deep-diver/LLM-As-Chatbot/blob/main/model_cards.json).
|
92 |
+
|
93 |
+
`--max-workers` is a parameter to determine how many requests to be handled concurrently. This simply defines the value of the `ThreadPoolExecutor`.
|
94 |
+
|
95 |
+
When `--local-files-only` is set, application won't try to look up the Hugging Face Hub(remote). Instead, it will only use the files already downloaded and cached.
|
96 |
+
|
97 |
+
In order to leverage **internet search** capability, you need Serper API Key. If you don't have it yet, please get one from [serper.dev](https://serper.dev/). By signing up, you will get free 2,500 free google searches which is pretty much sufficient for a long-term test. Once you have the Serper API Key, you can specify it in `--serper-api-key` option.
|
98 |
+
|
99 |
+
- Hugging Face libraries stores downloaded contents under `~/.cache` by default, and this application assumes so. However, if you downloaded weights in different location for some reasons, you can set `HF_HOME` environment variable. Find more about the [environment variables here](https://huggingface.co/docs/huggingface_hub/package_reference/environment_variables)
|
100 |
+
|
101 |
+
```console
|
102 |
+
$ python discord_app.py --token "DISCORD BOT TOKEN" \
|
103 |
+
--model-name "alpaca-lora-7b" \
|
104 |
+
--max-workers 1 \
|
105 |
+
--mode-[cpu|mps|8bit|4bit|full-gpu] \
|
106 |
+
--local_files_only \
|
107 |
+
--serper-api-key "YOUR SERPER API KEY"
|
108 |
+
```
|
109 |
+
|
110 |
+
4. Supported Discord Bot commands
|
111 |
+
|
112 |
+
There is no slash commands. The only way to interact with the deployed discord bot is to mention the bot. However, you can pass some special strings while mentioning the bot.
|
113 |
+
|
114 |
+
- **`@bot_name help`**: it will display a simple help message
|
115 |
+
- **`@bot_name model-info`**: it will display the information of the currently selected(deployed) model from the [`model_cards.json`](https://github.com/deep-diver/LLM-As-Chatbot/blob/main/model_cards.json).
|
116 |
+
- **`@bot_name default-params`**: it will display the default parameters to be used in model's `generate` method. That is `GenerationConfig`, and it holds parameters such as `temperature`, `top_p`, and so on.
|
117 |
+
- **`@bot_name user message --max-new-tokens 512 --temperature 0.9 --top-p 0.75 --do_sample --max-windows 5 --internet`**: all parameters are used to dynamically determine the text geneartion behaviour as in `GenerationConfig` except `max-windows`. The `max-windows` determines how many past conversations to look up as a reference. The default value is set to `3`, but as the conversation goes long, you can increase this value. `--internet` will try to answer to your prompt by aggregating information scraped from google search. To use `--internet` option, you need to specify `--serper-api-key` when booting up the program.
|
118 |
+
|
119 |
+
### Context management
|
120 |
+
|
121 |
+
Different model might have different strategies to manage context, so if you want to know the exact strategies applied to each model, take a look at the [`chats`](https://github.com/deep-diver/LLM-As-Chatbot/tree/main/chats) directory. However, here are the basic ideas that I have come up with initially. I have found long prompts will slow down the generation process a lot eventually, so I thought the prompts should be kept as short as possible while as concise as possible at the same time. In the previous version, I have accumulated all the past conversations, and that didn't go well.
|
122 |
+
|
123 |
+
- In every turn of the conversation, the past `N` conversations will be kept. Think about the `N` as a hyper-parameter. As an experiment, currently the past 2-3 conversations are only kept for all models.
|
124 |
+
|
125 |
+
### Currently supported models
|
126 |
+
|
127 |
+
<details><summary>Checkout the list of models</summary>
|
128 |
+
|
129 |
+
- [tloen/alpaca-lora-7b](https://huggingface.co/tloen/alpaca-lora-7b): the original 7B Alpaca-LoRA checkpoint by tloen (updated by 4/4/2022)
|
130 |
+
- [LLMs/Alpaca-LoRA-7B-elina](https://huggingface.co/LLMs/Alpaca-LoRA-7B-elina): the 7B Alpaca-LoRA checkpoint by Chansung (updated by 5/1/2022)
|
131 |
+
- [LLMs/Alpaca-LoRA-13B-elina](https://huggingface.co/LLMs/Alpaca-LoRA-13B-elina): the 13B Alpaca-LoRA checkpoint by Chansung (updated by 5/1/2022)
|
132 |
+
- [LLMs/Alpaca-LoRA-30B-elina](https://huggingface.co/LLMs/Alpaca-LoRA-30B-elina): the 30B Alpaca-LoRA checkpoint by Chansung (updated by 5/1/2022)
|
133 |
+
- [LLMs/Alpaca-LoRA-65B-elina](https://huggingface.co/LLMs/Alpaca-LoRA-65B-elina): the 65B Alpaca-LoRA checkpoint by Chansung (updated by 5/1/2022)
|
134 |
+
- [LLMs/AlpacaGPT4-LoRA-7B-elina](https://huggingface.co/LLMs/AlpacaGPT4-LoRA-7B-elina): the 7B Alpaca-LoRA checkpoint trained on GPT4 generated Alpaca style dataset by Chansung (updated by 5/1/2022)
|
135 |
+
- [LLMs/AlpacaGPT4-LoRA-13B-elina](https://huggingface.co/LLMs/AlpacaGPT4-LoRA-13B-elina): the 13B Alpaca-LoRA checkpoint trained on GPT4 generated Alpaca style dataset by Chansung (updated by 5/1/2022)
|
136 |
+
- [stabilityai/stablelm-tuned-alpha-7b](https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b): StableLM based fine-tuned model
|
137 |
+
- [beomi/KoAlpaca-Polyglot-12.8B](https://huggingface.co/beomi/KoAlpaca-Polyglot-12.8B): [Polyglot](https://github.com/EleutherAI/polyglot) based Alpaca style instruction fine-tuned model
|
138 |
+
- [declare-lab/flan-alpaca-xl](https://huggingface.co/declare-lab/flan-alpaca-xl): Flan XL(3B) based Alpaca style instruction fine-tuned model.
|
139 |
+
- [declare-lab/flan-alpaca-xxl](https://huggingface.co/declare-lab/flan-alpaca-xxl): Flan XXL(11B) based Alpaca style instruction fine-tuned model.
|
140 |
+
- [OpenAssistant/stablelm-7b-sft-v7-epoch-3](https://huggingface.co/OpenAssistant/stablelm-7b-sft-v7-epoch-3): StableLM(7B) based OpenAssistant's oasst1 instruction fine-tuned model.
|
141 |
+
- [Writer/camel-5b-hf](https://huggingface.co/Writer/camel-5b-hf): Palmyra-base based instruction fine-tuned model. The foundation model and the data are from its creator, [Writer](https://dev.writer.com).
|
142 |
+
- [lmsys/fastchat-t5-3b-v1.0](https://huggingface.co/lmsys/fastchat-t5-3b-v1.0): T5(3B) based Vicuna style instruction fine-tuned model on SharedGPT by [lm-sys](https://github.com/lm-sys/FastChat)
|
143 |
+
- [LLMs/Stable-Vicuna-13B](https://huggingface.co/LLMs/Stable-Vicuna-13B): Stable Vicuna(13B) from Carpel AI and Stability AI. This is not a delta weight, so use it at your own risk. I will make this repo as private soon and add Hugging Face token field.
|
144 |
+
- [LLMs/Vicuna-7b-v1.1](https://huggingface.co/LLMs/Vicuna-7b-v1.1): Vicuna(7B) from FastChat. This is not a delta weight, so use it at your own risk. I will make this repo as private soon and add Hugging Face token field.
|
145 |
+
- [LLMs/Vicuna-7b-v1.3](https://huggingface.co/lmsys/vicuna-7b-v1.3)
|
146 |
+
- [LLMs/Vicuna-13b-v1.1](https://huggingface.co/LLMs/Vicuna-13b-v1.1): Vicuna(13B) from FastChat. This is not a delta weight, so use it at your own risk. I will make this repo as private soon and add Hugging Face token field.
|
147 |
+
- [LLMs/Vicuna-13b-v1.3](https://huggingface.co/lmsys/vicuna-13b-v1.3)
|
148 |
+
- [LLMs/Vicuna-33b-v1.3](https://huggingface.co/lmsys/vicuna-33b-v1.3)
|
149 |
+
- [togethercomputer/RedPajama-INCITE-Chat-7B-v0.1](https://huggingface.co/togethercomputer/RedPajama-INCITE-Chat-7B-v0.1): RedPajama INCITE Chat(7B) from Together.
|
150 |
+
- [mosaicml/mpt-7b-chat](https://huggingface.co/mosaicml/mpt-7b-chat): MPT-7B from MOSAIC ML.
|
151 |
+
- [mosaicml/mpt-30b-chat](https://huggingface.co/mosaicml/mpt-30b-chat): MPT-30B from MOSAIC ML.
|
152 |
+
- [teknium/llama-deus-7b-v3-lora](https://huggingface.co/teknium/llama-deus-7b-v3-lora): LLaMA 7B based Alpaca style instruction fine-tuned model. The only difference between Alpaca is that this model is fine-tuned on more data including Alpaca dataset, GPTeacher, General Instruct, Code Instruct, Roleplay Instruct, Roleplay V2 Instruct, GPT4-LLM Uncensored, Unnatural Instructions, WizardLM Uncensored, CamelAI's 20k Biology, 20k Physics, 20k Chemistry, 50k Math GPT4 Datasets, and CodeAlpaca
|
153 |
+
- [HuggingFaceH4/starchat-alpha](https://huggingface.co/HuggingFaceH4/starchat-alpha): Starcoder 15.5B based instruction fine-tuned model. This model is particularly good at answering questions about coding.
|
154 |
+
- [HuggingFaceH4/starchat-beta](https://huggingface.co/HuggingFaceH4/starchat-beta): Starcoder 15.5B based instruction fine-tuned model. This model is particularly good at answering questions about coding.
|
155 |
+
- [LLMs/Vicuna-LoRA-EvolInstruct-7B](https://huggingface.co/LLMs/Vicuna-LoRA-EvolInstruct-7B): LLaMA 7B based Vicuna style instruction fine-tuned model. The dataset to fine-tune this model is from WizardLM's Evol Instruction dataset.
|
156 |
+
- [LLMs/Vicuna-LoRA-EvolInstruct-13B](https://huggingface.co/LLMs/Vicuna-LoRA-EvolInstruct-13B): LLaMA 13B based Vicuna style instruction fine-tuned model. The dataset to fine-tune this model is from WizardLM's Evol Instruction dataset.
|
157 |
+
- [project-baize/baize-v2-7b](https://huggingface.co/project-baize/baize-v2-7b): LLaMA 7B based Baize
|
158 |
+
- [project-baize/baize-v2-13b](https://huggingface.co/project-baize/baize-v2-7b): LLaMA 13B based Baize
|
159 |
+
- [timdettmers/guanaco-7b](https://huggingface.co/timdettmers/guanaco-7b): LLaMA 7B based Guanaco which is fine-tuned on OASST1 dataset with QLoRA techniques introduced in "QLoRA: Efficient Finetuning of Quantized LLMs" paper.
|
160 |
+
- [timdettmers/guanaco-13b](https://huggingface.co/timdettmers/guanaco-13b): LLaMA 13B based Guanaco which is fine-tuned on OASST1 dataset with QLoRA techniques introduced in "QLoRA: Efficient Finetuning of Quantized LLMs" paper.
|
161 |
+
- [timdettmers/guanaco-33b-merged](https://huggingface.co/timdettmers/guanaco-33b-merged): LLaMA 30B based Guanaco which is fine-tuned on OASST1 dataset with QLoRA techniques introduced in "QLoRA: Efficient Finetuning of Quantized LLMs" paper.
|
162 |
+
- [tiiuae/falcon-7b-instruct](https://huggingface.co/tiiuae/falcon-7b-instruct): Falcon 7B based instruction fine-tuned model on Baize, GPT4All, GPTeacher, and RefinedWeb-English datasets.
|
163 |
+
- [tiiuae/falcon-40b-instruct](https://huggingface.co/tiiuae/falcon-40b-instruct): Falcon 40B based instruction fine-tuned model on Baize and RefinedWeb-English datasets.
|
164 |
+
- [LLMs/WizardLM-13B-V1.0](https://huggingface.co/LLMs/WizardLM-13B-V1.0)
|
165 |
+
- [LLMs/WizardLM-30B-V1.0](https://huggingface.co/LLMs/WizardLM-30B-V1.0)
|
166 |
+
- [ehartford/Wizard-Vicuna-13B-Uncensored](https://huggingface.co/ehartford/Wizard-Vicuna-13B-Uncensored)
|
167 |
+
- [ehartford/Wizard-Vicuna-30B-Uncensored](https://huggingface.co/ehartford/Wizard-Vicuna-30B-Uncensored)
|
168 |
+
- [ehartford/samantha-7b](https://huggingface.co/ehartford/samantha-7b)
|
169 |
+
- [ehartford/samantha-13b](https://huggingface.co/ehartford/samantha-13b)
|
170 |
+
- [ehartford/samantha-33b](https://huggingface.co/ehartford/samantha-33b)
|
171 |
+
- [CalderaAI/30B-Lazarus](https://huggingface.co/CalderaAI/30B-Lazarus)
|
172 |
+
- [elinas/chronos-13b](https://huggingface.co/elinas/chronos-13b)
|
173 |
+
- [elinas/chronos-33b](https://huggingface.co/elinas/chronos-33b)
|
174 |
+
- [WizardLM/WizardCoder-15B-V1.0](https://huggingface.co/WizardLM/WizardCoder-15B-V1.0)
|
175 |
+
- [ehartford/WizardLM-Uncensored-Falcon-7b](https://huggingface.co/ehartford/WizardLM-Uncensored-Falcon-7b)
|
176 |
+
- [ehartford/WizardLM-Uncensored-Falcon-40b](https://huggingface.co/ehartford/WizardLM-Uncensored-Falcon-40b)
|
177 |
+
|
178 |
+
</details>
|
179 |
+
|
180 |
+
## Todos
|
181 |
+
|
182 |
+
- [X] Gradio components to control the configurations of the generation
|
183 |
+
- [X] Multiple conversation management
|
184 |
+
- [X] Internet search capability (by integrating ChromaDB, `intfloat/e5-large-v2`)
|
185 |
+
- [ ] Implement server only option w/ FastAPI
|
186 |
+
|
187 |
+
## Acknowledgements
|
188 |
|
189 |
+
- I am thankful to [Jarvislabs.ai](https://jarvislabs.ai/) who generously provided free GPU resources to experiment with Alpaca-LoRA deployment and share it to communities to try out.
|
190 |
+
- I am thankful to [AI Network](https://www.ainetwork.ai) who generously provided A100(40G) x 8 DGX workstation for fine-tuning and serving the models.
|
__init__.py
ADDED
File without changes
|
__pycache__/global_vars.cpython-39.pyc
ADDED
Binary file (6.04 kB). View file
|
|
__pycache__/utils.cpython-39.pyc
ADDED
Binary file (22 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,1109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import time
|
3 |
+
import json
|
4 |
+
import copy
|
5 |
+
import types
|
6 |
+
from os import listdir
|
7 |
+
from os.path import isfile, join
|
8 |
+
import argparse
|
9 |
+
import gradio as gr
|
10 |
+
import global_vars
|
11 |
+
from chats import central
|
12 |
+
from transformers import AutoModelForCausalLM
|
13 |
+
from miscs.styles import MODEL_SELECTION_CSS
|
14 |
+
from miscs.js import GET_LOCAL_STORAGE, UPDATE_LEFT_BTNS_STATE
|
15 |
+
from utils import get_chat_manager, get_global_context
|
16 |
+
|
17 |
+
from pingpong.pingpong import PingPong
|
18 |
+
from pingpong.gradio import GradioAlpacaChatPPManager
|
19 |
+
from pingpong.gradio import GradioKoAlpacaChatPPManager
|
20 |
+
from pingpong.gradio import GradioStableLMChatPPManager
|
21 |
+
from pingpong.gradio import GradioFlanAlpacaChatPPManager
|
22 |
+
from pingpong.gradio import GradioOSStableLMChatPPManager
|
23 |
+
from pingpong.gradio import GradioVicunaChatPPManager
|
24 |
+
from pingpong.gradio import GradioStableVicunaChatPPManager
|
25 |
+
from pingpong.gradio import GradioStarChatPPManager
|
26 |
+
from pingpong.gradio import GradioMPTChatPPManager
|
27 |
+
from pingpong.gradio import GradioRedPajamaChatPPManager
|
28 |
+
from pingpong.gradio import GradioBaizeChatPPManager
|
29 |
+
|
30 |
+
# no cpu for
|
31 |
+
# - falcon families (too slow)
|
32 |
+
|
33 |
+
load_mode_list = ["cpu"]
|
34 |
+
|
35 |
+
ex_file = open("examples.txt", "r")
|
36 |
+
examples = ex_file.read().split("\n")
|
37 |
+
ex_btns = []
|
38 |
+
|
39 |
+
chl_file = open("channels.txt", "r")
|
40 |
+
channels = chl_file.read().split("\n")
|
41 |
+
channel_btns = []
|
42 |
+
|
43 |
+
default_ppm = GradioAlpacaChatPPManager()
|
44 |
+
default_ppm.ctx = "Context at top"
|
45 |
+
default_ppm.pingpongs = [
|
46 |
+
PingPong("user input #1...", "bot response #1..."),
|
47 |
+
PingPong("user input #2...", "bot response #2..."),
|
48 |
+
]
|
49 |
+
chosen_ppm = copy.deepcopy(default_ppm)
|
50 |
+
|
51 |
+
prompt_styles = {
|
52 |
+
"Alpaca": default_ppm,
|
53 |
+
"Baize": GradioBaizeChatPPManager(),
|
54 |
+
"Koalpaca": GradioKoAlpacaChatPPManager(),
|
55 |
+
"MPT": GradioMPTChatPPManager(),
|
56 |
+
"OpenAssistant StableLM": GradioOSStableLMChatPPManager(),
|
57 |
+
"RedPajama": GradioRedPajamaChatPPManager(),
|
58 |
+
"StableVicuna": GradioVicunaChatPPManager(),
|
59 |
+
"StableLM": GradioStableLMChatPPManager(),
|
60 |
+
"StarChat": GradioStarChatPPManager(),
|
61 |
+
"Vicuna": GradioVicunaChatPPManager(),
|
62 |
+
}
|
63 |
+
|
64 |
+
response_configs = [
|
65 |
+
f"configs/response_configs/{f}"
|
66 |
+
for f in listdir("configs/response_configs")
|
67 |
+
if isfile(join("configs/response_configs", f))
|
68 |
+
]
|
69 |
+
|
70 |
+
summarization_configs = [
|
71 |
+
f"configs/summarization_configs/{f}"
|
72 |
+
for f in listdir("configs/summarization_configs")
|
73 |
+
if isfile(join("configs/summarization_configs", f))
|
74 |
+
]
|
75 |
+
|
76 |
+
model_info = json.load(open("model_cards.json"))
|
77 |
+
|
78 |
+
###
|
79 |
+
|
80 |
+
def move_to_model_select_view():
|
81 |
+
return (
|
82 |
+
"move to model select view",
|
83 |
+
gr.update(visible=False),
|
84 |
+
gr.update(visible=True),
|
85 |
+
)
|
86 |
+
|
87 |
+
def use_chosen_model():
|
88 |
+
try:
|
89 |
+
test = global_vars.model
|
90 |
+
except AttributeError:
|
91 |
+
raise gr.Error("There is no previously chosen model")
|
92 |
+
|
93 |
+
gen_config = global_vars.gen_config
|
94 |
+
gen_sum_config = global_vars.gen_config_summarization
|
95 |
+
|
96 |
+
if global_vars.model_type == "custom":
|
97 |
+
ppmanager_type = chosen_ppm
|
98 |
+
else:
|
99 |
+
ppmanager_type = get_chat_manager(global_vars.model_type)
|
100 |
+
|
101 |
+
return (
|
102 |
+
"Preparation done!",
|
103 |
+
gr.update(visible=False),
|
104 |
+
gr.update(visible=True),
|
105 |
+
gr.update(label=global_vars.model_type),
|
106 |
+
{
|
107 |
+
"ppmanager_type": ppmanager_type,
|
108 |
+
"model_type": global_vars.model_type,
|
109 |
+
},
|
110 |
+
get_global_context(global_vars.model_type),
|
111 |
+
gen_config.temperature,
|
112 |
+
gen_config.top_p,
|
113 |
+
gen_config.top_k,
|
114 |
+
gen_config.repetition_penalty,
|
115 |
+
gen_config.max_new_tokens,
|
116 |
+
gen_config.num_beams,
|
117 |
+
gen_config.use_cache,
|
118 |
+
gen_config.do_sample,
|
119 |
+
gen_config.eos_token_id,
|
120 |
+
gen_config.pad_token_id,
|
121 |
+
gen_sum_config.temperature,
|
122 |
+
gen_sum_config.top_p,
|
123 |
+
gen_sum_config.top_k,
|
124 |
+
gen_sum_config.repetition_penalty,
|
125 |
+
gen_sum_config.max_new_tokens,
|
126 |
+
gen_sum_config.num_beams,
|
127 |
+
gen_sum_config.use_cache,
|
128 |
+
gen_sum_config.do_sample,
|
129 |
+
gen_sum_config.eos_token_id,
|
130 |
+
gen_sum_config.pad_token_id,
|
131 |
+
)
|
132 |
+
|
133 |
+
def move_to_byom_view():
|
134 |
+
load_mode_list = []
|
135 |
+
if global_vars.cuda_availability:
|
136 |
+
load_mode_list.extend(["gpu(half)", "gpu(load_in_8bit)", "gpu(load_in_4bit)"])
|
137 |
+
|
138 |
+
if global_vars.mps_availability:
|
139 |
+
load_mode_list.append("apple silicon")
|
140 |
+
|
141 |
+
load_mode_list.append("cpu")
|
142 |
+
|
143 |
+
return (
|
144 |
+
"move to the byom view",
|
145 |
+
gr.update(visible=False),
|
146 |
+
gr.update(visible=True),
|
147 |
+
gr.update(choices=load_mode_list, value=load_mode_list[0])
|
148 |
+
)
|
149 |
+
|
150 |
+
def prompt_style_change(key):
|
151 |
+
ppm = prompt_styles[key]
|
152 |
+
ppm.ctx = "Context at top"
|
153 |
+
ppm.pingpongs = [
|
154 |
+
PingPong("user input #1...", "bot response #1..."),
|
155 |
+
PingPong("user input #2...", "bot response #2..."),
|
156 |
+
]
|
157 |
+
chosen_ppm = copy.deepcopy(ppm)
|
158 |
+
chosen_ppm.ctx = ""
|
159 |
+
chosen_ppm.pingpongs = []
|
160 |
+
|
161 |
+
return ppm.build_prompts()
|
162 |
+
|
163 |
+
def byom_load(
|
164 |
+
base, ckpt, model_cls, tokenizer_cls,
|
165 |
+
bos_token_id, eos_token_id, pad_token_id,
|
166 |
+
load_mode,
|
167 |
+
):
|
168 |
+
# mode_cpu, model_mps, mode_8bit, mode_4bit, mode_full_gpu
|
169 |
+
global_vars.initialize_globals_byom(
|
170 |
+
base, ckpt, model_cls, tokenizer_cls,
|
171 |
+
bos_token_id, eos_token_id, pad_token_id,
|
172 |
+
True if load_mode == "cpu" else False,
|
173 |
+
True if load_mode == "apple silicon" else False,
|
174 |
+
True if load_mode == "8bit" else False,
|
175 |
+
True if load_mode == "4bit" else False,
|
176 |
+
True if load_mode == "gpu(half)" else False,
|
177 |
+
)
|
178 |
+
|
179 |
+
return (
|
180 |
+
""
|
181 |
+
)
|
182 |
+
|
183 |
+
def channel_num(btn_title):
|
184 |
+
choice = 0
|
185 |
+
|
186 |
+
for idx, channel in enumerate(channels):
|
187 |
+
if channel == btn_title:
|
188 |
+
choice = idx
|
189 |
+
|
190 |
+
return choice
|
191 |
+
|
192 |
+
|
193 |
+
def set_chatbot(btn, ld, state):
|
194 |
+
choice = channel_num(btn)
|
195 |
+
|
196 |
+
res = [state["ppmanager_type"].from_json(json.dumps(ppm_str)) for ppm_str in ld]
|
197 |
+
empty = len(res[choice].pingpongs) == 0
|
198 |
+
return (res[choice].build_uis(), choice, gr.update(visible=empty), gr.update(interactive=not empty))
|
199 |
+
|
200 |
+
|
201 |
+
def set_example(btn):
|
202 |
+
return btn, gr.update(visible=False)
|
203 |
+
|
204 |
+
|
205 |
+
def set_popup_visibility(ld, example_block):
|
206 |
+
return example_block
|
207 |
+
|
208 |
+
|
209 |
+
def move_to_second_view(btn):
|
210 |
+
info = model_info[btn]
|
211 |
+
|
212 |
+
guard_vram = 5 * 1024.
|
213 |
+
vram_req_full = int(info["vram(full)"]) + guard_vram
|
214 |
+
vram_req_8bit = int(info["vram(8bit)"]) + guard_vram
|
215 |
+
vram_req_4bit = int(info["vram(4bit)"]) + guard_vram
|
216 |
+
|
217 |
+
load_mode_list = []
|
218 |
+
|
219 |
+
if global_vars.cuda_availability:
|
220 |
+
print(f"total vram = {global_vars.available_vrams_mb}")
|
221 |
+
print(f"required vram(full={info['vram(full)']}, 8bit={info['vram(8bit)']}, 4bit={info['vram(4bit)']})")
|
222 |
+
|
223 |
+
if global_vars.available_vrams_mb >= vram_req_full:
|
224 |
+
load_mode_list.append("gpu(half)")
|
225 |
+
|
226 |
+
if global_vars.available_vrams_mb >= vram_req_8bit:
|
227 |
+
load_mode_list.append("gpu(load_in_8bit)")
|
228 |
+
|
229 |
+
if global_vars.available_vrams_mb >= vram_req_4bit:
|
230 |
+
load_mode_list.append("gpu(load_in_4bit)")
|
231 |
+
|
232 |
+
if global_vars.mps_availability:
|
233 |
+
load_mode_list.append("apple silicon")
|
234 |
+
|
235 |
+
load_mode_list.extend(["cpu"])
|
236 |
+
|
237 |
+
return (
|
238 |
+
gr.update(visible=False),
|
239 |
+
gr.update(visible=True),
|
240 |
+
info["thumb"],
|
241 |
+
f"## {btn}",
|
242 |
+
f"**Parameters**\n: Approx. {info['parameters']}",
|
243 |
+
f"**🤗 Hub(base)**\n: {info['hub(base)']}",
|
244 |
+
f"**🤗 Hub(LoRA)**\n: {info['hub(ckpt)']}",
|
245 |
+
info['desc'],
|
246 |
+
f"""**Min VRAM requirements** :
|
247 |
+
| half precision | load_in_8bit | load_in_4bit |
|
248 |
+
| ------------------------------------- | ---------------------------------- | ---------------------------------- |
|
249 |
+
| {round(vram_req_full/1024., 1)}GiB | {round(vram_req_8bit/1024., 1)}GiB | {round(vram_req_4bit/1024., 1)}GiB |
|
250 |
+
""",
|
251 |
+
info['default_gen_config'],
|
252 |
+
info['example1'],
|
253 |
+
info['example2'],
|
254 |
+
info['example3'],
|
255 |
+
info['example4'],
|
256 |
+
info['thumb-tiny'],
|
257 |
+
gr.update(choices=load_mode_list, value=load_mode_list[0]),
|
258 |
+
"",
|
259 |
+
)
|
260 |
+
|
261 |
+
def move_to_first_view():
|
262 |
+
return (gr.update(visible=True), gr.update(visible=False))
|
263 |
+
|
264 |
+
def download_completed(
|
265 |
+
model_name,
|
266 |
+
model_base,
|
267 |
+
model_ckpt,
|
268 |
+
gen_config_path,
|
269 |
+
gen_config_sum_path,
|
270 |
+
load_mode,
|
271 |
+
thumbnail_tiny,
|
272 |
+
force_download,
|
273 |
+
):
|
274 |
+
global local_files_only
|
275 |
+
|
276 |
+
tmp_args = types.SimpleNamespace()
|
277 |
+
tmp_args.base_url = model_base.split(":")[-1].split("</p")[0].strip()
|
278 |
+
tmp_args.ft_ckpt_url = model_ckpt.split(":")[-1].split("</p")[0].strip()
|
279 |
+
tmp_args.gen_config_path = gen_config_path
|
280 |
+
tmp_args.gen_config_summarization_path = gen_config_sum_path
|
281 |
+
tmp_args.force_download_ckpt = force_download
|
282 |
+
tmp_args.thumbnail_tiny = thumbnail_tiny
|
283 |
+
|
284 |
+
tmp_args.mode_cpu = True if load_mode == "cpu" else False
|
285 |
+
tmp_args.mode_mps = True if load_mode == "apple silicon" else False
|
286 |
+
tmp_args.mode_8bit = True if load_mode == "gpu(load_in_8bit)" else False
|
287 |
+
tmp_args.mode_4bit = True if load_mode == "gpu(load_in_4bit)" else False
|
288 |
+
tmp_args.mode_full_gpu = True if load_mode == "gpu(half)" else False
|
289 |
+
tmp_args.local_files_only = local_files_only
|
290 |
+
|
291 |
+
try:
|
292 |
+
global_vars.initialize_globals(tmp_args)
|
293 |
+
except RuntimeError as e:
|
294 |
+
raise gr.Error("GPU memory is not enough to load this model.")
|
295 |
+
|
296 |
+
return "Download completed!"
|
297 |
+
|
298 |
+
def move_to_third_view():
|
299 |
+
gen_config = global_vars.gen_config
|
300 |
+
gen_sum_config = global_vars.gen_config_summarization
|
301 |
+
|
302 |
+
if global_vars.model_type == "custom":
|
303 |
+
ppmanager_type = chosen_ppm
|
304 |
+
else:
|
305 |
+
ppmanager_type = get_chat_manager(global_vars.model_type)
|
306 |
+
|
307 |
+
return (
|
308 |
+
"Preparation done!",
|
309 |
+
gr.update(visible=False),
|
310 |
+
gr.update(visible=True),
|
311 |
+
gr.update(label=global_vars.model_type),
|
312 |
+
{
|
313 |
+
"ppmanager_type": ppmanager_type,
|
314 |
+
"model_type": global_vars.model_type,
|
315 |
+
},
|
316 |
+
get_global_context(global_vars.model_type),
|
317 |
+
gen_config.temperature,
|
318 |
+
gen_config.top_p,
|
319 |
+
gen_config.top_k,
|
320 |
+
gen_config.repetition_penalty,
|
321 |
+
gen_config.max_new_tokens,
|
322 |
+
gen_config.num_beams,
|
323 |
+
gen_config.use_cache,
|
324 |
+
gen_config.do_sample,
|
325 |
+
gen_config.eos_token_id,
|
326 |
+
gen_config.pad_token_id,
|
327 |
+
gen_sum_config.temperature,
|
328 |
+
gen_sum_config.top_p,
|
329 |
+
gen_sum_config.top_k,
|
330 |
+
gen_sum_config.repetition_penalty,
|
331 |
+
gen_sum_config.max_new_tokens,
|
332 |
+
gen_sum_config.num_beams,
|
333 |
+
gen_sum_config.use_cache,
|
334 |
+
gen_sum_config.do_sample,
|
335 |
+
gen_sum_config.eos_token_id,
|
336 |
+
gen_sum_config.pad_token_id,
|
337 |
+
)
|
338 |
+
|
339 |
+
|
340 |
+
def toggle_inspector(view_selector):
|
341 |
+
if view_selector == "with context inspector":
|
342 |
+
return gr.update(visible=True)
|
343 |
+
else:
|
344 |
+
return gr.update(visible=False)
|
345 |
+
|
346 |
+
|
347 |
+
def reset_chat(idx, ld, state):
|
348 |
+
res = [state["ppmanager_type"].from_json(json.dumps(ppm_str)) for ppm_str in ld]
|
349 |
+
res[idx].pingpongs = []
|
350 |
+
|
351 |
+
return (
|
352 |
+
"",
|
353 |
+
[],
|
354 |
+
str(res),
|
355 |
+
gr.update(visible=True),
|
356 |
+
gr.update(interactive=False),
|
357 |
+
)
|
358 |
+
|
359 |
+
def rollback_last(idx, ld, state):
|
360 |
+
res = [state["ppmanager_type"].from_json(json.dumps(ppm_str)) for ppm_str in ld]
|
361 |
+
last_user_message = res[idx].pingpongs[-1].ping
|
362 |
+
res[idx].pingpongs = res[idx].pingpongs[:-1]
|
363 |
+
|
364 |
+
return (
|
365 |
+
last_user_message,
|
366 |
+
res[idx].build_uis(),
|
367 |
+
str(res),
|
368 |
+
gr.update(interactive=False)
|
369 |
+
)
|
370 |
+
|
371 |
+
def gradio_main(args):
|
372 |
+
global local_files_only
|
373 |
+
local_files_only = args.local_files_only
|
374 |
+
|
375 |
+
with gr.Blocks(css=MODEL_SELECTION_CSS, theme='gradio/soft') as demo:
|
376 |
+
with gr.Column(visible=True, elem_id="landing-container") as landing_view:
|
377 |
+
gr.Markdown("# Chat with LLM", elem_classes=["center"])
|
378 |
+
with gr.Row(elem_id="landing-container-selection"):
|
379 |
+
with gr.Column():
|
380 |
+
gr.Markdown("""This is the landing page of the project, [LLM As Chatbot](https://github.com/deep-diver/LLM-As-Chatbot). This appliction is designed for personal use only. A single model will be selected at a time even if you open up a new browser or a tab. As an initial choice, please select one of the following menu""")
|
381 |
+
|
382 |
+
gr.Markdown("""
|
383 |
+
**Bring your own model**: You can chat with arbitrary models. If your own custom model is based on 🤗 Hugging Face's [transformers](https://huggingface.co/docs/transformers/index) library, you will propbably be able to bring it into this application with this menu
|
384 |
+
|
385 |
+
**Select a model from model pool**: You can chat with one of the popular open source Large Language Model
|
386 |
+
|
387 |
+
**Use currently selected model**: If you have already selected, but if you came back to this landing page accidently, you can directly go back to the chatting mode with this menu
|
388 |
+
""")
|
389 |
+
|
390 |
+
byom = gr.Button("🫵🏼 Bring your own model", elem_id="go-byom-select", elem_classes=["square", "landing-btn"])
|
391 |
+
select_model = gr.Button("🦙 Select a model from model pool", elem_id="go-model-select", elem_classes=["square", "landing-btn"])
|
392 |
+
chosen_model = gr.Button("↪️ Use currently selected model", elem_id="go-use-selected-model", elem_classes=["square", "landing-btn"])
|
393 |
+
|
394 |
+
with gr.Column(elem_id="landing-bottom"):
|
395 |
+
progress_view0 = gr.Textbox(label="Progress", elem_classes=["progress-view"])
|
396 |
+
gr.Markdown("""[project](https://github.com/deep-diver/LLM-As-Chatbot)
|
397 |
+
[developer](https://github.com/deep-diver)
|
398 |
+
""", elem_classes=["center"])
|
399 |
+
|
400 |
+
with gr.Column(visible=False) as model_choice_view:
|
401 |
+
gr.Markdown("# Choose a Model", elem_classes=["center"])
|
402 |
+
with gr.Row(elem_id="container"):
|
403 |
+
with gr.Column():
|
404 |
+
gr.Markdown("## ~ 10B Parameters")
|
405 |
+
with gr.Row(elem_classes=["sub-container"]):
|
406 |
+
with gr.Column(min_width=20):
|
407 |
+
t5_vicuna_3b = gr.Button("t5-vicuna-3b", elem_id="t5-vicuna-3b", elem_classes=["square"])
|
408 |
+
gr.Markdown("T5 Vicuna", elem_classes=["center"])
|
409 |
+
|
410 |
+
with gr.Column(min_width=20, visible=False):
|
411 |
+
flan3b = gr.Button("flan-3b", elem_id="flan-3b", elem_classes=["square"])
|
412 |
+
gr.Markdown("Flan-XL", elem_classes=["center"])
|
413 |
+
|
414 |
+
# with gr.Column(min_width=20):
|
415 |
+
# replit_3b = gr.Button("replit-3b", elem_id="replit-3b", elem_classes=["square"])
|
416 |
+
# gr.Markdown("Replit Instruct", elem_classes=["center"])
|
417 |
+
|
418 |
+
with gr.Column(min_width=20):
|
419 |
+
camel5b = gr.Button("camel-5b", elem_id="camel-5b", elem_classes=["square"])
|
420 |
+
gr.Markdown("Camel", elem_classes=["center"])
|
421 |
+
|
422 |
+
with gr.Column(min_width=20):
|
423 |
+
alpaca_lora7b = gr.Button("alpaca-lora-7b", elem_id="alpaca-lora-7b", elem_classes=["square"])
|
424 |
+
gr.Markdown("Alpaca-LoRA", elem_classes=["center"])
|
425 |
+
|
426 |
+
with gr.Column(min_width=20):
|
427 |
+
stablelm7b = gr.Button("stablelm-7b", elem_id="stablelm-7b", elem_classes=["square"])
|
428 |
+
gr.Markdown("StableLM", elem_classes=["center"])
|
429 |
+
|
430 |
+
with gr.Column(min_width=20, visible=False):
|
431 |
+
os_stablelm7b = gr.Button("os-stablelm-7b", elem_id="os-stablelm-7b", elem_classes=["square"])
|
432 |
+
gr.Markdown("OA+StableLM", elem_classes=["center"])
|
433 |
+
|
434 |
+
with gr.Column(min_width=20):
|
435 |
+
gpt4_alpaca_7b = gr.Button("gpt4-alpaca-7b", elem_id="gpt4-alpaca-7b", elem_classes=["square"])
|
436 |
+
gr.Markdown("GPT4-Alpaca-LoRA", elem_classes=["center"])
|
437 |
+
|
438 |
+
with gr.Column(min_width=20):
|
439 |
+
mpt_7b = gr.Button("mpt-7b", elem_id="mpt-7b", elem_classes=["square"])
|
440 |
+
gr.Markdown("MPT", elem_classes=["center"])
|
441 |
+
|
442 |
+
with gr.Column(min_width=20):
|
443 |
+
redpajama_7b = gr.Button("redpajama-7b", elem_id="redpajama-7b", elem_classes=["square"])
|
444 |
+
gr.Markdown("RedPajama", elem_classes=["center"])
|
445 |
+
|
446 |
+
with gr.Column(min_width=20, visible=False):
|
447 |
+
redpajama_instruct_7b = gr.Button("redpajama-instruct-7b", elem_id="redpajama-instruct-7b", elem_classes=["square"])
|
448 |
+
gr.Markdown("RedPajama Instruct", elem_classes=["center"])
|
449 |
+
|
450 |
+
with gr.Column(min_width=20):
|
451 |
+
vicuna_7b = gr.Button("vicuna-7b", elem_id="vicuna-7b", elem_classes=["square"])
|
452 |
+
gr.Markdown("Vicuna", elem_classes=["center"])
|
453 |
+
|
454 |
+
with gr.Column(min_width=20):
|
455 |
+
vicuna_7b_1_3 = gr.Button("vicuna-7b-1-3", elem_id="vicuna-7b-1-3", elem_classes=["square"])
|
456 |
+
gr.Markdown("Vicuna 1.3", elem_classes=["center"])
|
457 |
+
|
458 |
+
with gr.Column(min_width=20):
|
459 |
+
llama_deus_7b = gr.Button("llama-deus-7b", elem_id="llama-deus-7b",elem_classes=["square"])
|
460 |
+
gr.Markdown("LLaMA Deus", elem_classes=["center"])
|
461 |
+
|
462 |
+
with gr.Column(min_width=20):
|
463 |
+
evolinstruct_vicuna_7b = gr.Button("evolinstruct-vicuna-7b", elem_id="evolinstruct-vicuna-7b", elem_classes=["square"])
|
464 |
+
gr.Markdown("EvolInstruct Vicuna", elem_classes=["center"])
|
465 |
+
|
466 |
+
with gr.Column(min_width=20, visible=False):
|
467 |
+
alpacoom_7b = gr.Button("alpacoom-7b", elem_id="alpacoom-7b", elem_classes=["square"])
|
468 |
+
gr.Markdown("Alpacoom", elem_classes=["center"])
|
469 |
+
|
470 |
+
with gr.Column(min_width=20):
|
471 |
+
baize_7b = gr.Button("baize-7b", elem_id="baize-7b", elem_classes=["square"])
|
472 |
+
gr.Markdown("Baize", elem_classes=["center"])
|
473 |
+
|
474 |
+
with gr.Column(min_width=20):
|
475 |
+
guanaco_7b = gr.Button("guanaco-7b", elem_id="guanaco-7b", elem_classes=["square"])
|
476 |
+
gr.Markdown("Guanaco", elem_classes=["center"])
|
477 |
+
|
478 |
+
with gr.Column(min_width=20):
|
479 |
+
falcon_7b = gr.Button("falcon-7b", elem_id="falcon-7b", elem_classes=["square"])
|
480 |
+
gr.Markdown("Falcon", elem_classes=["center"])
|
481 |
+
|
482 |
+
with gr.Column(min_width=20):
|
483 |
+
wizard_falcon_7b = gr.Button("wizard-falcon-7b", elem_id="wizard-falcon-7b", elem_classes=["square"])
|
484 |
+
gr.Markdown("Wizard Falcon", elem_classes=["center"])
|
485 |
+
|
486 |
+
with gr.Column(min_width=20):
|
487 |
+
airoboros_7b = gr.Button("airoboros-7b", elem_id="airoboros-7b", elem_classes=["square"])
|
488 |
+
gr.Markdown("Airoboros", elem_classes=["center"])
|
489 |
+
|
490 |
+
with gr.Column(min_width=20):
|
491 |
+
samantha_7b = gr.Button("samantha-7b", elem_id="samantha-7b", elem_classes=["square"])
|
492 |
+
gr.Markdown("Samantha", elem_classes=["center"])
|
493 |
+
|
494 |
+
with gr.Column(min_width=20):
|
495 |
+
openllama_7b = gr.Button("openllama-7b", elem_id="openllama-7b", elem_classes=["square"])
|
496 |
+
gr.Markdown("OpenLLaMA", elem_classes=["center"])
|
497 |
+
|
498 |
+
with gr.Column(min_width=20):
|
499 |
+
orcamini_7b = gr.Button("orcamini-7b", elem_id="orcamini-7b", elem_classes=["square"])
|
500 |
+
gr.Markdown("Orca Mini", elem_classes=["center"])
|
501 |
+
|
502 |
+
with gr.Column(min_width=20):
|
503 |
+
xgen_7b = gr.Button("xgen-7b", elem_id="xgen-7b", elem_classes=["square"])
|
504 |
+
gr.Markdown("XGen", elem_classes=["center"])
|
505 |
+
|
506 |
+
with gr.Column(min_width=20):
|
507 |
+
llama2_7b = gr.Button("llama2-7b", elem_id="llama2-7b", elem_classes=["square"])
|
508 |
+
gr.Markdown("LLaMA 2", elem_classes=["center"])
|
509 |
+
|
510 |
+
gr.Markdown("## ~ 20B Parameters")
|
511 |
+
with gr.Row(elem_classes=["sub-container"]):
|
512 |
+
with gr.Column(min_width=20, visible=False):
|
513 |
+
flan11b = gr.Button("flan-11b", elem_id="flan-11b", elem_classes=["square"])
|
514 |
+
gr.Markdown("Flan-XXL", elem_classes=["center"])
|
515 |
+
|
516 |
+
with gr.Column(min_width=20):
|
517 |
+
koalpaca = gr.Button("koalpaca", elem_id="koalpaca", elem_classes=["square"])
|
518 |
+
gr.Markdown("koalpaca", elem_classes=["center"])
|
519 |
+
|
520 |
+
with gr.Column(min_width=20):
|
521 |
+
kullm = gr.Button("kullm", elem_id="kullm", elem_classes=["square"])
|
522 |
+
gr.Markdown("KULLM", elem_classes=["center"])
|
523 |
+
|
524 |
+
with gr.Column(min_width=20):
|
525 |
+
alpaca_lora13b = gr.Button("alpaca-lora-13b", elem_id="alpaca-lora-13b", elem_classes=["square"])
|
526 |
+
gr.Markdown("Alpaca-LoRA", elem_classes=["center"])
|
527 |
+
|
528 |
+
with gr.Column(min_width=20):
|
529 |
+
gpt4_alpaca_13b = gr.Button("gpt4-alpaca-13b", elem_id="gpt4-alpaca-13b", elem_classes=["square"])
|
530 |
+
gr.Markdown("GPT4-Alpaca-LoRA", elem_classes=["center"])
|
531 |
+
|
532 |
+
with gr.Column(min_width=20):
|
533 |
+
stable_vicuna_13b = gr.Button("stable-vicuna-13b", elem_id="stable-vicuna-13b", elem_classes=["square"])
|
534 |
+
gr.Markdown("Stable-Vicuna", elem_classes=["center"])
|
535 |
+
|
536 |
+
with gr.Column(min_width=20):
|
537 |
+
starchat_15b = gr.Button("starchat-15b", elem_id="starchat-15b", elem_classes=["square"])
|
538 |
+
gr.Markdown("StarChat", elem_classes=["center"])
|
539 |
+
|
540 |
+
with gr.Column(min_width=20):
|
541 |
+
starchat_beta_15b = gr.Button("starchat-beta-15b", elem_id="starchat-beta-15b", elem_classes=["square"])
|
542 |
+
gr.Markdown("StarChat β", elem_classes=["center"])
|
543 |
+
|
544 |
+
with gr.Column(min_width=20):
|
545 |
+
vicuna_13b = gr.Button("vicuna-13b", elem_id="vicuna-13b", elem_classes=["square"])
|
546 |
+
gr.Markdown("Vicuna", elem_classes=["center"])
|
547 |
+
|
548 |
+
with gr.Column(min_width=20):
|
549 |
+
vicuna_13b_1_3 = gr.Button("vicuna-13b-1-3", elem_id="vicuna-13b-1-3", elem_classes=["square"])
|
550 |
+
gr.Markdown("Vicuna 1.3", elem_classes=["center"])
|
551 |
+
|
552 |
+
with gr.Column(min_width=20):
|
553 |
+
evolinstruct_vicuna_13b = gr.Button("evolinstruct-vicuna-13b", elem_id="evolinstruct-vicuna-13b", elem_classes=["square"])
|
554 |
+
gr.Markdown("EvolInstruct Vicuna", elem_classes=["center"])
|
555 |
+
|
556 |
+
with gr.Column(min_width=20):
|
557 |
+
baize_13b = gr.Button("baize-13b", elem_id="baize-13b", elem_classes=["square"])
|
558 |
+
gr.Markdown("Baize", elem_classes=["center"])
|
559 |
+
|
560 |
+
with gr.Column(min_width=20):
|
561 |
+
guanaco_13b = gr.Button("guanaco-13b", elem_id="guanaco-13b", elem_classes=["square"])
|
562 |
+
gr.Markdown("Guanaco", elem_classes=["center"])
|
563 |
+
|
564 |
+
with gr.Column(min_width=20):
|
565 |
+
nous_hermes_13b = gr.Button("nous-hermes-13b", elem_id="nous-hermes-13b", elem_classes=["square"])
|
566 |
+
gr.Markdown("Nous Hermes", elem_classes=["center"])
|
567 |
+
|
568 |
+
with gr.Column(min_width=20):
|
569 |
+
airoboros_13b = gr.Button("airoboros-13b", elem_id="airoboros-13b", elem_classes=["square"])
|
570 |
+
gr.Markdown("Airoboros", elem_classes=["center"])
|
571 |
+
|
572 |
+
with gr.Column(min_width=20):
|
573 |
+
samantha_13b = gr.Button("samantha-13b", elem_id="samantha-13b", elem_classes=["square"])
|
574 |
+
gr.Markdown("Samantha", elem_classes=["center"])
|
575 |
+
|
576 |
+
with gr.Column(min_width=20):
|
577 |
+
chronos_13b = gr.Button("chronos-13b", elem_id="chronos-13b", elem_classes=["square"])
|
578 |
+
gr.Markdown("Chronos", elem_classes=["center"])
|
579 |
+
|
580 |
+
with gr.Column(min_width=20):
|
581 |
+
wizardlm_13b = gr.Button("wizardlm-13b", elem_id="wizardlm-13b", elem_classes=["square"])
|
582 |
+
gr.Markdown("WizardLM", elem_classes=["center"])
|
583 |
+
|
584 |
+
with gr.Column(min_width=20):
|
585 |
+
wizard_vicuna_13b = gr.Button("wizard-vicuna-13b", elem_id="wizard-vicuna-13b", elem_classes=["square"])
|
586 |
+
gr.Markdown("Wizard Vicuna (Uncensored)", elem_classes=["center"])
|
587 |
+
|
588 |
+
with gr.Column(min_width=20):
|
589 |
+
wizard_coder_15b = gr.Button("wizard-coder-15b", elem_id="wizard-coder-15b", elem_classes=["square"])
|
590 |
+
gr.Markdown("Wizard Coder", elem_classes=["center"])
|
591 |
+
|
592 |
+
with gr.Column(min_width=20):
|
593 |
+
openllama_13b = gr.Button("openllama-13b", elem_id="openllama-13b", elem_classes=["square"])
|
594 |
+
gr.Markdown("OpenLLaMA", elem_classes=["center"])
|
595 |
+
|
596 |
+
with gr.Column(min_width=20):
|
597 |
+
orcamini_13b = gr.Button("orcamini-13b", elem_id="orcamini-13b", elem_classes=["square"])
|
598 |
+
gr.Markdown("Orca Mini", elem_classes=["center"])
|
599 |
+
|
600 |
+
with gr.Column(min_width=20):
|
601 |
+
llama2_13b = gr.Button("llama2-13b", elem_id="llama2-13b", elem_classes=["square"])
|
602 |
+
gr.Markdown("LLaMA 2", elem_classes=["center"])
|
603 |
+
|
604 |
+
with gr.Column(min_width=20):
|
605 |
+
nous_hermes_13b_v2 = gr.Button("nous-hermes-13b-llama2", elem_id="nous-hermes-13b-llama2", elem_classes=["square"])
|
606 |
+
gr.Markdown("Nous Hermes v2", elem_classes=["center"])
|
607 |
+
|
608 |
+
gr.Markdown("## ~ 30B Parameters", visible=False)
|
609 |
+
with gr.Row(elem_classes=["sub-container"], visible=False):
|
610 |
+
with gr.Column(min_width=20):
|
611 |
+
camel20b = gr.Button("camel-20b", elem_id="camel-20b", elem_classes=["square"])
|
612 |
+
gr.Markdown("Camel", elem_classes=["center"])
|
613 |
+
|
614 |
+
gr.Markdown("## ~ 40B Parameters")
|
615 |
+
with gr.Row(elem_classes=["sub-container"]):
|
616 |
+
with gr.Column(min_width=20):
|
617 |
+
guanaco_33b = gr.Button("guanaco-33b", elem_id="guanaco-33b", elem_classes=["square"])
|
618 |
+
gr.Markdown("Guanaco", elem_classes=["center"])
|
619 |
+
|
620 |
+
with gr.Column(min_width=20):
|
621 |
+
falcon_40b = gr.Button("falcon-40b", elem_id="falcon-40b", elem_classes=["square"])
|
622 |
+
gr.Markdown("Falcon", elem_classes=["center"])
|
623 |
+
|
624 |
+
with gr.Column(min_width=20):
|
625 |
+
wizard_falcon_40b = gr.Button("wizard-falcon-40b", elem_id="wizard-falcon-40b", elem_classes=["square"])
|
626 |
+
gr.Markdown("Wizard Falcon", elem_classes=["center"])
|
627 |
+
|
628 |
+
with gr.Column(min_width=20):
|
629 |
+
samantha_33b = gr.Button("samantha-33b", elem_id="samantha-33b", elem_classes=["square"])
|
630 |
+
gr.Markdown("Samantha", elem_classes=["center"])
|
631 |
+
|
632 |
+
with gr.Column(min_width=20):
|
633 |
+
lazarus_30b = gr.Button("lazarus-30b", elem_id="lazarus-30b", elem_classes=["square"])
|
634 |
+
gr.Markdown("Lazarus", elem_classes=["center"])
|
635 |
+
|
636 |
+
with gr.Column(min_width=20):
|
637 |
+
chronos_33b = gr.Button("chronos-33b", elem_id="chronos-33b", elem_classes=["square"])
|
638 |
+
gr.Markdown("Chronos", elem_classes=["center"])
|
639 |
+
|
640 |
+
with gr.Column(min_width=20):
|
641 |
+
wizardlm_30b = gr.Button("wizardlm-30b", elem_id="wizardlm-30b", elem_classes=["square"])
|
642 |
+
gr.Markdown("WizardLM", elem_classes=["center"])
|
643 |
+
|
644 |
+
with gr.Column(min_width=20):
|
645 |
+
wizard_vicuna_30b = gr.Button("wizard-vicuna-30b", elem_id="wizard-vicuna-30b", elem_classes=["square"])
|
646 |
+
gr.Markdown("Wizard Vicuna (Uncensored)", elem_classes=["center"])
|
647 |
+
|
648 |
+
with gr.Column(min_width=20):
|
649 |
+
vicuna_33b_1_3 = gr.Button("vicuna-33b-1-3", elem_id="vicuna-33b-1-3", elem_classes=["square"])
|
650 |
+
gr.Markdown("Vicuna 1.3", elem_classes=["center"])
|
651 |
+
|
652 |
+
with gr.Column(min_width=20):
|
653 |
+
mpt_30b = gr.Button("mpt-30b", elem_id="mpt-30b", elem_classes=["square"])
|
654 |
+
gr.Markdown("MPT", elem_classes=["center"])
|
655 |
+
|
656 |
+
with gr.Column(min_width=20):
|
657 |
+
upstage_llama_30b = gr.Button("upstage-llama-30b", elem_id="upstage-llama-30b", elem_classes=["square"])
|
658 |
+
gr.Markdown("Upstage LLaMA", elem_classes=["center"])
|
659 |
+
|
660 |
+
gr.Markdown("## ~ 70B Parameters")
|
661 |
+
with gr.Row(elem_classes=["sub-container"]):
|
662 |
+
with gr.Column(min_width=20):
|
663 |
+
free_willy2_70b = gr.Button("free-willy2-70b", elem_id="free-willy2-70b", elem_classes=["square"])
|
664 |
+
gr.Markdown("Free Willy 2", elem_classes=["center"])
|
665 |
+
|
666 |
+
progress_view = gr.Textbox(label="Progress", elem_classes=["progress-view"])
|
667 |
+
|
668 |
+
with gr.Column(visible=False) as byom_input_view:
|
669 |
+
with gr.Column(elem_id="container3"):
|
670 |
+
gr.Markdown("# Bring Your Own Model", elem_classes=["center"])
|
671 |
+
|
672 |
+
gr.Markdown("### Model configuration")
|
673 |
+
byom_base = gr.Textbox(label="Base", placeholder="Enter path or 🤗 hub ID of the base model", interactive=True)
|
674 |
+
byom_ckpt = gr.Textbox(label="LoRA ckpt", placeholder="Enter path or 🤗 hub ID of the LoRA checkpoint", interactive=True)
|
675 |
+
|
676 |
+
with gr.Accordion("Advanced options", open=False):
|
677 |
+
gr.Markdown("If you leave the below textboxes empty, `transformers.AutoModelForCausalLM` and `transformers.AutoTokenizer` classes will be used by default. If you need any specific class, please type them below.")
|
678 |
+
byom_model_cls = gr.Textbox(label="Base model class", placeholder="Enter base model class", interactive=True)
|
679 |
+
byom_tokenizer_cls = gr.Textbox(label="Base tokenizer class", placeholder="Enter base tokenizer class", interactive=True)
|
680 |
+
|
681 |
+
with gr.Column():
|
682 |
+
gr.Markdown("If you leave the below textboxes empty, any token ids for bos, eos, and pad will not be specified in `GenerationConfig`. If you think that you need to specify them. please type them below in decimal format.")
|
683 |
+
with gr.Row():
|
684 |
+
byom_bos_token_id = gr.Textbox(label="bos_token_id", placeholder="for GenConfig")
|
685 |
+
byom_eos_token_id = gr.Textbox(label="eos_token_id", placeholder="for GenConfig")
|
686 |
+
byom_pad_token_id = gr.Textbox(label="pad_token_id", placeholder="for GenConfig")
|
687 |
+
|
688 |
+
with gr.Row():
|
689 |
+
byom_load_mode = gr.Radio(
|
690 |
+
load_mode_list,
|
691 |
+
value=load_mode_list[0],
|
692 |
+
label="load mode",
|
693 |
+
elem_classes=["load-mode-selector"]
|
694 |
+
)
|
695 |
+
|
696 |
+
gr.Markdown("### Prompt configuration")
|
697 |
+
prompt_style_selector = gr.Dropdown(
|
698 |
+
label="Prompt style",
|
699 |
+
interactive=True,
|
700 |
+
choices=list(prompt_styles.keys()),
|
701 |
+
value="Alpaca"
|
702 |
+
)
|
703 |
+
with gr.Accordion("Prompt style preview", open=False):
|
704 |
+
prompt_style_previewer = gr.Textbox(
|
705 |
+
label="How prompt is actually structured",
|
706 |
+
lines=16,
|
707 |
+
value=default_ppm.build_prompts())
|
708 |
+
|
709 |
+
with gr.Row():
|
710 |
+
byom_back_btn = gr.Button("Back")
|
711 |
+
byom_confirm_btn = gr.Button("Confirm")
|
712 |
+
|
713 |
+
with gr.Column(elem_classes=["progress-view"]):
|
714 |
+
txt_view3 = gr.Textbox(label="Status")
|
715 |
+
progress_view3 = gr.Textbox(label="Progress")
|
716 |
+
|
717 |
+
with gr.Column(visible=False) as model_review_view:
|
718 |
+
gr.Markdown("# Confirm the chosen model", elem_classes=["center"])
|
719 |
+
|
720 |
+
with gr.Column(elem_id="container2"):
|
721 |
+
gr.Markdown("Please expect loading time to be longer than expected. Depending on the size of models, it will probably take from 100 to 1000 seconds or so. Please be patient.")
|
722 |
+
|
723 |
+
with gr.Row():
|
724 |
+
model_image = gr.Image(None, interactive=False, show_label=False)
|
725 |
+
with gr.Column():
|
726 |
+
model_name = gr.Markdown("**Model name**")
|
727 |
+
model_desc = gr.Markdown("...")
|
728 |
+
model_params = gr.Markdown("Parameters\n: ...")
|
729 |
+
model_base = gr.Markdown("🤗 Hub(base)\n: ...")
|
730 |
+
model_ckpt = gr.Markdown("🤗 Hub(LoRA)\n: ...")
|
731 |
+
model_vram = gr.Markdown(f"""**Minimal VRAM requirement** :
|
732 |
+
| half precision | load_in_8bit | load_in_4bit |
|
733 |
+
| ------------------------------ | ------------------------- | ------------------------- |
|
734 |
+
| {round(7830/1024., 1)}GiB | {round(5224/1024., 1)}GiB | {round(4324/1024., 1)}GiB |
|
735 |
+
""")
|
736 |
+
model_thumbnail_tiny = gr.Textbox("", visible=False)
|
737 |
+
|
738 |
+
with gr.Column():
|
739 |
+
gen_config_path = gr.Dropdown(
|
740 |
+
response_configs,
|
741 |
+
value=response_configs[0],
|
742 |
+
interactive=True,
|
743 |
+
label="Gen Config(response)",
|
744 |
+
)
|
745 |
+
gen_config_sum_path = gr.Dropdown(
|
746 |
+
summarization_configs,
|
747 |
+
value=summarization_configs[0],
|
748 |
+
interactive=True,
|
749 |
+
label="Gen Config(summarization)",
|
750 |
+
visible=False,
|
751 |
+
)
|
752 |
+
with gr.Row():
|
753 |
+
load_mode = gr.Radio(
|
754 |
+
load_mode_list,
|
755 |
+
value=load_mode_list[0],
|
756 |
+
label="load mode",
|
757 |
+
elem_classes=["load-mode-selector"]
|
758 |
+
)
|
759 |
+
force_redownload = gr.Checkbox(label="Force Re-download", interactive=False, visible=False)
|
760 |
+
|
761 |
+
with gr.Accordion("Example showcases", open=False):
|
762 |
+
with gr.Tab("Ex1"):
|
763 |
+
example_showcase1 = gr.Chatbot(
|
764 |
+
[("hello", "world"), ("damn", "good")]
|
765 |
+
)
|
766 |
+
with gr.Tab("Ex2"):
|
767 |
+
example_showcase2 = gr.Chatbot(
|
768 |
+
[("hello", "world"), ("damn", "good")]
|
769 |
+
)
|
770 |
+
with gr.Tab("Ex3"):
|
771 |
+
example_showcase3 = gr.Chatbot(
|
772 |
+
[("hello", "world"), ("damn", "good")]
|
773 |
+
)
|
774 |
+
with gr.Tab("Ex4"):
|
775 |
+
example_showcase4 = gr.Chatbot(
|
776 |
+
[("hello", "world"), ("damn", "good")]
|
777 |
+
)
|
778 |
+
|
779 |
+
with gr.Row():
|
780 |
+
back_to_model_choose_btn = gr.Button("Back")
|
781 |
+
confirm_btn = gr.Button("Confirm")
|
782 |
+
|
783 |
+
with gr.Column(elem_classes=["progress-view"]):
|
784 |
+
txt_view = gr.Textbox(label="Status")
|
785 |
+
progress_view2 = gr.Textbox(label="Progress")
|
786 |
+
|
787 |
+
with gr.Column(visible=False) as chat_view:
|
788 |
+
idx = gr.State(0)
|
789 |
+
chat_state = gr.State()
|
790 |
+
local_data = gr.JSON({}, visible=False)
|
791 |
+
|
792 |
+
with gr.Row():
|
793 |
+
with gr.Column(scale=1, min_width=180):
|
794 |
+
gr.Markdown("GradioChat", elem_id="left-top")
|
795 |
+
|
796 |
+
with gr.Column(elem_id="left-pane"):
|
797 |
+
chat_back_btn = gr.Button("Back", elem_id="chat-back-btn")
|
798 |
+
|
799 |
+
with gr.Accordion("Histories", elem_id="chat-history-accordion", open=False):
|
800 |
+
channel_btns.append(gr.Button(channels[0], elem_classes=["custom-btn-highlight"]))
|
801 |
+
|
802 |
+
for channel in channels[1:]:
|
803 |
+
channel_btns.append(gr.Button(channel, elem_classes=["custom-btn"]))
|
804 |
+
|
805 |
+
with gr.Column(scale=8, elem_id="right-pane"):
|
806 |
+
with gr.Column(
|
807 |
+
elem_id="initial-popup", visible=False
|
808 |
+
) as example_block:
|
809 |
+
with gr.Row(scale=1):
|
810 |
+
with gr.Column(elem_id="initial-popup-left-pane"):
|
811 |
+
gr.Markdown("GradioChat", elem_id="initial-popup-title")
|
812 |
+
gr.Markdown("Making the community's best AI chat models available to everyone.")
|
813 |
+
with gr.Column(elem_id="initial-popup-right-pane"):
|
814 |
+
gr.Markdown("Chat UI is now open sourced on Hugging Face Hub")
|
815 |
+
gr.Markdown("check out the [↗ repository](https://huggingface.co/spaces/chansung/test-multi-conv)")
|
816 |
+
|
817 |
+
with gr.Column(scale=1):
|
818 |
+
gr.Markdown("Examples")
|
819 |
+
with gr.Row():
|
820 |
+
for example in examples:
|
821 |
+
ex_btns.append(gr.Button(example, elem_classes=["example-btn"]))
|
822 |
+
|
823 |
+
with gr.Column(elem_id="aux-btns-popup", visible=True):
|
824 |
+
with gr.Row():
|
825 |
+
stop = gr.Button("Stop", elem_classes=["aux-btn"])
|
826 |
+
regenerate = gr.Button("Regen", interactive=False, elem_classes=["aux-btn"])
|
827 |
+
clean = gr.Button("Clean", elem_classes=["aux-btn"])
|
828 |
+
|
829 |
+
with gr.Accordion("Context Inspector", elem_id="aux-viewer", open=False):
|
830 |
+
context_inspector = gr.Textbox(
|
831 |
+
"",
|
832 |
+
elem_id="aux-viewer-inspector",
|
833 |
+
label="",
|
834 |
+
lines=30,
|
835 |
+
max_lines=50,
|
836 |
+
)
|
837 |
+
|
838 |
+
chatbot = gr.Chatbot(elem_id='chatbot')
|
839 |
+
instruction_txtbox = gr.Textbox(placeholder="Ask anything", label="", elem_id="prompt-txt")
|
840 |
+
|
841 |
+
with gr.Accordion("Control Panel", open=False) as control_panel:
|
842 |
+
with gr.Column():
|
843 |
+
with gr.Column():
|
844 |
+
gr.Markdown("#### Global context")
|
845 |
+
with gr.Accordion("global context will persist during conversation, and it is placed at the top of the prompt", open=False):
|
846 |
+
global_context = gr.Textbox(
|
847 |
+
"global context",
|
848 |
+
lines=5,
|
849 |
+
max_lines=10,
|
850 |
+
interactive=True,
|
851 |
+
elem_id="global-context"
|
852 |
+
)
|
853 |
+
|
854 |
+
gr.Markdown("#### Internet search")
|
855 |
+
with gr.Row():
|
856 |
+
internet_option = gr.Radio(choices=["on", "off"], value="off", label="mode")
|
857 |
+
serper_api_key = gr.Textbox(
|
858 |
+
value= "" if args.serper_api_key is None else args.serper_api_key,
|
859 |
+
placeholder="Get one by visiting serper.dev",
|
860 |
+
label="Serper api key"
|
861 |
+
)
|
862 |
+
|
863 |
+
gr.Markdown("#### GenConfig for **response** text generation")
|
864 |
+
with gr.Row():
|
865 |
+
res_temp = gr.Slider(0.0, 2.0, 0, step=0.1, label="temp", interactive=True)
|
866 |
+
res_topp = gr.Slider(0.0, 2.0, 0, step=0.1, label="top_p", interactive=True)
|
867 |
+
res_topk = gr.Slider(20, 1000, 0, step=1, label="top_k", interactive=True)
|
868 |
+
res_rpen = gr.Slider(0.0, 2.0, 0, step=0.1, label="rep_penalty", interactive=True)
|
869 |
+
res_mnts = gr.Slider(64, 2048, 0, step=1, label="new_tokens", interactive=True)
|
870 |
+
res_beams = gr.Slider(1, 4, 0, step=1, label="beams")
|
871 |
+
res_cache = gr.Radio([True, False], value=0, label="cache", interactive=True)
|
872 |
+
res_sample = gr.Radio([True, False], value=0, label="sample", interactive=True)
|
873 |
+
res_eosid = gr.Number(value=0, visible=False, precision=0)
|
874 |
+
res_padid = gr.Number(value=0, visible=False, precision=0)
|
875 |
+
|
876 |
+
with gr.Column(visible=False):
|
877 |
+
gr.Markdown("#### GenConfig for **summary** text generation")
|
878 |
+
with gr.Row():
|
879 |
+
sum_temp = gr.Slider(0.0, 2.0, 0, step=0.1, label="temp", interactive=True)
|
880 |
+
sum_topp = gr.Slider(0.0, 2.0, 0, step=0.1, label="top_p", interactive=True)
|
881 |
+
sum_topk = gr.Slider(20, 1000, 0, step=1, label="top_k", interactive=True)
|
882 |
+
sum_rpen = gr.Slider(0.0, 2.0, 0, step=0.1, label="rep_penalty", interactive=True)
|
883 |
+
sum_mnts = gr.Slider(64, 2048, 0, step=1, label="new_tokens", interactive=True)
|
884 |
+
sum_beams = gr.Slider(1, 8, 0, step=1, label="beams", interactive=True)
|
885 |
+
sum_cache = gr.Radio([True, False], value=0, label="cache", interactive=True)
|
886 |
+
sum_sample = gr.Radio([True, False], value=0, label="sample", interactive=True)
|
887 |
+
sum_eosid = gr.Number(value=0, visible=False, precision=0)
|
888 |
+
sum_padid = gr.Number(value=0, visible=False, precision=0)
|
889 |
+
|
890 |
+
with gr.Column():
|
891 |
+
gr.Markdown("#### Context managements")
|
892 |
+
with gr.Row():
|
893 |
+
ctx_num_lconv = gr.Slider(2, 10, 3, step=1, label="number of recent talks to keep", interactive=True)
|
894 |
+
ctx_sum_prompt = gr.Textbox(
|
895 |
+
"summarize our conversations. what have we discussed about so far?",
|
896 |
+
label="design a prompt to summarize the conversations",
|
897 |
+
visible=False
|
898 |
+
)
|
899 |
+
|
900 |
+
btns = [
|
901 |
+
t5_vicuna_3b, flan3b, camel5b, alpaca_lora7b, stablelm7b,
|
902 |
+
gpt4_alpaca_7b, os_stablelm7b, mpt_7b, redpajama_7b, redpajama_instruct_7b, llama_deus_7b,
|
903 |
+
evolinstruct_vicuna_7b, alpacoom_7b, baize_7b, guanaco_7b, vicuna_7b_1_3,
|
904 |
+
falcon_7b, wizard_falcon_7b, airoboros_7b, samantha_7b, openllama_7b, orcamini_7b,
|
905 |
+
xgen_7b, llama2_7b,
|
906 |
+
flan11b, koalpaca, kullm, alpaca_lora13b, gpt4_alpaca_13b, stable_vicuna_13b,
|
907 |
+
starchat_15b, starchat_beta_15b, vicuna_7b, vicuna_13b, evolinstruct_vicuna_13b,
|
908 |
+
baize_13b, guanaco_13b, nous_hermes_13b, airoboros_13b, samantha_13b, chronos_13b,
|
909 |
+
wizardlm_13b, wizard_vicuna_13b, wizard_coder_15b, vicuna_13b_1_3, openllama_13b, orcamini_13b,
|
910 |
+
llama2_13b, nous_hermes_13b_v2, camel20b,
|
911 |
+
guanaco_33b, falcon_40b, wizard_falcon_40b, samantha_33b, lazarus_30b, chronos_33b,
|
912 |
+
wizardlm_30b, wizard_vicuna_30b, vicuna_33b_1_3, mpt_30b, upstage_llama_30b,
|
913 |
+
free_willy2_70b
|
914 |
+
]
|
915 |
+
for btn in btns:
|
916 |
+
btn.click(
|
917 |
+
move_to_second_view,
|
918 |
+
btn,
|
919 |
+
[
|
920 |
+
model_choice_view, model_review_view,
|
921 |
+
model_image, model_name, model_params, model_base, model_ckpt,
|
922 |
+
model_desc, model_vram, gen_config_path,
|
923 |
+
example_showcase1, example_showcase2, example_showcase3, example_showcase4,
|
924 |
+
model_thumbnail_tiny, load_mode,
|
925 |
+
progress_view
|
926 |
+
]
|
927 |
+
)
|
928 |
+
|
929 |
+
select_model.click(
|
930 |
+
move_to_model_select_view,
|
931 |
+
None,
|
932 |
+
[progress_view0, landing_view, model_choice_view]
|
933 |
+
)
|
934 |
+
|
935 |
+
chosen_model.click(
|
936 |
+
use_chosen_model,
|
937 |
+
None,
|
938 |
+
[progress_view0, landing_view, chat_view, chatbot, chat_state, global_context,
|
939 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
940 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid]
|
941 |
+
)
|
942 |
+
|
943 |
+
byom.click(
|
944 |
+
move_to_byom_view,
|
945 |
+
None,
|
946 |
+
[progress_view0, landing_view, byom_input_view, byom_load_mode]
|
947 |
+
)
|
948 |
+
|
949 |
+
byom_back_btn.click(
|
950 |
+
move_to_first_view,
|
951 |
+
None,
|
952 |
+
[landing_view, byom_input_view]
|
953 |
+
)
|
954 |
+
|
955 |
+
byom_confirm_btn.click(
|
956 |
+
lambda: "Start downloading/loading the model...", None, txt_view3
|
957 |
+
).then(
|
958 |
+
byom_load,
|
959 |
+
[byom_base, byom_ckpt, byom_model_cls, byom_tokenizer_cls,
|
960 |
+
byom_bos_token_id, byom_eos_token_id, byom_pad_token_id,
|
961 |
+
byom_load_mode],
|
962 |
+
[progress_view3]
|
963 |
+
).then(
|
964 |
+
lambda: "Model is fully loaded...", None, txt_view3
|
965 |
+
).then(
|
966 |
+
move_to_third_view,
|
967 |
+
None,
|
968 |
+
[progress_view3, byom_input_view, chat_view, chatbot, chat_state, global_context,
|
969 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
970 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid]
|
971 |
+
)
|
972 |
+
|
973 |
+
prompt_style_selector.change(
|
974 |
+
prompt_style_change,
|
975 |
+
prompt_style_selector,
|
976 |
+
prompt_style_previewer
|
977 |
+
)
|
978 |
+
|
979 |
+
back_to_model_choose_btn.click(
|
980 |
+
move_to_first_view,
|
981 |
+
None,
|
982 |
+
[model_choice_view, model_review_view]
|
983 |
+
)
|
984 |
+
|
985 |
+
confirm_btn.click(
|
986 |
+
lambda: "Start downloading/loading the model...", None, txt_view
|
987 |
+
).then(
|
988 |
+
download_completed,
|
989 |
+
[model_name, model_base, model_ckpt, gen_config_path, gen_config_sum_path, load_mode, model_thumbnail_tiny, force_redownload],
|
990 |
+
[progress_view2]
|
991 |
+
).then(
|
992 |
+
lambda: "Model is fully loaded...", None, txt_view
|
993 |
+
).then(
|
994 |
+
lambda: time.sleep(2), None, None
|
995 |
+
).then(
|
996 |
+
move_to_third_view,
|
997 |
+
None,
|
998 |
+
[progress_view2, model_review_view, chat_view, chatbot, chat_state, global_context,
|
999 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
1000 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid]
|
1001 |
+
)
|
1002 |
+
|
1003 |
+
for btn in channel_btns:
|
1004 |
+
btn.click(
|
1005 |
+
set_chatbot,
|
1006 |
+
[btn, local_data, chat_state],
|
1007 |
+
[chatbot, idx, example_block, regenerate]
|
1008 |
+
).then(
|
1009 |
+
None, btn, None,
|
1010 |
+
_js=UPDATE_LEFT_BTNS_STATE
|
1011 |
+
)
|
1012 |
+
|
1013 |
+
for btn in ex_btns:
|
1014 |
+
btn.click(
|
1015 |
+
set_example,
|
1016 |
+
[btn],
|
1017 |
+
[instruction_txtbox, example_block]
|
1018 |
+
)
|
1019 |
+
|
1020 |
+
instruction_txtbox.submit(
|
1021 |
+
lambda: [
|
1022 |
+
gr.update(visible=False),
|
1023 |
+
gr.update(interactive=True)
|
1024 |
+
],
|
1025 |
+
None,
|
1026 |
+
[example_block, regenerate]
|
1027 |
+
)
|
1028 |
+
|
1029 |
+
send_event = instruction_txtbox.submit(
|
1030 |
+
central.chat_stream,
|
1031 |
+
[idx, local_data, instruction_txtbox, chat_state,
|
1032 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
1033 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
1034 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
1035 |
+
internet_option, serper_api_key],
|
1036 |
+
[instruction_txtbox, chatbot, context_inspector, local_data],
|
1037 |
+
)
|
1038 |
+
|
1039 |
+
instruction_txtbox.submit(
|
1040 |
+
None, local_data, None,
|
1041 |
+
_js="(v)=>{ setStorage('local_data',v) }"
|
1042 |
+
)
|
1043 |
+
|
1044 |
+
regenerate.click(
|
1045 |
+
rollback_last,
|
1046 |
+
[idx, local_data, chat_state],
|
1047 |
+
[instruction_txtbox, chatbot, local_data, regenerate]
|
1048 |
+
).then(
|
1049 |
+
central.chat_stream,
|
1050 |
+
[idx, local_data, instruction_txtbox, chat_state,
|
1051 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
1052 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
1053 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
1054 |
+
internet_option, serper_api_key],
|
1055 |
+
[instruction_txtbox, chatbot, context_inspector, local_data],
|
1056 |
+
).then(
|
1057 |
+
lambda: gr.update(interactive=True),
|
1058 |
+
None,
|
1059 |
+
regenerate
|
1060 |
+
).then(
|
1061 |
+
None, local_data, None,
|
1062 |
+
_js="(v)=>{ setStorage('local_data',v) }"
|
1063 |
+
)
|
1064 |
+
|
1065 |
+
stop.click(
|
1066 |
+
None, None, None,
|
1067 |
+
cancels=[send_event]
|
1068 |
+
)
|
1069 |
+
|
1070 |
+
clean.click(
|
1071 |
+
reset_chat,
|
1072 |
+
[idx, local_data, chat_state],
|
1073 |
+
[instruction_txtbox, chatbot, local_data, example_block, regenerate]
|
1074 |
+
).then(
|
1075 |
+
None, local_data, None,
|
1076 |
+
_js="(v)=>{ setStorage('local_data',v) }"
|
1077 |
+
)
|
1078 |
+
|
1079 |
+
chat_back_btn.click(
|
1080 |
+
lambda: [gr.update(visible=False), gr.update(visible=True)],
|
1081 |
+
None,
|
1082 |
+
[chat_view, landing_view]
|
1083 |
+
)
|
1084 |
+
|
1085 |
+
demo.load(
|
1086 |
+
None,
|
1087 |
+
inputs=None,
|
1088 |
+
outputs=[chatbot, local_data],
|
1089 |
+
_js=GET_LOCAL_STORAGE,
|
1090 |
+
)
|
1091 |
+
|
1092 |
+
demo.queue().launch(
|
1093 |
+
server_port=6006,
|
1094 |
+
server_name="0.0.0.0",
|
1095 |
+
debug=args.debug,
|
1096 |
+
share=args.share,
|
1097 |
+
root_path=f"{args.root_path}"
|
1098 |
+
)
|
1099 |
+
|
1100 |
+
if __name__ == "__main__":
|
1101 |
+
parser = argparse.ArgumentParser()
|
1102 |
+
parser.add_argument('--root-path', default="")
|
1103 |
+
parser.add_argument('--local-files-only', default=False, action=argparse.BooleanOptionalAction)
|
1104 |
+
parser.add_argument('--share', default=False, action=argparse.BooleanOptionalAction)
|
1105 |
+
parser.add_argument('--debug', default=False, action=argparse.BooleanOptionalAction)
|
1106 |
+
parser.add_argument('--serper-api-key', default=None, type=str)
|
1107 |
+
args = parser.parse_args()
|
1108 |
+
|
1109 |
+
gradio_main(args)
|
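The listing above ends with the CLI entry point: gradio_main(args) wires the parsed flags (--root-path, --local-files-only, --share, --debug, --serper-api-key) into the Blocks app and launches it on 0.0.0.0:6006. Below is a minimal sketch of driving that same entry point programmatically instead of via the command line; it assumes the file is saved as app.py and is importable, and the Namespace fields simply mirror the parser above.

import argparse
from app import gradio_main  # assumption: the file above is importable as app.py

args = argparse.Namespace(
    root_path="",            # --root-path
    local_files_only=False,  # --local-files-only
    share=False,             # --share; set True to get a public Gradio link
    debug=False,             # --debug
    serper_api_key=None,     # --serper-api-key; only needed when internet search is "on"
)
gradio_main(args)            # builds the Blocks UI and calls demo.queue().launch(...) on port 6006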
assets/guimode_preview.gif
ADDED
Git LFS Details
|
assets/preview.gif
ADDED
Git LFS Details
|
assets/preview.png
ADDED
Git LFS Details
|
channels.txt
ADDED
@@ -0,0 +1,10 @@
1 |
+
1st Channel
|
2 |
+
2nd Channel
|
3 |
+
3rd Channel
|
4 |
+
4th Channel
|
5 |
+
5th Channel
|
6 |
+
6th Channel
|
7 |
+
7th Channel
|
8 |
+
8th Channel
|
9 |
+
9th Channel
|
10 |
+
10th Channel
|
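channels.txt holds one conversation-channel name per line; app.py turns these into the buttons under the "Histories" accordion, highlighting channels[0] and rendering the rest as plain buttons. A minimal sketch of how the file could be read into that channels list (the helper name here is hypothetical; the actual loading code lives elsewhere in the repo):

def load_channels(path="channels.txt"):
    # one channel name per line, e.g. "1st Channel" ... "10th Channel"
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]

channels = load_channels()
print(channels[0])   # rendered with the "custom-btn-highlight" class
print(channels[1:])  # rendered with the plain "custom-btn" class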
chats/__init__.py
ADDED
File without changes
|
chats/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (129 Bytes). View file
|
|
chats/__pycache__/alpaca.cpython-39.pyc
ADDED
Binary file (1.53 kB). View file
|
|
chats/__pycache__/alpaca_gpt4.cpython-39.pyc
ADDED
Binary file (1.53 kB). View file
|
|
chats/__pycache__/alpacoom.cpython-39.pyc
ADDED
Binary file (1.53 kB). View file
|
|
chats/__pycache__/baize.cpython-39.pyc
ADDED
Binary file (1.87 kB). View file
|
|
chats/__pycache__/central.cpython-39.pyc
ADDED
Binary file (5.45 kB). View file
|
|
chats/__pycache__/custom.cpython-39.pyc
ADDED
Binary file (1.88 kB). View file
|
|
chats/__pycache__/falcon.cpython-39.pyc
ADDED
Binary file (2.09 kB). View file
|
|
chats/__pycache__/flan_alpaca.cpython-39.pyc
ADDED
Binary file (1.53 kB). View file
|
|
chats/__pycache__/freewilly.cpython-39.pyc
ADDED
Binary file (1.53 kB). View file
|
|
chats/__pycache__/guanaco.cpython-39.pyc
ADDED
Binary file (2.09 kB). View file
|
|
chats/__pycache__/koalpaca.cpython-39.pyc
ADDED
Binary file (1.53 kB). View file
|
|
chats/__pycache__/llama2.cpython-39.pyc
ADDED
Binary file (1.53 kB). View file
|
|
chats/__pycache__/mpt.cpython-39.pyc
ADDED
Binary file (2.34 kB). View file
|
|
chats/__pycache__/os_stablelm.cpython-39.pyc
ADDED
Binary file (2.12 kB). View file
|
|
chats/__pycache__/post.cpython-39.pyc
ADDED
Binary file (285 Bytes). View file
|
|
chats/__pycache__/pre.cpython-39.pyc
ADDED
Binary file (2.19 kB). View file
|
|
chats/__pycache__/redpajama.cpython-39.pyc
ADDED
Binary file (2.56 kB). View file
|
|
chats/__pycache__/stable_vicuna.cpython-39.pyc
ADDED
Binary file (2.32 kB). View file
|
|
chats/__pycache__/stablelm.cpython-39.pyc
ADDED
Binary file (2.11 kB). View file
|
|
chats/__pycache__/starchat.cpython-39.pyc
ADDED
Binary file (2.1 kB). View file
|
|
chats/__pycache__/utils.cpython-39.pyc
ADDED
Binary file (1.73 kB). View file
|
|
chats/__pycache__/vicuna.cpython-39.pyc
ADDED
Binary file (1.53 kB). View file
|
|
chats/__pycache__/wizard_coder.cpython-39.pyc
ADDED
Binary file (2.09 kB). View file
|
|
chats/__pycache__/wizard_falcon.cpython-39.pyc
ADDED
Binary file (2.1 kB). View file
|
|
chats/__pycache__/xgen.cpython-39.pyc
ADDED
Binary file (2.4 kB). View file
|
|
chats/alpaca.py
ADDED
@@ -0,0 +1,51 @@
1 |
+
import copy
|
2 |
+
import json
|
3 |
+
import global_vars
|
4 |
+
from chats import pre, post
|
5 |
+
from pingpong import PingPong
|
6 |
+
from gens.batch_gen import get_output_batch
|
7 |
+
|
8 |
+
from chats.utils import build_prompts, text_stream, internet_search
|
9 |
+
|
10 |
+
def chat_stream(
|
11 |
+
idx, local_data, user_message, state,
|
12 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
13 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
14 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
15 |
+
internet_option, serper_api_key
|
16 |
+
):
|
17 |
+
res = [
|
18 |
+
state["ppmanager_type"].from_json(json.dumps(ppm))
|
19 |
+
for ppm in local_data
|
20 |
+
]
|
21 |
+
|
22 |
+
ppm = res[idx]
|
23 |
+
|
24 |
+
# add_pingpong appends the new user turn; build_prompts then structures the conversation in Alpaca form
|
25 |
+
ppm.add_pingpong(
|
26 |
+
PingPong(user_message, "")
|
27 |
+
)
|
28 |
+
prompt = build_prompts(ppm, global_context, ctx_num_lconv)
|
29 |
+
|
30 |
+
#######
|
31 |
+
if internet_option:
|
32 |
+
search_prompt = None
|
33 |
+
for tmp_prompt, uis in internet_search(ppm, serper_api_key, global_context, ctx_num_lconv):
|
34 |
+
search_prompt = tmp_prompt
|
35 |
+
yield "", uis, prompt, str(res)
|
36 |
+
|
37 |
+
# prepare text generating streamer & start generating
|
38 |
+
gen_kwargs, streamer = pre.build(
|
39 |
+
search_prompt if internet_option else prompt,
|
40 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts,
|
41 |
+
res_beams, res_cache, res_sample, res_eosid, res_padid,
|
42 |
+
return_token_type_ids=False
|
43 |
+
)
|
44 |
+
pre.start_gen(gen_kwargs)
|
45 |
+
|
46 |
+
# handling stream
|
47 |
+
for ppmanager, uis in text_stream(ppm, streamer):
|
48 |
+
yield "", uis, prompt, str(res)
|
49 |
+
|
50 |
+
ppm = post.strip_pong(ppm)
|
51 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
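The res_* values that chat_stream forwards into pre.build (and the bos/eos/pad ids the BYOM screen lets you type in) correspond to standard transformers generation settings. The sketch below is illustrative only, showing what those knobs map to on the transformers side; the sample values are placeholders, and the repo's own pre.build assembles the real generation kwargs.

from transformers import GenerationConfig

gen_config = GenerationConfig(
    temperature=0.9,          # res_temp
    top_p=0.75,               # res_topp
    top_k=50,                 # res_topk
    repetition_penalty=1.2,   # res_rpen
    max_new_tokens=256,       # res_mnts
    num_beams=1,              # res_beams
    do_sample=True,           # res_sample
    use_cache=True,           # res_cache
    eos_token_id=2,           # res_eosid / byom_eos_token_id (decimal)
    pad_token_id=0,           # res_padid / byom_pad_token_id (decimal)
)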
chats/alpaca_gpt4.py
ADDED
@@ -0,0 +1,51 @@
1 |
+
import copy
|
2 |
+
import json
|
3 |
+
import global_vars
|
4 |
+
from chats import pre, post
|
5 |
+
from pingpong import PingPong
|
6 |
+
from gens.batch_gen import get_output_batch
|
7 |
+
|
8 |
+
from chats.utils import build_prompts, text_stream, internet_search
|
9 |
+
|
10 |
+
def chat_stream(
|
11 |
+
idx, local_data, user_message, state,
|
12 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
13 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
14 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
15 |
+
internet_option, serper_api_key
|
16 |
+
):
|
17 |
+
res = [
|
18 |
+
state["ppmanager_type"].from_json(json.dumps(ppm))
|
19 |
+
for ppm in local_data
|
20 |
+
]
|
21 |
+
|
22 |
+
ppm = res[idx]
|
23 |
+
|
24 |
+
# add_pingpong appends the new user turn; build_prompts then structures the conversation in Alpaca form
|
25 |
+
ppm.add_pingpong(
|
26 |
+
PingPong(user_message, "")
|
27 |
+
)
|
28 |
+
prompt = build_prompts(ppm, global_context, ctx_num_lconv)
|
29 |
+
|
30 |
+
#######
|
31 |
+
if internet_option:
|
32 |
+
search_prompt = None
|
33 |
+
for tmp_prompt, uis in internet_search(ppm, serper_api_key, global_context, ctx_num_lconv):
|
34 |
+
search_prompt = tmp_prompt
|
35 |
+
yield "", uis, prompt, str(res)
|
36 |
+
|
37 |
+
# prepare text generating streamer & start generating
|
38 |
+
gen_kwargs, streamer = pre.build(
|
39 |
+
search_prompt if internet_option else prompt,
|
40 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts,
|
41 |
+
res_beams, res_cache, res_sample, res_eosid, res_padid,
|
42 |
+
return_token_type_ids=False
|
43 |
+
)
|
44 |
+
pre.start_gen(gen_kwargs)
|
45 |
+
|
46 |
+
# handling stream
|
47 |
+
for ppmanager, uis in text_stream(ppm, streamer):
|
48 |
+
yield "", uis, prompt, str(res)
|
49 |
+
|
50 |
+
ppm = post.strip_pong(ppm)
|
51 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
chats/alpacoom.py
ADDED
@@ -0,0 +1,51 @@
1 |
+
import copy
|
2 |
+
import json
|
3 |
+
import global_vars
|
4 |
+
from chats import pre, post
|
5 |
+
from pingpong import PingPong
|
6 |
+
from gens.batch_gen import get_output_batch
|
7 |
+
|
8 |
+
from chats.utils import build_prompts, text_stream, internet_search
|
9 |
+
|
10 |
+
def chat_stream(
|
11 |
+
idx, local_data, user_message, state,
|
12 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
13 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
14 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
15 |
+
internet_option, serper_api_key
|
16 |
+
):
|
17 |
+
res = [
|
18 |
+
state["ppmanager_type"].from_json(json.dumps(ppm))
|
19 |
+
for ppm in local_data
|
20 |
+
]
|
21 |
+
|
22 |
+
ppm = res[idx]
|
23 |
+
|
24 |
+
# add_pingpong appends the new user turn; build_prompts then structures the conversation in Alpaca form
|
25 |
+
ppm.add_pingpong(
|
26 |
+
PingPong(user_message, "")
|
27 |
+
)
|
28 |
+
prompt = build_prompts(ppm, global_context, ctx_num_lconv)
|
29 |
+
|
30 |
+
#######
|
31 |
+
if internet_option:
|
32 |
+
search_prompt = None
|
33 |
+
for tmp_prompt, uis in internet_search(ppm, serper_api_key, global_context, ctx_num_lconv):
|
34 |
+
search_prompt = tmp_prompt
|
35 |
+
yield "", uis, prompt, str(res)
|
36 |
+
|
37 |
+
# prepare text generating streamer & start generating
|
38 |
+
gen_kwargs, streamer = pre.build(
|
39 |
+
search_prompt if internet_option else prompt,
|
40 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts,
|
41 |
+
res_beams, res_cache, res_sample, res_eosid, res_padid,
|
42 |
+
return_token_type_ids=False
|
43 |
+
)
|
44 |
+
pre.start_gen(gen_kwargs)
|
45 |
+
|
46 |
+
# handling stream
|
47 |
+
for ppmanager, uis in text_stream(ppm, streamer):
|
48 |
+
yield "", uis, prompt, str(res)
|
49 |
+
|
50 |
+
ppm = post.strip_pong(ppm)
|
51 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
chats/baize.py
ADDED
@@ -0,0 +1,68 @@
1 |
+
import copy
|
2 |
+
import json
|
3 |
+
import global_vars
|
4 |
+
from chats import pre, post
|
5 |
+
from pingpong import PingPong
|
6 |
+
from gens.batch_gen import get_output_batch
|
7 |
+
|
8 |
+
from chats.utils import build_prompts, internet_search
|
9 |
+
|
10 |
+
def text_stream(ppmanager, streamer):
|
11 |
+
count = 0
|
12 |
+
|
13 |
+
for new_text in streamer:
|
14 |
+
if "[|Human|]" in new_text or \
|
15 |
+
"[|AI|]" in new_text:
|
16 |
+
break
|
17 |
+
|
18 |
+
if count == 0:
|
19 |
+
ppmanager.append_pong(f"![]({global_vars.model_thumbnail_tiny})***[{global_vars.model_type}]***\n")
|
20 |
+
count = count + 1
|
21 |
+
|
22 |
+
ppmanager.append_pong(new_text)
|
23 |
+
yield ppmanager, ppmanager.build_uis()
|
24 |
+
|
25 |
+
yield ppmanager, ppmanager.build_uis()
|
26 |
+
|
27 |
+
def chat_stream(
|
28 |
+
idx, local_data, user_message, state,
|
29 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
30 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
31 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
32 |
+
internet_option, serper_api_key
|
33 |
+
):
|
34 |
+
res = [
|
35 |
+
state["ppmanager_type"].from_json(json.dumps(ppm))
|
36 |
+
for ppm in local_data
|
37 |
+
]
|
38 |
+
|
39 |
+
ppm = res[idx]
|
40 |
+
|
41 |
+
# add_pingpong appends the new user turn; build_prompts then structures the conversation in Baize's prompt format
|
42 |
+
ppm.add_pingpong(
|
43 |
+
PingPong(user_message, "")
|
44 |
+
)
|
45 |
+
prompt = build_prompts(ppm, global_context, ctx_num_lconv)
|
46 |
+
|
47 |
+
#######
|
48 |
+
if internet_option:
|
49 |
+
search_prompt = None
|
50 |
+
for tmp_prompt, uis in internet_search(ppm, serper_api_key, global_context, ctx_num_lconv):
|
51 |
+
search_prompt = tmp_prompt
|
52 |
+
yield "", uis, prompt, str(res)
|
53 |
+
|
54 |
+
# prepare text generating streamer & start generating
|
55 |
+
gen_kwargs, streamer = pre.build(
|
56 |
+
search_prompt if internet_option else prompt,
|
57 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts,
|
58 |
+
res_beams, res_cache, res_sample, res_eosid, res_padid,
|
59 |
+
return_token_type_ids=False
|
60 |
+
)
|
61 |
+
pre.start_gen(gen_kwargs)
|
62 |
+
|
63 |
+
# handling stream
|
64 |
+
for ppmanager, uis in text_stream(ppm, streamer):
|
65 |
+
yield "", uis, prompt, str(res)
|
66 |
+
|
67 |
+
ppm = post.strip_pong(ppm)
|
68 |
+
yield "", ppm.build_uis(), prompt, str(res)
|
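Unlike the Alpaca-style modules above, chats/baize.py overrides text_stream so that generation is cut as soon as the model starts emitting the next "[|Human|]" or "[|AI|]" turn tag. A minimal, self-contained sketch of that stop-marker filtering (the streamer here is just a list of chunks standing in for the real token streamer):

STOP_MARKERS = ("[|Human|]", "[|AI|]")

def filter_stream(chunks):
    # yield generated chunks until a turn tag signals the start of the next speaker
    for chunk in chunks:
        if any(marker in chunk for marker in STOP_MARKERS):
            break
        yield chunk

# "".join(filter_stream(["Sure, ", "here you go.", "[|Human|] next question"]))
# -> "Sure, here you go."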
chats/central.py
ADDED
@@ -0,0 +1,380 @@
1 |
+
from chats import stablelm
|
2 |
+
from chats import alpaca
|
3 |
+
from chats import koalpaca
|
4 |
+
from chats import flan_alpaca
|
5 |
+
from chats import os_stablelm
|
6 |
+
from chats import vicuna
|
7 |
+
from chats import stable_vicuna
|
8 |
+
from chats import starchat
|
9 |
+
from chats import wizard_coder
|
10 |
+
from chats import redpajama
|
11 |
+
from chats import mpt
|
12 |
+
from chats import alpacoom
|
13 |
+
from chats import baize
|
14 |
+
from chats import guanaco
|
15 |
+
from chats import falcon
|
16 |
+
from chats import wizard_falcon
|
17 |
+
from chats import xgen
|
18 |
+
from chats import llama2
|
19 |
+
from chats import freewilly
|
20 |
+
from chats import custom
|
21 |
+
|
22 |
+
def chat_stream(
|
23 |
+
idx, local_data, user_message, state,
|
24 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
25 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
26 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
27 |
+
internet_option, serper_api_key
|
28 |
+
):
|
29 |
+
model_type = state["model_type"]
|
30 |
+
|
31 |
+
if internet_option == "on" and serper_api_key.strip() != "":
|
32 |
+
internet_option = True
|
33 |
+
else:
|
34 |
+
internet_option = False
|
35 |
+
|
36 |
+
if model_type == "custom":
|
37 |
+
cs = custom.chat_stream(
|
38 |
+
idx, local_data, user_message, state,
|
39 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
40 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
41 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
42 |
+
internet_option, serper_api_key
|
43 |
+
)
|
44 |
+
|
45 |
+
elif model_type == "free-willy":
|
46 |
+
cs = freewilly.chat_stream(
|
47 |
+
idx, local_data, user_message, state,
|
48 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
49 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
50 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
51 |
+
internet_option, serper_api_key
|
52 |
+
)
|
53 |
+
|
54 |
+
elif model_type == "upstage-llama":
|
55 |
+
cs = alpaca.chat_stream(
|
56 |
+
idx, local_data, user_message, state,
|
57 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
58 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
59 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
60 |
+
internet_option, serper_api_key
|
61 |
+
)
|
62 |
+
|
63 |
+
elif model_type == "llama2":
|
64 |
+
cs = llama2.chat_stream(
|
65 |
+
idx, local_data, user_message, state,
|
66 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
67 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
68 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
69 |
+
internet_option, serper_api_key
|
70 |
+
)
|
71 |
+
|
72 |
+
elif model_type == "xgen":
|
73 |
+
cs = xgen.chat_stream(
|
74 |
+
idx, local_data, user_message, state,
|
75 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
76 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
77 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
78 |
+
internet_option, serper_api_key
|
79 |
+
)
|
80 |
+
|
81 |
+
elif model_type == "stablelm":
|
82 |
+
cs = stablelm.chat_stream(
|
83 |
+
idx, local_data, user_message, state,
|
84 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
85 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
86 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
87 |
+
internet_option, serper_api_key
|
88 |
+
)
|
89 |
+
|
90 |
+
elif model_type == "falcon":
|
91 |
+
cs = falcon.chat_stream(
|
92 |
+
idx, local_data, user_message, state,
|
93 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
94 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
95 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
96 |
+
internet_option, serper_api_key
|
97 |
+
)
|
98 |
+
|
99 |
+
elif model_type == "wizard-falcon":
|
100 |
+
cs = wizard_falcon.chat_stream(
|
101 |
+
idx, local_data, user_message, state,
|
102 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
103 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
104 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
105 |
+
internet_option, serper_api_key
|
106 |
+
)
|
107 |
+
|
108 |
+
elif model_type == "baize":
|
109 |
+
cs = baize.chat_stream(
|
110 |
+
idx, local_data, user_message, state,
|
111 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
112 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
113 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
114 |
+
internet_option, serper_api_key
|
115 |
+
)
|
116 |
+
|
117 |
+
elif model_type == "alpaca":
|
118 |
+
cs = alpaca.chat_stream(
|
119 |
+
idx, local_data, user_message, state,
|
120 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
121 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
122 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
123 |
+
internet_option, serper_api_key
|
124 |
+
)
|
125 |
+
|
126 |
+
elif model_type == "openllama":
|
127 |
+
cs = alpaca.chat_stream(
|
128 |
+
idx, local_data, user_message, state,
|
129 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
130 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
131 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
132 |
+
internet_option, serper_api_key
|
133 |
+
)
|
134 |
+
|
135 |
+
elif model_type == "orcamini":
|
136 |
+
cs = alpaca.chat_stream(
|
137 |
+
idx, local_data, user_message, state,
|
138 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
139 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
140 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
141 |
+
internet_option, serper_api_key
|
142 |
+
)
|
143 |
+
|
144 |
+
elif model_type == "alpaca-gpt4":
|
145 |
+
cs = alpaca.chat_stream(
|
146 |
+
idx, local_data, user_message, state,
|
147 |
+
global_context, ctx_num_lconv, ctx_sum_prompt,
|
148 |
+
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
|
149 |
+
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
|
150 |
+
internet_option, serper_api_key
|
151 |
+
)
|
152 |
+
|
153 |
+
elif model_type == "nous-hermes":
|
154 |
+
cs = alpaca.chat_stream(
|
155 |
+
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "replit-instruct":
+        cs = alpaca.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "alpacoom":
+        cs = alpacoom.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "llama-deus":
+        cs = alpaca.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "camel":
+        cs = alpaca.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "koalpaca-polyglot":
+        cs = koalpaca.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "kullm-polyglot":
+        cs = koalpaca.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "flan-alpaca":
+        cs = flan_alpaca.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "os-stablelm":
+        cs = os_stablelm.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "t5-vicuna":
+        cs = vicuna.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "stable-vicuna":
+        cs = stable_vicuna.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "vicuna":
+        cs = vicuna.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "wizardlm":
+        cs = vicuna.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "wizard-vicuna":
+        cs = vicuna.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "airoboros":
+        cs = vicuna.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "samantha-vicuna":
+        cs = vicuna.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "evolinstruct-vicuna":
+        cs = vicuna.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "starchat":
+        cs = starchat.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "wizard-coder":
+        cs = wizard_coder.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "mpt":
+        cs = mpt.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "redpajama":
+        cs = redpajama.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "redpajama-instruct":
+        cs = redpajama.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "guanaco":
+        cs = guanaco.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "lazarus":
+        cs = alpaca.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    elif model_type == "chronos":
+        cs = alpaca.chat_stream(
+            idx, local_data, user_message, state,
+            global_context, ctx_num_lconv, ctx_sum_prompt,
+            res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+            sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+            internet_option, serper_api_key
+        )
+
+    for idx, x in enumerate(cs):
+        yield x
+
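Every branch in the chain above forwards the same argument list; only the handling module differs per model type. A hedged sketch (not one of the uploaded files) of the same routing expressed as a lookup table, using only the model-type strings and chats modules that appear in this diff:

# Hedged sketch only -- restates the branch structure above as a table:
# each model type maps to the module whose chat_stream() receives the
# identical argument list.
from chats import (
    alpaca, alpacoom, koalpaca, flan_alpaca, os_stablelm, vicuna,
    stable_vicuna, starchat, wizard_coder, mpt, redpajama, guanaco,
)

CHAT_STREAM_BY_MODEL = {
    "replit-instruct": alpaca.chat_stream,
    "alpacoom": alpacoom.chat_stream,
    "llama-deus": alpaca.chat_stream,
    "camel": alpaca.chat_stream,
    "koalpaca-polyglot": koalpaca.chat_stream,
    "kullm-polyglot": koalpaca.chat_stream,
    "flan-alpaca": flan_alpaca.chat_stream,
    "os-stablelm": os_stablelm.chat_stream,
    "t5-vicuna": vicuna.chat_stream,
    "stable-vicuna": stable_vicuna.chat_stream,
    "vicuna": vicuna.chat_stream,
    "wizardlm": vicuna.chat_stream,
    "wizard-vicuna": vicuna.chat_stream,
    "airoboros": vicuna.chat_stream,
    "samantha-vicuna": vicuna.chat_stream,
    "evolinstruct-vicuna": vicuna.chat_stream,
    "starchat": starchat.chat_stream,
    "wizard-coder": wizard_coder.chat_stream,
    "mpt": mpt.chat_stream,
    "redpajama": redpajama.chat_stream,
    "redpajama-instruct": redpajama.chat_stream,
    "guanaco": guanaco.chat_stream,
    "lazarus": alpaca.chat_stream,
    "chronos": alpaca.chat_stream,
}

def dispatch(model_type, *args):
    # Raises KeyError for unknown model types; the real file handles further
    # branches (e.g. a custom model) before this chain is reached.
    return CHAT_STREAM_BY_MODEL[model_type](*args)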
chats/custom.py
ADDED
@@ -0,0 +1,65 @@
+import copy
+import json
+import global_vars
+from chats import pre, post
+from pingpong import PingPong
+from gens.batch_gen import get_output_batch
+
+from chats.utils import build_prompts, internet_search
+
+def text_stream(ppmanager, streamer):
+    count = 0
+    thumbnail_tiny = "https://i.ibb.co/f80BpgR/byom.png"
+
+    for new_text in streamer:
+        if count == 0:
+            ppmanager.append_pong(f"![]({global_vars.model_thumbnail_tiny})***[{global_vars.model_type}]***\n")
+            count = count + 1
+
+        ppmanager.append_pong(new_text)
+        yield ppmanager, ppmanager.build_uis()
+
+    yield ppmanager, ppmanager.build_uis()
+
+def chat_stream(
+    idx, local_data, user_message, state,
+    global_context, ctx_num_lconv, ctx_sum_prompt,
+    res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+    sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+    internet_option, serper_api_key
+):
+    res = [
+        state["ppmanager_type"].from_json(json.dumps(ppm))
+        for ppm in local_data
+    ]
+
+    ppm = res[idx]
+
+    # add_ping returns a prompt structured in Alpaca form
+    ppm.add_pingpong(
+        PingPong(user_message, "")
+    )
+    prompt = build_prompts(ppm, global_context, ctx_num_lconv)
+
+    #######
+    if internet_option:
+        search_prompt = None
+        for tmp_prompt, uis in internet_search(ppm, serper_api_key, global_context, ctx_num_lconv):
+            search_prompt = tmp_prompt
+            yield "", uis, prompt, str(res)
+
+    # prepare text generating streamer & start generating
+    gen_kwargs, streamer = pre.build(
+        search_prompt if internet_option else prompt,
+        res_temp, res_topp, res_topk, res_rpen, res_mnts,
+        res_beams, res_cache, res_sample, res_eosid, res_padid,
+        return_token_type_ids=False
+    )
+    pre.start_gen(gen_kwargs)
+
+    # handling stream
+    for ppmanager, uis in text_stream(ppm, streamer):
+        yield "", uis, prompt, str(res)
+
+    ppm = post.strip_pong(ppm)
+    yield "", ppm.build_uis(), prompt, str(res)
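In chats/custom.py, text_stream prepends a one-time model badge (thumbnail image plus model type) to the first streamed chunk, then appends every subsequent chunk to the current pong and re-renders the UI. A self-contained toy version of that accumulation pattern, with hypothetical stand-ins for the repo's ppmanager and the Hugging Face streamer:

# Toy illustration only -- ToyManager and the list-based "streamer" are
# hypothetical stand-ins, not the repo's classes.
class ToyManager:
    def __init__(self):
        self.pong = ""
    def append_pong(self, text):
        self.pong += text
    def build_uis(self):
        return self.pong

def toy_text_stream(manager, streamer, badge="***[custom]***\n"):
    first = True
    for new_text in streamer:
        if first:
            manager.append_pong(badge)   # the badge is emitted exactly once
            first = False
        manager.append_pong(new_text)    # accumulate the chunk
        yield manager, manager.build_uis()
    yield manager, manager.build_uis()   # final state after the stream ends

for _, ui in toy_text_stream(ToyManager(), ["Hel", "lo", "!"]):
    print(ui)   # shows the progressively growing response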
chats/falcon.py
ADDED
@@ -0,0 +1,62 @@
+import torch
+from transformers import StoppingCriteria, StoppingCriteriaList
+
+import copy
+import json
+import global_vars
+from chats import pre, post
+from pingpong import PingPong
+from gens.batch_gen import get_output_batch
+
+from chats.utils import build_prompts, text_stream, internet_search
+
+class StopOnTokens(StoppingCriteria):
+    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
+        stop_ids = [11]
+        for stop_id in stop_ids:
+            if input_ids[0][-1] == stop_id:
+                return True
+        return False
+
+def chat_stream(
+    idx, local_data, user_message, state,
+    global_context, ctx_num_lconv, ctx_sum_prompt,
+    res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+    sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+    internet_option, serper_api_key
+):
+    res = [
+        state["ppmanager_type"].from_json(json.dumps(ppm))
+        for ppm in local_data
+    ]
+
+    ppm = res[idx]
+
+    # add_ping returns a prompt structured in Alpaca form
+    ppm.add_pingpong(
+        PingPong(user_message, "")
+    )
+    prompt = build_prompts(ppm, global_context, ctx_num_lconv)
+
+    #######
+    if internet_option:
+        search_prompt = None
+        for tmp_prompt, uis in internet_search(ppm, serper_api_key, global_context, ctx_num_lconv):
+            search_prompt = tmp_prompt
+            yield "", uis, prompt, str(res)
+
+    # prepare text generating streamer & start generating
+    gen_kwargs, streamer = pre.build(
+        search_prompt if internet_option else prompt,
+        res_temp, res_topp, res_topk, res_rpen, res_mnts,
+        res_beams, res_cache, res_sample, res_eosid, res_padid,
+        return_token_type_ids=False
+    )
+    pre.start_gen(gen_kwargs)
+
+    # handling stream
+    for ppmanager, uis in text_stream(ppm, streamer):
+        yield "", uis, prompt, str(res)
+
+    ppm = post.strip_pong(ppm)
+    yield "", ppm.build_uis(), prompt, str(res)
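StopOnTokens in chats/falcon.py is a transformers StoppingCriteria that halts generation once the most recent token id equals 11. The stopping_criteria argument of generate() is standard transformers API, but how (or whether) pre.build wires this class in is not visible in this diff, so the sketch below only illustrates typical usage under that assumption:

# Hedged sketch of how a StoppingCriteria is normally attached to generate();
# StopOnIds is a hypothetical generalization of the StopOnTokens class above.
import torch
from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnIds(StoppingCriteria):
    """Stop as soon as the most recently generated token matches one of stop_ids."""
    def __init__(self, stop_ids):
        self.stop_ids = set(stop_ids)

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        return int(input_ids[0][-1]) in self.stop_ids

# Typical call site (model, tokenizer, and prompt are placeholders):
# outputs = model.generate(
#     **tokenizer(prompt, return_tensors="pt"),
#     max_new_tokens=256,
#     stopping_criteria=StoppingCriteriaList([StopOnIds([11])]),
# )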
chats/flan_alpaca.py
ADDED
@@ -0,0 +1,51 @@
+import copy
+import json
+import global_vars
+from chats import pre, post
+from pingpong import PingPong
+from gens.batch_gen import get_output_batch
+
+from chats.utils import build_prompts, text_stream, internet_search
+
+def chat_stream(
+    idx, local_data, user_message, state,
+    global_context, ctx_num_lconv, ctx_sum_prompt,
+    res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+    sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+    internet_option, serper_api_key
+):
+    res = [
+        state["ppmanager_type"].from_json(json.dumps(ppm))
+        for ppm in local_data
+    ]
+
+    ppm = res[idx]
+
+    # add_ping returns a prompt structured in Alpaca form
+    ppm.add_pingpong(
+        PingPong(user_message, "")
+    )
+    prompt = build_prompts(ppm, global_context, ctx_num_lconv)
+
+    #######
+    if internet_option:
+        search_prompt = None
+        for tmp_prompt, uis in internet_search(ppm, serper_api_key, global_context, ctx_num_lconv):
+            search_prompt = tmp_prompt
+            yield "", uis, prompt, str(res)
+
+    # prepare text generating streamer & start generating
+    gen_kwargs, streamer = pre.build(
+        search_prompt if internet_option else prompt,
+        res_temp, res_topp, res_topk, res_rpen, res_mnts,
+        res_beams, res_cache, res_sample, res_eosid, res_padid,
+        return_token_type_ids=False
+    )
+    pre.start_gen(gen_kwargs)
+
+    # handling stream
+    for ppmanager, uis in text_stream(ppm, streamer):
+        yield "", uis, prompt, str(res)
+
+    ppm = post.strip_pong(ppm)
+    yield "", ppm.build_uis(), prompt, str(res)
chats/freewilly.py
ADDED
@@ -0,0 +1,74 @@
+import copy
+import json
+import global_vars
+from chats import pre, post
+from pingpong import PingPong
+from gens.batch_gen import get_output_batch
+
+from chats.utils import build_prompts, text_stream, internet_search
+
+def chat_stream(
+    idx, local_data, user_message, state,
+    global_context, ctx_num_lconv, ctx_sum_prompt,
+    res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+    sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+    internet_option, serper_api_key
+):
+    res = [
+        state["ppmanager_type"].from_json(json.dumps(ppm))
+        for ppm in local_data
+    ]
+
+    ppm = res[idx]
+
+    # add_ping returns a prompt structured in Alpaca form
+    ppm.add_pingpong(
+        PingPong(user_message, "")
+    )
+    prompt = build_prompts(ppm, global_context, ctx_num_lconv)
+
+    #######
+    if internet_option:
+        search_prompt = None
+        for tmp_prompt, uis in internet_search(ppm, serper_api_key, global_context, ctx_num_lconv):
+            search_prompt = tmp_prompt
+            yield "", uis, prompt, str(res)
+
+    # prepare text generating streamer & start generating
+    gen_kwargs, streamer = pre.build(
+        search_prompt if internet_option else prompt,
+        res_temp, res_topp, res_topk, res_rpen, res_mnts,
+        res_beams, res_cache, res_sample, res_eosid, res_padid,
+        return_token_type_ids=False
+    )
+    pre.start_gen(gen_kwargs)
+
+    # handling stream
+    for ppmanager, uis in text_stream(ppm, streamer):
+        yield "", uis, prompt, str(res)
+
+    # output = f"![]({global_vars.model_thumbnail_tiny})***[{global_vars.model_type}]***\n"
+
+    # inputs = global_vars.tokenizer(
+    #     prompt, return_tensors="pt"
+    # ).to(global_vars.device)
+
+    # output = output + global_vars.model.generate(
+    #     **inputs,
+    #     temperature=res_temp,
+    #     do_sample=res_sample,
+    #     top_p=res_topp,
+    #     top_k=res_topk,
+    #     repetition_penalty=res_rpen,
+    #     num_beams=res_beams,
+    #     use_cache=res_cache,
+    #     eos_token_id=res_eosid,
+    #     pad_token_id=res_padid,
+    #     max_new_tokens=res_mnts
+    # )
+
+    # ppm.replace_last_pong(output)
+    # yield "", ppm.build_uis(), prompt, str(res)
+
+    ppm = post.strip_pong(ppm)
+    yield "", ppm.build_uis(), prompt, str(res)
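chats/freewilly.py keeps a commented-out blocking global_vars.model.generate(...) path and instead streams through pre.build / pre.start_gen and text_stream. The pre module itself is not shown in this view; the usual transformers pattern for this kind of token-by-token streaming is TextIteratorStreamer with generate() running on a background thread, sketched below under that assumption (model, tokenizer, and prompt are placeholders):

# Hedged sketch of the standard streaming pattern; it is an assumption that
# pre.build / pre.start_gen wrap something equivalent.
from threading import Thread
from transformers import TextIteratorStreamer

def stream_generate(model, tokenizer, prompt, **gen_overrides):
    # Tokenize the prompt and set up an iterator-style streamer.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    gen_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=256, **gen_overrides)

    # generate() blocks, so it runs in a thread while the caller consumes the streamer.
    Thread(target=model.generate, kwargs=gen_kwargs).start()

    for new_text in streamer:   # yields decoded text chunks as they are produced
        yield new_text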
chats/guanaco.py
ADDED
@@ -0,0 +1,63 @@
+import torch
+from transformers import StoppingCriteria, StoppingCriteriaList
+
+import copy
+import json
+import global_vars
+from chats import pre, post
+from pingpong import PingPong
+from gens.batch_gen import get_output_batch
+
+from chats.utils import build_prompts, text_stream, internet_search
+
+class StopOnTokens(StoppingCriteria):
+    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
+        stop_token_ids = [0]
+
+        for stop_id in stop_token_ids:
+            if input_ids[0][-1] == stop_id:
+                return True
+        return False
+
+def chat_stream(
+    idx, local_data, user_message, state,
+    global_context, ctx_num_lconv, ctx_sum_prompt,
+    res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+    sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+    internet_option, serper_api_key
+):
+    res = [
+        state["ppmanager_type"].from_json(json.dumps(ppm))
+        for ppm in local_data
+    ]
+
+    ppm = res[idx]
+
+    # add_ping returns a prompt structured in Alpaca form
+    ppm.add_pingpong(
+        PingPong(user_message, "")
+    )
+    prompt = build_prompts(ppm, global_context, ctx_num_lconv)
+
+    #######
+    if internet_option:
+        search_prompt = None
+        for tmp_prompt, uis in internet_search(ppm, serper_api_key, global_context, ctx_num_lconv):
+            search_prompt = tmp_prompt
+            yield "", uis, prompt, str(res)
+
+    # prepare text generating streamer & start generating
+    gen_kwargs, streamer = pre.build(
+        search_prompt if internet_option else prompt,
+        res_temp, res_topp, res_topk, res_rpen, res_mnts,
+        res_beams, res_cache, res_sample, res_eosid, res_padid,
+        return_token_type_ids=False
+    )
+    pre.start_gen(gen_kwargs)
+
+    # handling stream
+    for ppmanager, uis in text_stream(ppm, streamer):
+        yield "", uis, prompt, str(res)
+
+    ppm = post.strip_pong(ppm)
+    yield "", ppm.build_uis(), prompt, str(res)
chats/koalpaca.py
ADDED
@@ -0,0 +1,51 @@
+import copy
+import json
+import global_vars
+from chats import pre, post
+from pingpong import PingPong
+from gens.batch_gen import get_output_batch
+
+from chats.utils import build_prompts, text_stream, internet_search
+
+def chat_stream(
+    idx, local_data, user_message, state,
+    global_context, ctx_num_lconv, ctx_sum_prompt,
+    res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
+    sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
+    internet_option, serper_api_key
+):
+    res = [
+        state["ppmanager_type"].from_json(json.dumps(ppm))
+        for ppm in local_data
+    ]
+
+    ppm = res[idx]
+
+    # add_ping returns a prompt structured in Alpaca form
+    ppm.add_pingpong(
+        PingPong(user_message, "")
+    )
+    prompt = build_prompts(ppm, global_context, ctx_num_lconv)
+
+    #######
+    if internet_option:
+        search_prompt = None
+        for tmp_prompt, uis in internet_search(ppm, serper_api_key, global_context, ctx_num_lconv):
+            search_prompt = tmp_prompt
+            yield "", uis, prompt, str(res)
+
+    # prepare text generating streamer & start generating
+    gen_kwargs, streamer = pre.build(
+        search_prompt if internet_option else prompt,
+        res_temp, res_topp, res_topk, res_rpen, res_mnts,
+        res_beams, res_cache, res_sample, res_eosid, res_padid,
+        return_token_type_ids=False
+    )
+    pre.start_gen(gen_kwargs)
+
+    # handling stream
+    for ppmanager, uis in text_stream(ppm, streamer):
+        yield "", uis, prompt, str(res)
+
+    ppm = post.strip_pong(ppm)
+    yield "", ppm.build_uis(), prompt, str(res)