nyuuzyou committed
Commit e6115bb
1 Parent(s): 3724b41

Upload folder using huggingface_hub

Files changed (4):
  1. README.md +2 -2
  2. app.py +124 -0
  3. requirements.txt +8 -0
  4. style.css +16 -0
README.md CHANGED
@@ -1,8 +1,8 @@
 ---
 title: EuroLLM 9B Instruct
-emoji: 🦀
+emoji:
 colorFrom: red
-colorTo: yellow
+colorTo: gray
 sdk: gradio
 sdk_version: 5.9.0
 app_file: app.py
app.py ADDED
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+
+import os
+from threading import Thread
+from typing import Iterator
+
+import spaces
+import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+
+MAX_MAX_NEW_TOKENS = 2048
+DEFAULT_MAX_NEW_TOKENS = 1024
+MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "8192"))
+
+# Markdown blurb rendered above the chat UI; extended with a warning on CPU-only hosts.
+DESCRIPTION = "# EuroLLM 9B Instruct"
+
+if not torch.cuda.is_available():
+    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
+
+if torch.cuda.is_available():
+    model_id = "utter-project/EuroLLM-9B-Instruct"
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
+
+
+@spaces.GPU
+def generate(
+    message: str,
+    chat_history: list[tuple[str, str]],
+    max_new_tokens: int = 1024,
+    temperature: float = 0.06,
+    top_p: float = 0.95,
+    top_k: int = 40,
+    repetition_penalty: float = 1.2,
+) -> Iterator[str]:
+    # Prepend the entire chat history to the message, with newlines between turns.
+    historical_text = ""
+    for user, assistant in chat_history:
+        historical_text += f"\n{user}\n{assistant}"
+    if historical_text:
+        message = historical_text + f"\n{message}"
+
+    input_ids = tokenizer([message], return_tensors="pt").input_ids
+    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
+        # Keep only the most recent tokens so the prompt fits the context budget.
+        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
+    input_ids = input_ids.to(model.device)
+
+    # Run generation in a background thread and stream tokens back to the UI.
+    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+    generate_kwargs = dict(
+        input_ids=input_ids,
+        streamer=streamer,
+        max_new_tokens=max_new_tokens,
+        do_sample=True,
+        top_p=top_p,
+        top_k=top_k,
+        temperature=temperature,
+        num_beams=1,
+        pad_token_id=tokenizer.eos_token_id,
+        repetition_penalty=repetition_penalty,
+        no_repeat_ngram_size=5,
+        early_stopping=False,
+    )
+    t = Thread(target=model.generate, kwargs=generate_kwargs)
+    t.start()
+
+    outputs = []
+    for text in streamer:
+        outputs.append(text)
+        yield "".join(outputs)
+
+
+chat_interface = gr.ChatInterface(
+    fn=generate,
+    additional_inputs=[
+        gr.Slider(
+            label="Max new tokens",
+            minimum=1,
+            maximum=MAX_MAX_NEW_TOKENS,
+            step=1,
+            value=DEFAULT_MAX_NEW_TOKENS,
+        ),
+        gr.Slider(
+            label="Temperature",
+            minimum=0.1,
+            maximum=1.2,
+            step=0.1,
+            value=0.2,
+        ),
+        gr.Slider(
+            label="Top-p (nucleus sampling)",
+            minimum=0.05,
+            maximum=1.0,
+            step=0.05,
+            value=0.9,
+        ),
+        gr.Slider(
+            label="Top-k",
+            minimum=1,
+            maximum=1000,
+            step=1,
+            value=50,
+        ),
+        gr.Slider(
+            label="Repetition penalty",
+            minimum=1.0,
+            maximum=2.0,
+            step=0.05,
+            value=1.2,
+        ),
+    ],
+    stop_btn=None,
+    examples=[
+        ["Describe the significance of the Eiffel Tower in French culture and history."],
+        ["Что такое 'загадочная русская душа' и как это понятие отражается в русской литературе?"],  # Russian: What is the "mysterious Russian soul" and how is this concept reflected in Russian literature?
+        ["Jakie są najbardziej znane polskie tradycje bożonarodzeniowe?"],  # Polish: What are the most well-known Polish Christmas traditions?
+        ["Welche Rolle spielte die Hanse im mittelalterlichen Europa?"],  # German: What role did the Hanseatic League play in medieval Europe?
+        ["日本の茶道の精神と作法について説明してください。"],  # Japanese: Please explain the spirit and etiquette of the Japanese tea ceremony.
+    ],
+)
+
+with gr.Blocks(css="style.css") as demo:
+    gr.Markdown(DESCRIPTION)
+    chat_interface.render()
+
+if __name__ == "__main__":
+    demo.queue(max_size=20).launch()
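
Note on the prompt construction above: generate() splices the chat history together with bare newlines, whereas instruct-tuned checkpoints are normally prompted through the chat template distributed with the model. A minimal alternative sketch, assuming utter-project/EuroLLM-9B-Instruct ships a chat template on the Hub (build_inputs is a hypothetical helper, not part of this commit):

    # Hypothetical helper: format the conversation with the model's own chat
    # template instead of joining turns with newlines.
    def build_inputs(message, chat_history, tokenizer):
        conversation = []
        for user, assistant in chat_history:
            conversation.append({"role": "user", "content": user})
            conversation.append({"role": "assistant", "content": assistant})
        conversation.append({"role": "user", "content": message})
        # add_generation_prompt=True appends the assistant prefix so the model
        # answers instead of continuing the user's turn.
        return tokenizer.apply_chat_template(
            conversation, add_generation_prompt=True, return_tensors="pt"
        )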
requirements.txt ADDED
@@ -0,0 +1,8 @@
+accelerate==0.28.0
+gradio==4.28.2
+scipy==1.12.0
+sentencepiece==0.2.0
+spaces==0.26.2
+torch==2.1.1
+transformers==4.40.1
+tokenizers==0.19.1
style.css ADDED
@@ -0,0 +1,16 @@
+h1 {
+  text-align: center;
+}
+
+#duplicate-button {
+  margin: auto;
+  color: white;
+  background: #1565c0;
+  border-radius: 100vh;
+}
+
+.contain {
+  max-width: 900px;
+  margin: auto;
+  padding-top: 1.5rem;
+}
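
The #duplicate-button rule above styles an element that app.py never creates. A minimal sketch of wiring it up, assuming the stock Gradio duplicate button is intended (this Blocks snippet is illustrative, not part of this commit):

    # Illustrative only: gr.DuplicateButton renders a "duplicate this Space"
    # button when the app runs on Hugging Face Spaces; elem_id matches the
    # #duplicate-button CSS rule above.
    with gr.Blocks(css="style.css") as demo:
        gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
        chat_interface.render()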