Hiroaki Ogasawara committed on
Commit
e642ef1
1 Parent(s): fb1a07e

chore: separate load script

Browse files
Files changed (1) hide show
  1. app.py +20 -16
app.py CHANGED
@@ -9,6 +9,21 @@ from utils import evaluate, report
9
  from transformers import AutoTokenizer
10
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def process_jsonl_file(jsonl_file_path: str, api_key: str):
13
  try:
14
  content = open(jsonl_file_path, "r", encoding="utf-8").readlines()
@@ -32,7 +47,7 @@ def process_jsonl_file(jsonl_file_path: str, api_key: str):
32
  ) as temp_file:
33
  temp_file.write(html_content)
34
  output_file = temp_file.name
35
-
36
  output_csv = None
37
  keys = json_data[0].keys()
38
  with tempfile.NamedTemporaryFile(
@@ -53,8 +68,7 @@ def process_jsonl_file(jsonl_file_path: str, api_key: str):
53
  return None, None, e
54
 
55
 
56
- # Gradioデモ
57
- with gr.Blocks() as reporting:
58
  jsonl_input = gr.File(label="JSONLファイルをアップロード")
59
  api_key_input = gr.Textbox(
60
  label="GeminiのAPIキー(スコアのセルフ評価を行う場合)", type="password"
@@ -72,6 +86,8 @@ with gr.Blocks() as reporting:
72
  outputs=[output_file, output_csv, output_text],
73
  )
74
 
 
 
75
  llm_jp_3 = "llm-jp/llm-jp-3-1.8b"
76
  gemma_2 = "google/gemma-2-2b"
77
 
@@ -102,20 +118,8 @@ def tokenize_text(text: str, tokenizer_name: str):
102
  token_count = len(tokens)
103
  return f"<p>{tokenized_text}</p><p>Token Count: {token_count}</p>"
104
 
105
- # https://x.com/abidlabs/status/1721548226250371264/photo/1
106
- ga_script = """
107
- <!-- Google tag (gtag.js) -->
108
- <script async src="https://www.googletagmanager.com/gtag/js?id=G-0SHLFV3PV0"></script>
109
- <script>
110
- window.dataLayer = window.dataLayer || [];
111
- function gtag(){dataLayer.push(arguments);}
112
- gtag('js', new Date());
113
-
114
- gtag('config', 'G-0SHLFV3PV0');
115
- </script>
116
- """
117
 
118
- with gr.Blocks(head=ga_script) as tokenization:
119
  with gr.Row():
120
  tokenizer_dropdown = gr.Dropdown(
121
  label="Tokenizerを選択", choices=tokenizer_names, value=tokenizer_names[0]
 
9
  from transformers import AutoTokenizer
10
 
11
 
12
+ # https://x.com/abidlabs/status/1721548226250371264/photo/1
13
+ # https://github.com/gradio-app/gradio/issues/5954
14
+ ga_script = """
15
+ <script async src="https://www.googletagmanager.com/gtag/js?id=G-0SHLFV3PV0"></script>
16
+ """
17
+ ga_load = """
18
+ function() {
19
+ window.dataLayer = window.dataLayer || [];
20
+ function gtag(){dataLayer.push(arguments);}
21
+ gtag('js', new Date());
22
+
23
+ gtag('config', 'G-0SHLFV3PV0');
24
+ }
25
+ """
26
+
27
  def process_jsonl_file(jsonl_file_path: str, api_key: str):
28
  try:
29
  content = open(jsonl_file_path, "r", encoding="utf-8").readlines()
 
47
  ) as temp_file:
48
  temp_file.write(html_content)
49
  output_file = temp_file.name
50
+
51
  output_csv = None
52
  keys = json_data[0].keys()
53
  with tempfile.NamedTemporaryFile(
 
68
  return None, None, e
69
 
70
 
71
+ with gr.Blocks(head=ga_script) as reporting:
 
72
  jsonl_input = gr.File(label="JSONLファイルをアップロード")
73
  api_key_input = gr.Textbox(
74
  label="GeminiのAPIキー(スコアのセルフ評価を行う場合)", type="password"
 
86
  outputs=[output_file, output_csv, output_text],
87
  )
88
 
89
+ reporting.load(None, js=ga_load)
90
+
91
  llm_jp_3 = "llm-jp/llm-jp-3-1.8b"
92
  gemma_2 = "google/gemma-2-2b"
93
 
 
118
  token_count = len(tokens)
119
  return f"<p>{tokenized_text}</p><p>Token Count: {token_count}</p>"
120
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
+ with gr.Blocks() as tokenization:
123
  with gr.Row():
124
  tokenizer_dropdown = gr.Dropdown(
125
  label="Tokenizerを選択", choices=tokenizer_names, value=tokenizer_names[0]