Spaces:

Shuu12121
/

code-doc-generator

Running on Zero

Shuu12121 committed on Apr 14

Commit

986ff0b

verified ·

1 Parent(s): 501d796

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -70,15 +70,16 @@ def generate_docstring(code: str) -> str:
             output_ids = model.generate(
                 input_ids=inputs.input_ids,
                 attention_mask=inputs.attention_mask,
-                max_length=256,           # 生成するDocstringの最大長
-                num_beams=5,              # ビームサーチのビーム数
-                early_stopping=False,      # 早く停止させるか
-                # decoder_start_token_idは通常model.configから自動設定される
-                eos_token_id=decoder_tokenizer.eos_token_id, # EOSトークンID
-                pad_token_id=pad_token_id, # PADトークンID (Noneでないことを保証)
-                no_repeat_ngram_size=2    # 繰り返さないN-gramサイズ
             )
         print(f"Generated output tokens length: {output_ids.shape[1]}")
         # デコードしてテキストに変換

             output_ids = model.generate(
                 input_ids=inputs.input_ids,
                 attention_mask=inputs.attention_mask,
+                max_length=128,
+                num_beams=4,
+                early_stopping=True,
+                eos_token_id=decoder_tokenizer.eos_token_id,
+                pad_token_id=pad_token_id,
+                no_repeat_ngram_size=3,
+                bad_words_ids=decoder_tokenizer(["sexual", "abuse", "child"], add_special_tokens=False).input_ids
             )
         print(f"Generated output tokens length: {output_ids.shape[1]}")
         # デコードしてテキストに変換