Uploaded model

  • Developed by: hama-jp
  • License: Gemma Terms of Use
  • Finetuned from model: google/gemma-2-27b (improved using Qwen)

This Gemma 2 model was trained 2x faster with Unsloth and Hugging Face's TRL library.
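For reference, below is a minimal sketch of what an Unsloth + TRL SFT run producing a LoRA adapter on top of google/gemma-2-27b could look like. The dataset file name, LoRA rank, and training hyperparameters are placeholder assumptions, not the settings actually used for this model.

```python
# Hypothetical Unsloth + TRL SFT setup (illustrative only; not the exact training recipe).
import torch
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel

max_seq_length = 4096

# Load the base model in 4-bit and attach LoRA adapters.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "google/gemma-2-27b",
    max_seq_length = max_seq_length,
    load_in_4bit = True,
)
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,                    # LoRA rank -- assumed value
    lora_alpha = 16,
    lora_dropout = 0,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
)

# Placeholder SFT dataset with a pre-formatted "text" column.
dataset = load_dataset("json", data_files = "sft_data.jsonl", split = "train")

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        num_train_epochs = 1,
        learning_rate = 2e-4,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 10,
        output_dir = "outputs",
    ),
)
trainer.train()
```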

How to generate output.jsonl

%%capture
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

# Install Flash Attention 2 for softcapping support
import torch
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install --no-deps packaging ninja einops "flash-attn>=2.6.3"
from unsloth import FastLanguageModel
import torch
import json

max_seq_length = 4096   # Maximum sequence length
dtype = None            # None lets Unsloth auto-detect the dtype
load_in_4bit = True     # Load the model in 4-bit to reduce memory usage

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "hama-jp/gemma2-27b-sft-241213-lora-06",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
#@title Load ELYZA-tasks-100-TV
import json

# Path to the test file
file_path = 'elyza-tasks-100-TV_0.jsonl'

# Initialize the dataset dictionary
dataset_test = {}

# Read the JSONL file
with open(file_path, 'r', encoding='utf-8') as file:
    for line in file:
        # Parse each line as JSON
        task_data = json.loads(line.strip())
        # Extract task_id and input
        task_id = task_data.get("task_id")
        input_data = task_data.get("input")
        # Store in dataset_test keyed by task_id
        if task_id is not None:
            dataset_test[task_id] = {"input": input_data}

EOS_TOKEN = tokenizer.eos_token

# Prompt template (kept in Japanese, exactly as used at inference time)
alpaca_prompt = """### 指示
以下の入力に従って適切に処理してください。
### 入力:
{}
### 出力:
"""

# Add a "text" key to each entry in dataset_test
for task_id, content in dataset_test.items():
    input_text = content["input"]
    prompt_text = alpaca_prompt.format(input_text) + EOS_TOKEN
    dataset_test[task_id]["text"] = prompt_text

FastLanguageModel.for_inference(model)  # Enable native 2x faster inference

def extract_response(full_text):
    """
    Extracts the response part after '### 出力:'.
    Assumes the response starts after ':\n### 出力' and removes any trailing whitespace.
    """
    response_marker = "\n### 出力:"
    if response_marker in full_text:
        return full_text.split(response_marker, 1)[-1].strip()
    return full_text.strip()

with open("output.jsonl", "w", encoding="utf-8") as outfile:
    for i in range(100):
        # Get the input text
        input_text = dataset_test[i]["text"]

        # Tokenize and move input to GPU
        inputs = tokenizer(input_text, return_tensors="pt").to("cuda")

        # Generate output
        output = model.generate(
            **inputs,
            max_new_tokens=1024,
            temperature=0.15,
            repetition_penalty=1.05,
            use_cache=True,
            do_sample=True
        )

        # Decode output text
        decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)

        # Extract only the response part
        response_only = extract_response(decoded_output)

        # Print for debugging
        print("task_id:",i)
        print("input:",dataset_test[i]["input"])
        print("output:",response_only)
        print("---")

        # Prepare a dictionary for JSONL
        result = {
            "task_id": i,
            "input": dataset_test[i]["input"],
            "output": response_only
        }

        # Save to JSONL
        outfile.write(json.dumps(result, ensure_ascii=False) + "\n")
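
As a quick sanity check, the generated output.jsonl can be read back and validated. This is a minimal sketch assuming the 100-record format produced by the loop above (one JSON object per line with task_id, input, and output keys).

```python
import json

# Re-read output.jsonl and confirm it contains 100 well-formed records.
with open("output.jsonl", "r", encoding="utf-8") as f:
    records = [json.loads(line) for line in f]

assert len(records) == 100
assert all({"task_id", "input", "output"} <= set(r.keys()) for r in records)
print(records[0]["task_id"], records[0]["output"][:80])
```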