Uploaded model

  • Developed by: 84basi
  • License: apache-2.0
  • Fine-tuned from model: llm-jp/llm-jp-3-13b

This Llama model was trained 2x faster with Unsloth and Hugging Face's TRL library.


%%capture
# Notebook setup cell: the %%capture cell magic on the first line suppresses
# the output of the install commands below.
# Step 1: install the released unsloth package.
!pip install unsloth
# Step 2: remove it again and reinstall straight from the GitHub repository
# with the "colab-new" extra, bypassing pip's cache, so that the latest
# development version is the one that ends up installed.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

from unsloth import FastLanguageModel
import torch
import json

# Hugging Face repository ID of the fine-tuned model to load.
model_name = "84basi/llm-jp-3-13b-finetune-2.1"
# The literal below appears to be a placeholder; supply a real Hugging Face
# access token (the trailing #@param directive exposes it as a Colab form field).
token = "Hugging Face Token" #@param {type:"string"}

# Loading options forwarded unchanged to FastLanguageModel.from_pretrained.
max_seq_length = 2048
# NOTE(review): None presumably lets Unsloth pick the dtype automatically — confirm.
dtype = None
# NOTE(review): presumably requests 4-bit quantized loading — confirm.
load_in_4bit = True

# Load the model together with its tokenizer in a single call.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = token,
)
# NOTE(review): presumably switches the model into Unsloth's inference mode
# before generation — confirm against the Unsloth documentation.
FastLanguageModel.for_inference(model)

# Load the evaluation tasks into `datasets` (a list of dicts).
# A single JSON object may be spread over several physical lines, so stripped
# lines are accumulated in `item` until the buffer ends with "}" and only then
# parsed as one record.
datasets = []
# The encoding is given explicitly so that non-ASCII task text is decoded the
# same way on every platform instead of depending on the locale default; this
# also matches the UTF-8 used when the results are written and re-read.
with open("./elyza-tasks-100-TV_0.jsonl", "r", encoding="utf-8") as f:
    item = ""
    for line in f:
        line = line.strip()
        item += line
        if item.endswith("}"):
            datasets.append(json.loads(item))
            item = ""

from tqdm import tqdm

# Run the model over every task and collect one result dict per task with the
# keys "task_id", "input" and "output".
results = []
for dt in tqdm(datasets):
    # Named input_text rather than input so that the builtin input() is not
    # shadowed for the rest of the notebook session.
    input_text = dt["input"]
    # Instruction/answer prompt; the model continues after the answer marker.
    prompt = f"### 指示\n{input_text}\n### 回答\n"
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
    # Sampling is disabled (do_sample=False); at most 512 new tokens are
    # generated and a repetition penalty of 1.2 is applied.
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        use_cache=True,
        do_sample=False,
        repetition_penalty=1.2,
    )
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the text after the last answer marker (the decoded text is
    # expected to begin with the prompt itself).
    prediction = decoded.split('\n### 回答')[-1]
    results.append({"task_id": dt["task_id"], "input": input_text, "output": prediction})

# Write the results as JSONL: one JSON object per line, UTF-8 encoded.
# ensure_ascii=False writes non-ASCII characters as-is instead of \uXXXX escapes.
# The path is a plain string literal; it has no placeholders, so no f-prefix.
with open("/content/llm-jp-3-13b-finetune-2.1_output-2.jsonl", 'w', encoding='utf-8') as f:
    for result in results:
        json.dump(result, f, ensure_ascii=False)
        f.write('\n')

!pip install python-docx

import json

from docx import Document  # pip install python-docxでインストールする
from docx.shared import Inches, Pt, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH


def read_jsonl_data(jsonl_path):
    """
    Read a submission JSONL file and return its records.

    Each non-blank line is parsed as one JSON value. Blank lines are
    skipped. A line that is not valid JSON is reported on stdout and
    skipped; it does not abort the read.

    Args:
        jsonl_path (str): Path to the submission JSONL file (read as UTF-8).

    Returns:
        list: The parsed records, in file order (a list of dicts for a
        well-formed submission file).
    """
    records = []
    with open(jsonl_path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            text = raw_line.strip()
            if not text:
                # Nothing to parse on an empty or whitespace-only line.
                continue
            try:
                parsed = json.loads(text)
            except json.JSONDecodeError as e:
                print(f"JSONデコードエラー(行内容を確認してください): {e}")
            else:
                records.append(parsed)
    return records


def json_to_word(json_data, output_file):
    """
    Render a list of result records as a Word document.

    The document starts with a centered level-0 heading. Each record then
    gets a level-1 heading with its task ID, followed by an "Input:" and an
    "Output:" section (level-2 heading plus one non-bold paragraph each) and
    a separator line of 50 "=" characters.

    Args:
        json_data (list of dict): Records to render. The keys "task_id",
            "input" and "output" are read; a missing key is replaced by a
            placeholder text.
        output_file (str): Path of the Word file to write.
    """
    document = Document()

    heading = document.add_heading('LLM Output Analysis', 0)
    heading.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # (section heading, record key, text used when the key is missing)
    sections = (
        ('Input:', "input", "No Input"),
        ('Output:', "output", "No Output"),
    )
    separator = '=' * 50

    for record in json_data:
        task_id = record.get("task_id", "No Task ID")
        document.add_heading(f'Task ID: {task_id}', level=1)

        for label, key, fallback in sections:
            document.add_heading(label, level=2)
            run = document.add_paragraph().add_run(record.get(key, fallback))
            run.bold = False

        document.add_paragraph(separator)

    document.save(output_file)

# Export step: read the JSONL results file and write its records to a Word
# document with the same base name under /content.
jsonl_path = '/content/llm-jp-3-13b-finetune-2.1_output-2.jsonl'
output_file = '/content/llm-jp-3-13b-finetune-2.1_output-2.docx'
# Lines that fail to parse are reported and skipped by read_jsonl_data.
jsonl_data = read_jsonl_data(jsonl_path)
json_to_word(jsonl_data, output_file)
Downloads last month

-

Downloads are not tracked for this model. How to track
Inference Providers NEW
This model is not currently available via any of the supported Inference Providers.
The model cannot be deployed to the HF Inference API: The model has no pipeline_tag.

Model tree for 84basi/llm-jp-3-13b-finetune-2.1

Finetuned
(1118)
this model