Usage with vLLM

from vllm import LLM, SamplingParams
from transformers import AutoTokenizer, pipeline

# Hugging Face model ID; the same ID is used for the vLLM engine and the
# tokenizer so the chat template matches the served weights.
BASE_MODEL = "sh2orc/gemma-1.1-korean-7b-it"

llm = LLM(model=BASE_MODEL)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# Padding settings are kept from the original example.
# NOTE(review): the tokenizer is only used below to render the chat template
# and look up token IDs, so these probably have no effect here - confirm.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

# Korean user prompt: "Tell me about Dokdo."
# (The literal was previously stored as mis-decoded UTF-8, which would send
# garbage to the model instead of the intended question.)
instruction = '독도에 대해서 알려줘'

messages = [
    {
        "role": "user",
        "content": instruction,
    },
]

# Render the conversation to a plain prompt string (tokenize=False) and append
# the generation prompt so the model answers as the assistant.
# NOTE(review): the rendered text may already begin with the BOS token and
# vLLM may add another when it tokenizes the prompt - confirm.
prompt_message = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Stop on either the tokenizer's EOS token or the "<end_of_turn>" marker.
eos_token_id = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<end_of_turn>"),
]

sampling_params = SamplingParams(
    stop_token_ids=eos_token_id,
    temperature=0.2,
    top_p=0.8,
    max_tokens=4096,
)

outputs = llm.generate(prompt_message, sampling_params)

# Print the first completion of each returned request output.
for output in outputs:
    generated_text = output.outputs[0].text
    print(generated_text)

Result

λ…λ„λŠ” 동해에 μœ„μΉ˜ν•œ λŒ€ν•œλ―Όκ΅­μ˜ νŠΉλ³„μžμΉ˜λ„λ‘œ, 면적은 5.8km이며, 행정ꡬ역은 λ…λ„κ΅°μž…λ‹ˆλ‹€. λ…λ„μ˜ 면적은 λŒ€λž΅ 5.8km이며, μΈκ΅¬λŠ” μ•½ 10λͺ…μž…λ‹ˆλ‹€. λ…λ„λŠ” ν•œκ΅­κ³Ό 일본의 ꡭ경을 ν˜•μ„±ν•˜κ³  있으며, λ…λ„μ˜ 주민듀은 일본의 영ν–₯을 많이 λ°›μ•˜μŠ΅λ‹ˆλ‹€. λ…λ„μ˜ κ²½μ œλŠ” κ΄€κ΄‘κ³Ό μ†Œκ·œλͺ¨μ˜ 어업이 μ£Όλ₯Ό 이루며, λ…λ„λŠ” 1949λ…„λΆ€ν„° 1954λ…„κΉŒμ§€ 일본에 μ˜ν•΄ μ λ Ήλ˜μ—ˆμŠ΅λ‹ˆλ‹€. λ…λ„λŠ” 1954λ…„ 인천 κ°•ν™” μ‘°μ•½μœΌλ‘œ 인해 μš°λ¦¬λ‚˜λΌμ˜ μ˜ν† κ°€ λ˜μ—ˆμŠ΅λ‹ˆλ‹€.
Downloads last month
11
Safetensors
Model size
8.54B params
Tensor type
BF16
·
Inference Providers NEW
This model is not currently available via any of the supported Inference Providers.
The model cannot be deployed to the HF Inference API: The model has no pipeline_tag.