BioQwen: A Small-Parameter, High-Performance Bilingual Model for Biomedical Multi-Tasks

For model inference, please refer to the following example code:

import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM

# Only report errors from the transformers library.
transformers.logging.set_verbosity_error()

# Passed to generate() as max_new_tokens, i.e. the cap on reply length.
max_length = 512
model_path = 'yueqingyou/BioQwen-1.8B'

tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)

# Load the weights in bfloat16 with automatic device placement, requesting
# the FlashAttention-2 attention implementation, then switch to eval mode.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map='auto',
    torch_dtype=torch.bfloat16,
    attn_implementation='flash_attention_2',
)
model = model.eval()

def predict(prompt):
    """Generate the model's reply to a single user prompt.

    The prompt language is guessed by comparing the number of CJK Unified
    Ideograph characters (U+4E00..U+9FFF) with the number of ASCII letters.
    A Chinese system message is used only when Chinese characters are
    strictly more numerous; otherwise the English one is used.

    Relies on the module-level ``tokenizer``, ``model`` and ``max_length``.

    Args:
        prompt: The user's question as a string.

    Returns:
        The decoded reply with special tokens removed and surrounding
        whitespace stripped.
    """
    zh_system = "你是千问生物智能助手,一个专注于生物领域的先进人工智能。"
    en_system = "You are BioQwen, an advanced AI specializing in the field of biology."

    # The two character classes are disjoint, so they can be counted
    # independently.
    chinese_count = sum('\u4e00' <= char <= '\u9fff' for char in prompt)
    english_count = sum('a' <= char.lower() <= 'z' for char in prompt)
    lang = 'zh' if chinese_count > english_count else 'en'

    messages = [
        {"role": "system", "content": zh_system if lang == 'zh' else en_system},
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # Follow the model's own device instead of hard-coding 'cuda': the model
    # is loaded with device_map='auto', so its placement is not fixed here.
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # Fall back to the EOS id when the tokenizer defines no padding token.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():
        generated_ids = model.generate(
            model_inputs.input_ids,
            # Pass the mask explicitly so generate() does not have to infer
            # it, which is unreliable when pad and EOS tokens coincide.
            attention_mask=model_inputs.attention_mask,
            max_new_tokens=max_length,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=pad_token_id,
            do_sample=True,
            top_p=0.9,
            temperature=0.3,
            repetition_penalty=1.1
        )

    # generate() returns prompt + continuation; keep only the new tokens.
    prompt_length = model_inputs.input_ids.shape[1]
    response = tokenizer.decode(
        generated_ids[0][prompt_length:], skip_special_tokens=True
    )

    return response.strip()

# Example question used to demonstrate inference.
prompt = (
    'I am suffering from irregular periods. '
    'I am currently taking medication Levothyroxine 50. '
    'My T3 is 0.87 ng/mL, T4 is 8.30 ug/dL, TSH is 2.43 uIU/mL. '
    'I am 34 years old, weigh 75 kg, and 5 feet tall. '
    'Please advice.'
)
# Run the model first, then print the question and its answer together.
answer = predict(prompt)
print(f'Question:\t{prompt}\n\nAnswer:\t{answer}')

For more detailed information and the full code, please refer to the project's GitHub repository.

Downloads last month
11
Safetensors
Model size
1.84B params
Tensor type
BF16
·
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Model tree for liyinghong/BioQwen-1.8B

Quantizations
1 model

Dataset used to train liyinghong/BioQwen-1.8B