ThinkLite-VL
Below is a code snippet showing how to run inference with the model using transformers.
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, GenerationConfig
from qwen_vl_utils import process_vision_info
instruct_prompt = r"You FIRST think about the reasoning process as an internal monologue and then provide the final answer. The reasoning process MUST BE enclosed within <think> </think> tags. The final answer MUST BE put in \boxed{}."
# Load the model and processor (device_map="auto" places weights on available GPUs)
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "russwang/ThinkLite-VL-7B", torch_dtype="auto", device_map="auto"
)
processor = AutoProcessor.from_pretrained("russwang/ThinkLite-VL-7B")

# Greedy decoding with a generous token budget for the reasoning trace
greedy_generation_config = GenerationConfig(
    do_sample=False,
    max_new_tokens=2048
)
messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "image",
                "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
            },
            {"type": "text", "text": "Describe this image. " + instruct_prompt},
        ],
    }
]
text = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
# Fetch and preprocess the visual inputs referenced in the messages
image_inputs, video_inputs = process_vision_info(messages)
inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
).to("cuda")
output = model.generate(
    **inputs,
    generation_config=greedy_generation_config,
    tokenizer=processor.tokenizer
)
# Decode only the newly generated tokens, dropping the echoed prompt
generated = output[0][inputs.input_ids.shape[1]:]
output_text = processor.decode(
    generated,
    skip_special_tokens=True,
    clean_up_tokenization_spaces=False
)
print(output_text)
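
Per the instruct_prompt above, the model wraps its reasoning in <think> </think> tags and puts the final answer in \boxed{}. If you want to separate the two programmatically, here is a minimal sketch; the helper name and regexes are illustrative assumptions (not part of the model or the transformers API), and the \boxed{} pattern only handles answers without nested braces.

import re

def split_reasoning_and_answer(response: str):
    # Illustrative helper, not part of the model's API. Assumes the prompt
    # format above: reasoning inside <think>...</think>, final answer inside
    # \boxed{...} with no nested braces.
    think_match = re.search(r"<think>(.*?)</think>", response, re.DOTALL)
    boxed_match = re.search(r"\\boxed\{([^{}]*)\}", response)
    reasoning = think_match.group(1).strip() if think_match else None
    answer = boxed_match.group(1).strip() if boxed_match else None
    return reasoning, answer

reasoning, answer = split_reasoning_and_answer(output_text)
print("Final answer:", answer)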