Spaces:
Running
on
Zero
Running
on
Zero
File size: 7,741 Bytes
a9106b7 b7bc525 a9106b7 41cc794 931ea15 5ed473c b57680f 5ed473c f910bef 931ea15 f910bef a9106b7 5ed473c a3f1767 fecb2b3 a3f1767 5ed473c b7bc525 b2b7f7a 5c20eaf b2b7f7a 5c20eaf b2b7f7a 124c297 5ed473c b2b7f7a 931ea15 a9106b7 5ed473c b2b7f7a a3f1767 b2b7f7a ce4dc04 b2b7f7a a9106b7 ce4dc04 a9106b7 ce4dc04 a9106b7 ce4dc04 a9106b7 ce4dc04 a9106b7 ce4dc04 a9106b7 124c297 a9106b7 1d67a76 a9106b7 5ed473c a9106b7 1d67a76 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
import os
from threading import Thread
from typing import Iterator
import gradio as gr
import spaces
import torch
from transformers import (
AutoModelForCausalLM,
BitsAndBytesConfig,
AutoTokenizer,
TextIteratorStreamer,
)
# Time budget, in seconds, for one GPU-backed call to `generate`; it is also
# quoted to users in the page description below.
GENERATION_TIME = 180

# Markdown shown at the top of the demo page.
DESCRIPTION = f"""\
# ORLM LLaMA-3-8B
Hello! I'm ORLM-LLaMA-3-8B, here to automate your optimization modeling tasks! Check our [repo](https://github.com/Cardinal-Operations/ORLM) and [paper](https://arxiv.org/abs/2405.17743)!
Please note that solution generation may be terminated if it exceeds {GENERATION_TIME} seconds. We strongly recommend running the demo locally using our [sample script](https://github.com/Cardinal-Operations/ORLM/blob/master/scripts/inference.py) for a smoother experience.
If the demo successfully generates a code solution, execute it in your Python environment with `coptpy` installed to obtain the final optimal value for your task.
"""

# Bounds for the "Max new tokens" slider, and the prompt truncation length
# (overridable through the MAX_INPUT_TOKEN_LENGTH environment variable).
MAX_MAX_NEW_TOKENS = 4096
DEFAULT_MAX_NEW_TOKENS = 2048
MAX_INPUT_TOKEN_LENGTH = int(os.environ.get("MAX_INPUT_TOKEN_LENGTH", "1024"))

if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Load the tokenizer and the 8-bit quantized model once, at import time.
model_id = "CardinalOperations/ORLM-LLaMA-3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)
model.config.sliding_window = 4096
model.eval()

# Single-turn prompt; `{Question}` is substituted with the user's message.
PROMPT_TEMPLATE = r"""
Below is an operations research question. Build a mathematical model and corresponding python code using `coptpy` that appropriately addresses the question.
# Question:
{Question}
# Response:
"""
@spaces.GPU(duration=GENERATION_TIME)
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    max_new_tokens: int = 1024,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> Iterator[str]:
    """Stream the model's answer to a single operations research question.

    The question is inserted into ``PROMPT_TEMPLATE``, tokenized (truncated to
    ``MAX_INPUT_TOKEN_LENGTH`` tokens) and handed to ``model.generate`` on a
    background thread, while this generator relays the text as it arrives.

    Args:
        message: The user's question; substituted for ``{Question}``.
        chat_history: Earlier turns supplied by the chat UI. Unused: every
            request is answered as a standalone, single-turn prompt.
        max_new_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature; values <= 0 select greedy decoding.
        top_p: Nucleus-sampling threshold (forwarded only when sampling).
        top_k: Top-k cutoff (forwarded only when sampling).
        repetition_penalty: Repetition penalty forwarded to ``model.generate``.

    Yields:
        The whole response generated so far, growing with each new chunk.
    """
    prompt = PROMPT_TEMPLATE.replace("{Question}", message).strip()
    encoded = tokenizer(
        prompt,
        return_tensors="pt",
        max_length=MAX_INPUT_TOKEN_LENGTH,
        truncation=True,
    )
    input_ids = encoded.input_ids.to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer,
        timeout=20.0,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    # A non-positive temperature means greedy decoding. The sampling knobs are
    # forwarded only when sampling so that greedy runs do not hand
    # `model.generate` a temperature of 0 or other ignored sampling flags.
    do_sample = temperature > 0.0
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        num_beams=1,
        repetition_penalty=repetition_penalty,
    )
    if do_sample:
        generate_kwargs.update(temperature=temperature, top_p=top_p, top_k=top_k)

    # Generation runs on a worker thread so this generator can consume the
    # streamer concurrently.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)

    # The streamer is exhausted, so generation is over; reap the worker.
    worker.join()
# Chat UI wired to `generate`. The sliders below supply its generation
# parameters (max new tokens, temperature, top-p, top-k, repetition penalty);
# NOTE(review): Gradio is expected to pass them to `fn` in this order after
# (message, chat_history) — keep the order in sync with `generate`.
#
# The example prompts are ordinary (non-raw) strings so that the "\n" sequences
# are real line breaks; as raw strings they reached the textbox and the model
# as a literal backslash followed by "n".
chat_interface = gr.ChatInterface(
    fn=generate,
    additional_inputs=[
        gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=MAX_MAX_NEW_TOKENS,
            step=1,
            value=DEFAULT_MAX_NEW_TOKENS,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.0,
            maximum=4.0,
            step=0.1,
            value=0.0,
        ),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.05,
            maximum=1.0,
            step=0.05,
            value=0.95,
        ),
        gr.Slider(
            label="Top-k",
            minimum=1,
            maximum=1000,
            step=1,
            value=20,
        ),
        gr.Slider(
            label="Repetition penalty",
            minimum=1.0,
            maximum=2.0,
            step=0.05,
            value=1.0,
        ),
    ],
    stop_btn=None,
    examples=[
        ["A lab has 1000 units of medicinal ingredients to make two pills, a large pill and a small pill. A large pill requires 3 units of medicinal ingredients and 2 units of filler. A small pill requires 2 units of medicinal ingredients and 1 unit of filler. The lab has to make at least 100 large pills. However, since small pills are more popular at least 60% of the total number of pills must be small. How many of each should be made to minimize the total number of filler material needed?"],
        ["Let's say you're on a mission to create the perfect meal plan for yourself. You're focused on getting the right balance of nutrients without digging too deep into your pockets. You have six different food options to consider: Chicken, Oats, Tofu, Rice, Beef, and Salmon. Each of these foods provides a certain amount of protein, carbohydrates, and calories, and they all come with their own distinct prices.\n\nHere's a detailed breakdown of the nutritional content and cost of each food item:\n\n- Chicken: Delivers 14 grams of protein, a single gram of carbohydrates, and 164 calories at a cost of $6.\n- Oats: Gives you 11 grams of protein, 6 grams of carbohydrates, and 210 calories for just $2.\n- Tofu: Offers 8 grams of protein, 12 grams of carbohydrates, and 98 calories at a cost of $9.\n- Rice: Provides 1 gram of protein, a generous 17 grams of carbohydrates, and 92 calories for $3.\n- Beef: Comes packed with 16 grams of protein, 11 grams of carbohydrates, and 211 calories, priced at $7.\n- Salmon: Brings a hefty 19 grams of protein, 13 grams of carbohydrates, and 211 calories but costs $9.\n\nYou want to make sure your meal plan meets the following nutritional targets: at least 70 grams of protein, 117 grams of carbohydrates, and 1837 calories. Considering these six foods, what is the least amount of money you need to spend to meet these dietary needs?\nRemember, your response should only contain the optimal value of the cost to meet the requirements."],
        ["Haus Toys can manufacture and sell toy trucks, toy planes, toy boats, and toy trains. The profit from selling one truck is $5, from one plane is $10, from one boat is $8, and from one train is $7. How many types of toys should Haus Toys manufacture to maximize profit?\n\nThere are 890 units of wood available. Manufacturing one truck requires 12 units of wood, one plane requires 20 units of wood, one boat requires 15 units of wood, and one train requires 10 units of wood.\n\nThere are 500 units of steel available. Manufacturing one plane requires 3 units of steel, one boat requires 5 units of steel, one train requires 4 units of steel, and one truck requires 6 units of steel.\n\nIf Haus Toys manufactures trucks, then they will not manufacture trains.\n\nHowever, if they manufacture boats, they will also manufacture planes.\n\nThe number of toy boats manufactured cannot exceed the number of toy trains manufactured."],
    ],
    cache_examples=False,
)
# Page layout: the Markdown description banner, then the chat interface.
with gr.Blocks(css="style.css", fill_height=True) as demo:
    gr.Markdown(DESCRIPTION)
    # gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
    chat_interface.render()

if __name__ == "__main__":
    # Enable the request queue (max_size=20) and then start the server.
    queued_demo = demo.queue(max_size=20)
    queued_demo.launch()
|