"""Minimal FastAPI service exposing text generation with facebook/incoder-1B."""

import torch
import uvicorn
from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer

app = FastAPI()

model_name = 'facebook/incoder-1B'
# Load tokenizer and weights once at import time so every request reuses them.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, low_cpu_mem_usage=True)
model.eval()  # inference-only service: disable dropout etc.
print('load ok')


@app.get("/")
def read_root(
    input_text: str,
    max_length: int = 128,
    top_p: float = 0.95,
    top_k: int = 50,
    num_beams: int = 1,
    temperature: float = 1.0,
    repetition_penalty: float = 1.0,
):
    """Generate a completion for ``input_text`` and return it as JSON.

    Query parameters are now typed so FastAPI validates and converts them;
    previously they were untyped and arrived as raw strings, so
    ``model.generate`` received e.g. ``"100"`` instead of ``100``.
    Defaults make every sampling knob optional (backward-compatible:
    callers that pass all parameters behave as before).

    Args:
        input_text: Prompt to complete.
        max_length: Maximum total token length of the generated sequence.
        top_p: Nucleus-sampling probability mass.
        top_k: Top-k sampling cutoff.
        num_beams: Beam-search width (1 = greedy/sampling).
        temperature: Softmax temperature.
        repetition_penalty: Penalty applied to repeated tokens.

    Returns:
        ``{"result": <decoded text>}``. The original ``return {res}`` built a
        one-element *set* (serialized by FastAPI as a bare list) — almost
        certainly a typo for a dict response.
    """
    inpt = tokenizer.encode(input_text, return_tensors="pt")
    with torch.no_grad():  # inference: skip autograd bookkeeping
        out = model.generate(
            inpt,
            max_length=max_length,
            top_p=top_p,
            top_k=top_k,
            temperature=temperature,
            num_beams=num_beams,
            repetition_penalty=repetition_penalty,
        )
    res = tokenizer.decode(out[0])
    return {"result": res}


if __name__ == "__main__":
    # Makes the previously-unused uvicorn import meaningful; no effect when
    # the module is imported by an external ASGI server.
    uvicorn.run(app, host="127.0.0.1", port=8000)