"""Chainlit chat frontend streaming completions from a Hugging Face inference endpoint.

The conversation uses the OpenAssistant prompt format: each user turn is wrapped
as ``<|prompter|>...<|endoftext|><|assistant|>`` and the model reply is appended
with a trailing ``<|endoftext|>``; the joined history is re-sent as the prompt
on every turn.
"""

import os

import chainlit as cl
from huggingface_hub import AsyncInferenceClient

# SECURITY: a real-looking Hugging Face token was hard-coded here, which means it
# is exposed to anyone with repo access and should be revoked/rotated. Prefer the
# HF_API_TOKEN environment variable; the literal remains only as a fallback so
# existing deployments keep working until rotation.
API_TOKEN = os.getenv("HF_API_TOKEN", "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")

# Dedicated inference endpoint serving the model.
API_URL = "https://kfsb1xfskc2136wg.eu-west-1.aws.endpoints.huggingface.cloud"

# NOTE(review): `headers` is never used in this file — AsyncInferenceClient adds
# the Authorization header itself from `token=`. Kept (not deleted) in case an
# unseen part of the project imports it; confirm before removing.
headers = {
    "Authorization": f"Bearer {API_TOKEN}",
    "Content-Type": "application/json",
}

client = AsyncInferenceClient(model=API_URL, token=API_TOKEN)


@cl.on_chat_start
async def on_chat_start() -> None:
    """Initialize an empty per-session history and greet the user.

    Renamed from ``main`` — the original file defined two module-level
    functions both called ``main``, the second shadowing the first. The
    decorators register the coroutines, so the names are free to differ.
    """
    cl.user_session.set("history", [])
    msg = cl.Message(content="Loading Chat please wait ...")
    await msg.send()
    # Let the user know that the system is ready.
    # NOTE(review): `update(content=...)` matches older chainlit releases; on
    # chainlit >= 1.0 this would be `msg.content = ...; await msg.update()` —
    # confirm against the pinned chainlit version.
    await msg.update(content="Chat has been loaded. You can now ask questions!")


@cl.on_message
async def on_message(message: str) -> None:
    """Stream a completion for the user's message and extend the session history.

    The full history (all prior prompter/assistant turns) is joined into a
    single prompt string, so the model sees the whole conversation each turn.

    NOTE(review): newer chainlit passes a ``cl.Message`` here, not ``str``
    (use ``message.content``); the ``str`` annotation is kept to match the
    original behavior — confirm against the pinned chainlit version.
    """
    history = cl.user_session.get("history")
    history.append("<|prompter|>" + message + "<|endoftext|><|assistant|>")

    response = ""
    msg = cl.Message(content="")
    async for token in await client.text_generation(
        "".join(history), stream=True, max_new_tokens=250
    ):
        # The endpoint may emit the end-of-text sentinel as a token; keep it out
        # of the visible reply (it is re-added to history below as a delimiter).
        if token != "<|endoftext|>":
            print(token, end="")  # debug echo to the server console
            response += token
            await msg.stream_token(token)

    history.append(response + "<|endoftext|>")
    cl.user_session.set("history", history)
    print(history)  # debug: dump the accumulated conversation
    await msg.send()