peterpeter8585 committed
Commit 97d4967 · verified · 1 Parent(s): db9b53c

Update app.py

Files changed (1): app.py +19 -11
app.py CHANGED
@@ -6,12 +6,12 @@ import random
 from diffusers import DiffusionPipeline
 import torch
 import transformers
+from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig
 transformers.utils.move_cache()
 device = "cuda" if torch.cuda.is_available() else "cpu"
 import os
 password1=os.environ["password"]
 model_id = "peterpeter8585/ai2"
-client2=InferenceClient("peterpeter8585/ai2")

 def respond1(multimodal_input, history: list[tuple[str, str]],system_message,max_tokens,temperature,top_p,password):
     if password==password1:
@@ -26,16 +26,24 @@ def respond1(multimodal_input, history: list[tuple[str, str]],system_message,max
         messages.append({"role": "assistant", "content": [{"type":"text", "text":val[1]}]})
         response = ""

-        for message in client2.chat_completion(
-            messages,
-            max_tokens=max_tokens,
-            stream=True,
-            temperature=temperature,
-            top_p=top_p,
-        ):
-            token = message
-            response += token
-            yield response
+        model_id = "HuggingFaceM4/idefics2-8b"
+
+        quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_use_double_quant=True, bnb_4bit_compute_dtype=torch.float16)
+        processor = AutoProcessor.from_pretrained(model_id)
+        model = AutoModelForVision2Seq.from_pretrained(model_id, torch_dtype=torch.float16, quantization_config=quantization_config)
+        prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
+        inputs = processor(text=prompt, images=[images], return_tensors="pt")
+        inputs = {k: v.to(model.device) for k, v in inputs.items()}
+        num_tokens = len(inputs["input_ids"][0])
+        with torch.inference_mode():
+            generated_ids = model.generate(**inputs, max_new_tokens=max_tokens,top_p=top_p, temperature=1.0,)
+
+        new_tokens = generated_ids[:, num_tokens:]
+        generated_text = processor.batch_decode(new_tokens, skip_special_tokens=True)[0]
+
+        token = generated_text
+        response+=token
+        yield response



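For context, this change swaps the remote InferenceClient streaming call for local generation with HuggingFaceM4/idefics2-8b, quantized to 4 bits via bitsandbytes. Below is a minimal standalone sketch of the new path, not the commit's code verbatim: it hoists model loading to module scope (the hunk loads the model inside respond1, so every call would reload it), takes the images as an explicit argument (the hunk references an `images` name presumably built from multimodal_input in lines the diff does not show), passes the user's temperature through with do_sample=True (the hunk hardcodes temperature=1.0, and without do_sample the sampling knobs are ignored), and uses an illustrative helper name, generate_reply.

    # Sketch of the committed inference path, restructured to load the model
    # once at import time instead of on every request. generate_reply and its
    # argument list are illustrative, not from the commit.
    import torch
    from transformers import AutoModelForVision2Seq, AutoProcessor, BitsAndBytesConfig

    model_id = "HuggingFaceM4/idefics2-8b"

    # 4-bit NF4 weights with double quantization; matmuls run in fp16.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.float16,
    )
    processor = AutoProcessor.from_pretrained(model_id)
    model = AutoModelForVision2Seq.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        quantization_config=quantization_config,
    )

    def generate_reply(messages, images, max_tokens, temperature, top_p):
        # Render the chat history into the model's expected prompt format.
        prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
        inputs = processor(text=prompt, images=images, return_tensors="pt")
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        prompt_len = inputs["input_ids"].shape[1]
        with torch.inference_mode():
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                do_sample=True,  # without this, temperature/top_p are ignored
                temperature=temperature,
                top_p=top_p,
            )
        # Decode only the newly generated tokens, not the echoed prompt.
        new_tokens = generated_ids[:, prompt_len:]
        return processor.batch_decode(new_tokens, skip_special_tokens=True)[0]

NF4 with double quantization stores the 8B weights in roughly a quarter of their fp16 footprint while keeping compute in fp16, which is what makes running this model in a single Space GPU plausible.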