asoria HF staff committed on
Commit
f2ee5d3
1 Parent(s): 5a8d02c

Second attempt: Llama2 for representation model

Browse files
Files changed (2) hide show
  1. app.py +35 -4
  2. requirements.txt +4 -3
app.py CHANGED
@@ -17,12 +17,19 @@ from cuml.cluster import HDBSCAN
17
  from huggingface_hub import HfApi
18
  from sklearn.feature_extraction.text import CountVectorizer
19
  from sentence_transformers import SentenceTransformer
20
- from transformers import pipeline
 
 
 
 
 
 
21
 
22
  # These imports at the end because of torch/datamapplot issue in Zero GPU
23
  # import spaces
24
  import gradio as gr
25
 
 
26
 
27
  """
28
  TODOs:
@@ -53,9 +60,33 @@ CHUNK_SIZE = 10_000
53
  session = requests.Session()
54
  sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
55
 
56
- prompt = "I have a topic described by the following keywords: [KEYWORDS]. Based on the previous keywords, what is this topic about?"
57
- generator = pipeline("text2text-generation", model="google/flan-t5-base")
58
- representation_model = TextGeneration(generator)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  vectorizer_model = CountVectorizer(stop_words="english")
61
 
 
17
  from huggingface_hub import HfApi
18
  from sklearn.feature_extraction.text import CountVectorizer
19
  from sentence_transformers import SentenceTransformer
20
+ from transformers import (
21
+ BitsAndBytesConfig,
22
+ AutoTokenizer,
23
+ AutoModelForCausalLM,
24
+ pipeline,
25
+ )
26
+ from torch import bfloat16
27
 
28
  # These imports at the end because of torch/datamapplot issue in Zero GPU
29
  # import spaces
30
  import gradio as gr
31
 
32
+ from prompts import REPRESENTATION_PROMPT
33
 
34
  """
35
  TODOs:
 
60
  session = requests.Session()
61
  sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
62
 
63
+ # Representation model
64
+ bnb_config = BitsAndBytesConfig(
65
+ load_in_4bit=True,
66
+ bnb_4bit_quant_type="nf4",
67
+ bnb_4bit_use_double_quant=True,
68
+ bnb_4bit_compute_dtype=bfloat16,
69
+ )
70
+
71
+ model_id = "meta-llama/Llama-2-7b-chat-hf"
72
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
73
+ model = AutoModelForCausalLM.from_pretrained(
74
+ model_id,
75
+ trust_remote_code=True,
76
+ quantization_config=bnb_config,
77
+ device_map="auto",
78
+ )
79
+ model.eval()
80
+ generator = pipeline(
81
+ model=model,
82
+ tokenizer=tokenizer,
83
+ task="text-generation",
84
+ temperature=0.1,
85
+ max_new_tokens=500,
86
+ repetition_penalty=1.1,
87
+ )
88
+ representation_model = TextGeneration(generator, prompt=REPRESENTATION_PROMPT)
89
+ # End of representation model
90
 
91
  vectorizer_model = CountVectorizer(stop_words="english")
92
 
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
- # --extra-index-url https://pypi.nvidia.com
2
- # cuml-cu11
3
  gradio_huggingfacehub_search==0.0.7
4
  duckdb
5
  accelerate
@@ -12,4 +12,5 @@ pandas
12
  torch
13
  numpy
14
  python-dotenv
15
- kaleido
 
 
1
+ --extra-index-url https://pypi.nvidia.com
2
+ cuml-cu11
3
  gradio_huggingfacehub_search==0.0.7
4
  duckdb
5
  accelerate
 
12
  torch
13
  numpy
14
  python-dotenv
15
+ kaleido
16
+ transformers