nsthorat-lilac committed on
Commit
5ad5918
1 Parent(s): 42ed5d8

Push to HF space

Browse files
Files changed (3) hide show
  1. README.md +1 -3
  2. data/lilac.yml +414 -45
  3. dist/lilac-1337.0.0-py3-none-any.whl +2 -2
README.md CHANGED
@@ -6,8 +6,6 @@ colorTo: purple
6
  sdk: docker
7
  app_port: 5432
8
  datasets:
9
- - lilacai/nikhil_staging-local-arxiv-karpathy
10
- - lilacai/nikhil_staging-local-pydantic-github
11
- - lilacai/nikhil_staging-local-lilac-github
12
 
13
  ---
 
6
  sdk: docker
7
  app_port: 5432
8
  datasets:
9
+ - lilacai/nikhil_staging-lilac-OpenOrca-100k
 
 
10
 
11
  ---
data/lilac.yml CHANGED
@@ -1,56 +1,425 @@
1
  datasets:
2
- - namespace: local
3
- name: lilac-github
4
  tags: []
5
  source:
6
- repo: lilacai/lilac
7
- branch: main
8
- ignore_directories:
9
- - lilac
10
- ignore_file_extensions:
11
- - .png
12
- - .mp4
13
- github_token: ''
14
- source_name: github
15
  embeddings:
16
- - path: content
17
  embedding: gte-small
18
- signals: []
19
- settings:
20
- ui:
21
- media_paths:
22
- - content
23
- markdown_paths: []
24
- preferred_embedding: null
25
- - namespace: local
26
- name: pydantic-github
27
- tags: []
28
- source:
29
- repo: pydantic/pydantic
30
- branch: main
31
- ignore_directories: []
32
- ignore_file_extensions: []
33
- github_token: ''
34
- source_name: github
35
- embeddings: []
36
- signals: []
37
- settings:
38
- ui:
39
- media_paths:
40
- - text
41
- markdown_paths: []
42
- preferred_embedding: null
43
- - namespace: local
44
- name: arxiv-karpathy
45
- tags: []
46
- source:
47
- source_name: llama_index_docs
48
- embeddings: []
49
- signals: []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  settings:
51
  ui:
52
  media_paths:
53
- - text
 
54
  markdown_paths: []
55
  preferred_embedding: null
56
  signals: []
 
1
  datasets:
2
+ - namespace: lilac
3
+ name: OpenOrca-100k
4
  tags: []
5
  source:
6
+ dataset_name: Open-Orca/OpenOrca
7
+ config_name: null
8
+ split: null
9
+ sample_size: 100000
10
+ revision: null
11
+ load_from_disk: false
12
+ source_name: huggingface
 
 
13
  embeddings:
14
+ - path: question
15
  embedding: gte-small
16
+ - path: response
17
+ embedding: gte-small
18
+ signals:
19
+ - path: question
20
+ signal:
21
+ threshold: 0.85
22
+ signal_name: near_dup
23
+ - path: question
24
+ signal:
25
+ signal_name: pii
26
+ - path: question
27
+ signal:
28
+ split_by_paragraph: false
29
+ signal_name: lang_detection
30
+ - path: question
31
+ signal:
32
+ embedding: gte-small
33
+ namespace: lilac
34
+ concept_name: positive-sentiment
35
+ version: null
36
+ draft: main
37
+ signal_name: concept_score
38
+ - path: question
39
+ signal:
40
+ embedding: gte-small
41
+ namespace: lilac
42
+ concept_name: non-english
43
+ version: null
44
+ draft: main
45
+ signal_name: concept_score
46
+ - path: question
47
+ signal:
48
+ embedding: gte-small
49
+ namespace: lilac
50
+ concept_name: toxicity
51
+ version: null
52
+ draft: main
53
+ signal_name: concept_score
54
+ - path: question
55
+ signal:
56
+ embedding: gte-small
57
+ namespace: lilac
58
+ concept_name: question
59
+ version: null
60
+ draft: main
61
+ signal_name: concept_score
62
+ - path: question
63
+ signal:
64
+ embedding: gte-small
65
+ namespace: lilac
66
+ concept_name: legal-termination
67
+ version: null
68
+ draft: main
69
+ signal_name: concept_score
70
+ - path: question
71
+ signal:
72
+ embedding: gte-small
73
+ namespace: lilac
74
+ concept_name: source-code
75
+ version: null
76
+ draft: main
77
+ signal_name: concept_score
78
+ - path: question
79
+ signal:
80
+ embedding: gte-small
81
+ namespace: lilac
82
+ concept_name: negative-sentiment
83
+ version: null
84
+ draft: main
85
+ signal_name: concept_score
86
+ - path: question
87
+ signal:
88
+ embedding: gte-small
89
+ namespace: lilac
90
+ concept_name: profanity
91
+ version: null
92
+ draft: main
93
+ signal_name: concept_score
94
+ - path: question
95
+ signal:
96
+ signal_name: text_statistics
97
+ - path: response
98
+ signal:
99
+ threshold: 0.85
100
+ signal_name: near_dup
101
+ - path: response
102
+ signal:
103
+ signal_name: pii
104
+ - path: response
105
+ signal:
106
+ split_by_paragraph: false
107
+ signal_name: lang_detection
108
+ - path: response
109
+ signal:
110
+ embedding: gte-small
111
+ namespace: lilac
112
+ concept_name: positive-sentiment
113
+ version: null
114
+ draft: main
115
+ signal_name: concept_score
116
+ - path: response
117
+ signal:
118
+ embedding: gte-small
119
+ namespace: lilac
120
+ concept_name: non-english
121
+ version: null
122
+ draft: main
123
+ signal_name: concept_score
124
+ - path: response
125
+ signal:
126
+ embedding: gte-small
127
+ namespace: lilac
128
+ concept_name: toxicity
129
+ version: null
130
+ draft: main
131
+ signal_name: concept_score
132
+ - path: response
133
+ signal:
134
+ embedding: gte-small
135
+ namespace: lilac
136
+ concept_name: question
137
+ version: null
138
+ draft: main
139
+ signal_name: concept_score
140
+ - path: response
141
+ signal:
142
+ embedding: gte-small
143
+ namespace: lilac
144
+ concept_name: legal-termination
145
+ version: null
146
+ draft: main
147
+ signal_name: concept_score
148
+ - path: response
149
+ signal:
150
+ embedding: gte-small
151
+ namespace: lilac
152
+ concept_name: source-code
153
+ version: null
154
+ draft: main
155
+ signal_name: concept_score
156
+ - path: response
157
+ signal:
158
+ embedding: gte-small
159
+ namespace: lilac
160
+ concept_name: negative-sentiment
161
+ version: null
162
+ draft: main
163
+ signal_name: concept_score
164
+ - path: response
165
+ signal:
166
+ embedding: gte-small
167
+ namespace: lilac
168
+ concept_name: profanity
169
+ version: null
170
+ draft: main
171
+ signal_name: concept_score
172
+ - path: response
173
+ signal:
174
+ signal_name: text_statistics
175
+ - path: system_prompt
176
+ signal:
177
+ signal_name: pii
178
+ - path: question
179
+ signal:
180
+ embedding: gte-small
181
+ namespace: lilac
182
+ concept_name: positive-sentiment
183
+ version: null
184
+ draft: main
185
+ signal_name: concept_score
186
+ - path: question
187
+ signal:
188
+ embedding: gte-small
189
+ namespace: lilac
190
+ concept_name: non-english
191
+ version: null
192
+ draft: main
193
+ signal_name: concept_score
194
+ - path: question
195
+ signal:
196
+ embedding: gte-small
197
+ namespace: lilac
198
+ concept_name: toxicity
199
+ version: null
200
+ draft: main
201
+ signal_name: concept_score
202
+ - path: question
203
+ signal:
204
+ embedding: gte-small
205
+ namespace: lilac
206
+ concept_name: question
207
+ version: null
208
+ draft: main
209
+ signal_name: concept_score
210
+ - path: question
211
+ signal:
212
+ embedding: gte-small
213
+ namespace: lilac
214
+ concept_name: legal-termination
215
+ version: null
216
+ draft: main
217
+ signal_name: concept_score
218
+ - path: question
219
+ signal:
220
+ embedding: gte-small
221
+ namespace: lilac
222
+ concept_name: source-code
223
+ version: null
224
+ draft: main
225
+ signal_name: concept_score
226
+ - path: question
227
+ signal:
228
+ embedding: gte-small
229
+ namespace: lilac
230
+ concept_name: negative-sentiment
231
+ version: null
232
+ draft: main
233
+ signal_name: concept_score
234
+ - path: question
235
+ signal:
236
+ embedding: gte-small
237
+ namespace: lilac
238
+ concept_name: profanity
239
+ version: null
240
+ draft: main
241
+ signal_name: concept_score
242
+ - path: response
243
+ signal:
244
+ embedding: gte-small
245
+ namespace: lilac
246
+ concept_name: positive-sentiment
247
+ version: null
248
+ draft: main
249
+ signal_name: concept_score
250
+ - path: question
251
+ signal:
252
+ embedding: gte-small
253
+ namespace: lilac
254
+ concept_name: positive-sentiment
255
+ version: null
256
+ draft: main
257
+ signal_name: concept_score
258
+ - path: question
259
+ signal:
260
+ embedding: gte-small
261
+ namespace: lilac
262
+ concept_name: non-english
263
+ version: null
264
+ draft: main
265
+ signal_name: concept_score
266
+ - path: question
267
+ signal:
268
+ embedding: gte-small
269
+ namespace: lilac
270
+ concept_name: toxicity
271
+ version: null
272
+ draft: main
273
+ signal_name: concept_score
274
+ - path: question
275
+ signal:
276
+ embedding: gte-small
277
+ namespace: lilac
278
+ concept_name: question
279
+ version: null
280
+ draft: main
281
+ signal_name: concept_score
282
+ - path: question
283
+ signal:
284
+ embedding: gte-small
285
+ namespace: lilac
286
+ concept_name: legal-termination
287
+ version: null
288
+ draft: main
289
+ signal_name: concept_score
290
+ - path: question
291
+ signal:
292
+ embedding: gte-small
293
+ namespace: lilac
294
+ concept_name: source-code
295
+ version: null
296
+ draft: main
297
+ signal_name: concept_score
298
+ - path: response
299
+ signal:
300
+ embedding: gte-small
301
+ namespace: lilac
302
+ concept_name: non-english
303
+ version: null
304
+ draft: main
305
+ signal_name: concept_score
306
+ - path: response
307
+ signal:
308
+ embedding: gte-small
309
+ namespace: lilac
310
+ concept_name: positive-sentiment
311
+ version: null
312
+ draft: main
313
+ signal_name: concept_score
314
+ - path: question
315
+ signal:
316
+ embedding: gte-small
317
+ namespace: lilac
318
+ concept_name: profanity
319
+ version: null
320
+ draft: main
321
+ signal_name: concept_score
322
+ - path: question
323
+ signal:
324
+ embedding: gte-small
325
+ namespace: lilac
326
+ concept_name: source-code
327
+ version: null
328
+ draft: main
329
+ signal_name: concept_score
330
+ - path: question
331
+ signal:
332
+ embedding: gte-small
333
+ namespace: lilac
334
+ concept_name: negative-sentiment
335
+ version: null
336
+ draft: main
337
+ signal_name: concept_score
338
+ - path: question
339
+ signal:
340
+ embedding: gte-small
341
+ namespace: lilac
342
+ concept_name: legal-termination
343
+ version: null
344
+ draft: main
345
+ signal_name: concept_score
346
+ - path: question
347
+ signal:
348
+ embedding: gte-small
349
+ namespace: lilac
350
+ concept_name: question
351
+ version: null
352
+ draft: main
353
+ signal_name: concept_score
354
+ - path: question
355
+ signal:
356
+ embedding: gte-small
357
+ namespace: lilac
358
+ concept_name: positive-sentiment
359
+ version: null
360
+ draft: main
361
+ signal_name: concept_score
362
+ - path: question
363
+ signal:
364
+ embedding: gte-small
365
+ namespace: lilac
366
+ concept_name: non-english
367
+ version: null
368
+ draft: main
369
+ signal_name: concept_score
370
+ - path: question
371
+ signal:
372
+ embedding: gte-small
373
+ namespace: lilac
374
+ concept_name: positive-sentiment
375
+ version: null
376
+ draft: main
377
+ signal_name: concept_score
378
+ - path: question
379
+ signal:
380
+ embedding: gte-small
381
+ namespace: lilac
382
+ concept_name: positive-sentiment
383
+ version: null
384
+ draft: main
385
+ signal_name: concept_score
386
+ - path: question
387
+ signal:
388
+ embedding: gte-small
389
+ namespace: lilac
390
+ concept_name: positive-sentiment
391
+ version: null
392
+ draft: main
393
+ signal_name: concept_score
394
+ - path: question
395
+ signal:
396
+ embedding: gte-small
397
+ namespace: lilac
398
+ concept_name: positive-sentiment
399
+ version: null
400
+ draft: main
401
+ signal_name: concept_score
402
+ - path: question
403
+ signal:
404
+ embedding: gte-small
405
+ namespace: lilac
406
+ concept_name: positive-sentiment
407
+ version: null
408
+ draft: main
409
+ signal_name: concept_score
410
+ - path: question
411
+ signal:
412
+ signal_name: cluster_hddbscan
413
+ - path: question
414
+ signal:
415
+ embedding: gte-small
416
+ min_cluster_size: 5
417
+ signal_name: cluster_hdbscan
418
  settings:
419
  ui:
420
  media_paths:
421
+ - question
422
+ - response
423
  markdown_paths: []
424
  preferred_embedding: null
425
  signals: []
dist/lilac-1337.0.0-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b180c1ec7c986252304e963a64885ea20c191b8aa5a8a008b60c5d14a511d67
3
- size 1203235
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ba779f41ef5ea20fb423cfe7709080e3a1a822541de488e219190574aab12df
3
+ size 1206544