Spaces:
Running
Running
File size: 4,309 Bytes
b1494e2 76d83e5 4f4befa 77014bb b1494e2 77014bb b1494e2 7edba0c b1494e2 7edba0c b1494e2 7edba0c af895fd 066d4a0 77014bb b1494e2 31fbf3d b1494e2 6739168 b1494e2 066d4a0 6739168 96e9569 b1494e2 86fe272 02f502d 7edba0c 86fe272 7edba0c 494bc5a 066d4a0 494bc5a 066d4a0 494bc5a 7edba0c 77014bb b1494e2 7edba0c b1494e2 77bf495 066d4a0 7edba0c 066d4a0 77014bb 139e81a 7edba0c 4f4befa 7edba0c 172280a 7edba0c 31fbf3d 7edba0c 96e9569 7edba0c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
# Datasets to load; each list item below configures one dataset.
datasets:
# Capybara, sourced from the Hugging Face dataset LDJnr/Capybara.
# NOTE(review): indentation was lost in the source text; the nesting here
# is reconstructed and should be confirmed against the consumer's schema.
- namespace: lilac
  name: Capybara
  source:
    dataset_name: LDJnr/Capybara
    source_name: huggingface
  # One gte-small embedding per path; '*' is presumably a wildcard over the
  # items of `conversation` — confirm.
  embeddings:
  - path:
    - conversation
    - '*'
    - input
    embedding: gte-small
  - path:
    - conversation
    - '*'
    - output
    embedding: gte-small
  settings:
    ui:
      # Each media path is itself a list of path segments.
      media_paths:
      - - conversation
        - '*'
        - input
      - - conversation
        - '*'
        - output
      markdown_paths: []
  # NOTE(review): placed at dataset level to match the `dolphin` entry,
  # where `tags` directly follows `name`; confirm it was not nested under
  # `settings` in the original file.
  tags:
  - datasets
# glaive-code-assistant, sourced from Hugging Face
# (glaiveai/glaive-code-assistant).
# NOTE(review): indentation was lost in the source text; the nesting here
# is reconstructed and should be confirmed against the consumer's schema.
- namespace: lilac
  name: glaive-code-assistant
  source:
    dataset_name: glaiveai/glaive-code-assistant
    source_name: huggingface
  # gte-small embeddings for the `question` and `answer` fields.
  embeddings:
  - path: question
    embedding: gte-small
  - path: answer
    embedding: gte-small
  settings:
    ui:
      media_paths:
      - question
      - answer
      markdown_paths: []
  # NOTE(review): placed at dataset level to match the `dolphin` entry;
  # confirm it was not nested under `settings` in the original file.
  tags:
  - datasets
# open-assistant-conversations-2, sourced from Hugging Face
# (OpenAssistant/oasst2).
# NOTE(review): indentation was lost in the source text; the nesting here
# is reconstructed and should be confirmed against the consumer's schema.
- namespace: lilac
  name: open-assistant-conversations-2
  source:
    dataset_name: OpenAssistant/oasst2
    source_name: huggingface
  # A single gte-small embedding over the `text` field.
  embeddings:
  - path: text
    embedding: gte-small
  settings:
    ui:
      media_paths:
      - text
  # NOTE(review): placed at dataset level to match the `dolphin` entry;
  # confirm it was not nested under `settings` in the original file.
  tags:
  - datasets
# lmsys-chat-1m, sourced from Hugging Face and tagged as logs.
# NOTE(review): indentation was lost in the source text; the nesting here
# is reconstructed and should be confirmed against the consumer's schema.
- namespace: lilac
  name: lmsys-chat-1m
  source:
    # Fixed copy-paste error: this previously read OpenAssistant/oasst2,
    # the same source as the open-assistant-conversations-2 entry, which is
    # embedded via a flat `text` path rather than the
    # conversation/*/content paths configured below.
    dataset_name: lmsys/lmsys-chat-1m
    source_name: huggingface
  # gte-small embedding over `content` under `conversation`; '*' is
  # presumably a wildcard over list items — confirm.
  embeddings:
  - path:
    - conversation
    - '*'
    - content
    embedding: gte-small
  settings:
    ui:
      media_paths:
      - - conversation
        - '*'
        - content
  # NOTE(review): placed at dataset level to match the `dolphin` entry;
  # confirm it was not nested under `settings` in the original file.
  tags:
  - logs
# databricks-dolly-15k-curated-en, sourced from Hugging Face
# (argilla/databricks-dolly-15k-curated-en).
# NOTE(review): indentation was lost in the source text; the nesting here
# is reconstructed and should be confirmed against the consumer's schema.
- namespace: lilac
  name: databricks-dolly-15k-curated-en
  source:
    dataset_name: argilla/databricks-dolly-15k-curated-en
    source_name: huggingface
  # gte-small embeddings for the three `original-*` fields only; the
  # `new-*` fields appear under media_paths but are not embedded.
  embeddings:
  - path: original-instruction
    embedding: gte-small
  - path: original-context
    embedding: gte-small
  - path: original-response
    embedding: gte-small
  settings:
    ui:
      # Plain strings are single-segment paths; nested lists are
      # multi-segment paths.
      media_paths:
      - original-instruction
      - original-context
      - original-response
      - - new-instruction
        - value
        - '*'
      - - new-context
        - value
        - '*'
      - - new-response
        - value
        - '*'
  # NOTE(review): placed at dataset level to match the `dolphin` entry;
  # confirm it was not nested under `settings` in the original file.
  tags:
  - machine-learning
# OpenOrca-100k: Open-Orca/OpenOrca from Hugging Face with a sample_size
# of 100000.
# NOTE(review): indentation was lost in the source text; the nesting here
# is reconstructed and should be confirmed against the consumer's schema.
- namespace: lilac
  name: OpenOrca-100k
  source:
    dataset_name: Open-Orca/OpenOrca
    sample_size: 100000
    source_name: huggingface
  # gte-small embeddings for the `question` and `response` fields.
  embeddings:
  - path: question
    embedding: gte-small
  - path: response
    embedding: gte-small
  settings:
    ui:
      media_paths:
      - question
      - response
  # NOTE(review): placed at dataset level to match the `dolphin` entry;
  # confirm it was not nested under `settings` in the original file.
  tags:
  - machine-learning
# dolphin: cognitivecomputations/dolphin from Hugging Face, using the
# flan1m-alpaca-uncensored config. This entry lists no embeddings.
# NOTE(review): indentation was lost in the source text; the nesting here
# is reconstructed and should be confirmed against the consumer's schema.
- namespace: lilac
  name: dolphin
  tags:
  - datasets
  source:
    dataset_name: cognitivecomputations/dolphin
    config_name: flan1m-alpaca-uncensored
    source_name: huggingface
  settings:
    ui:
      # The last item is a two-segment path (input__cluster -> text).
      media_paths:
      - instruction
      - input
      - output
      - - input__cluster
        - text
      markdown_paths: []
# Options that sit outside any single dataset entry.
# NOTE(review): `use_garden` presumably toggles Lilac Garden — confirm.
use_garden: true
# Signals, listed by name.
signals:
- signal_name: text_statistics
- signal_name: lang_detection
# Embedding names for the concept-model cache.
concept_model_cache_embeddings: [gte-small, gte-base, sbert, openai, cohere]
# Cluster definitions: one entry per dataset, each naming the field (or
# selector) to read from.
# NOTE(review): indentation was lost in the source text; the nesting here
# is reconstructed and should be confirmed against the consumer's schema.
# NOTE(review): `!!python/tuple` is a Python-specific tag that safe YAML
# loaders reject; it is kept as-is because the consumer's loader is not
# visible here. Plain sequences would be the portable form.
clusters:
- dataset_namespace: lilac
  dataset_name: Capybara
  input_path: !!python/tuple
  - conversation
  - '*'
  - input
- dataset_namespace: lilac
  dataset_name: glaive-code-assistant
  input_path: !!python/tuple
  - question
- dataset_namespace: lilac
  dataset_name: open-assistant-conversations-2
  input_path: !!python/tuple
  - text
# This entry uses an input selector and an explicit output path instead
# of an input_path.
- dataset_namespace: lilac
  dataset_name: lmsys-chat-1m
  input_selector:
    format: openai_json
    selector: user
  output_path: !!python/tuple
  - conversation__clusters
- dataset_namespace: lilac
  dataset_name: databricks-dolly-15k-curated-en
  input_path: !!python/tuple
  - original-instruction
- dataset_namespace: lilac
  dataset_name: OpenOrca-100k
  input_path: !!python/tuple
  - question
- dataset_namespace: lilac
  dataset_name: dolphin
  input_path: !!python/tuple
  - input
|