Spaces:
Running
Running
datasets: | |
- namespace: lilac | |
name: Capybara | |
source: | |
dataset_name: LDJnr/Capybara | |
source_name: huggingface | |
embeddings: | |
- path: | |
- conversation | |
- '*' | |
- input | |
embedding: gte-small | |
- path: | |
- conversation | |
- '*' | |
- output | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- - conversation | |
- '*' | |
- input | |
- - conversation | |
- '*' | |
- output | |
markdown_paths: [] | |
tags: | |
- datasets | |
- namespace: lilac | |
name: glaive-code-assistant | |
source: | |
dataset_name: glaiveai/glaive-code-assistant | |
source_name: huggingface | |
embeddings: | |
- path: question | |
embedding: gte-small | |
- path: answer | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- question | |
- answer | |
markdown_paths: [] | |
tags: | |
- datasets | |
- namespace: lilac | |
name: open-assistant-conversations-2 | |
source: | |
dataset_name: OpenAssistant/oasst2 | |
source_name: huggingface | |
embeddings: | |
- path: text | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- text | |
tags: | |
- datasets | |
- namespace: lilac | |
name: lmsys-chat-1m | |
source: | |
dataset_name: lmsys/lmsys-chat-1m | |
source_name: huggingface | |
embeddings: | |
- path: | |
- conversation | |
- '*' | |
- content | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- - conversation | |
- '*' | |
- content | |
tags: | |
- logs | |
- namespace: lilac | |
name: databricks-dolly-15k-curated-en | |
source: | |
dataset_name: argilla/databricks-dolly-15k-curated-en | |
source_name: huggingface | |
embeddings: | |
- path: original-instruction | |
embedding: gte-small | |
- path: original-context | |
embedding: gte-small | |
- path: original-response | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- original-instruction | |
- original-context | |
- original-response | |
- - new-instruction | |
- value | |
- '*' | |
- - new-context | |
- value | |
- '*' | |
- - new-response | |
- value | |
- '*' | |
tags: | |
- machine-learning | |
- namespace: lilac | |
name: OpenOrca | |
source: | |
dataset_name: Open-Orca/OpenOrca | |
source_name: huggingface | |
embeddings: | |
- path: question | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- question | |
- response | |
tags: | |
- datasets | |
- namespace: lilac | |
name: OpenOrca-100k | |
source: | |
dataset_name: Open-Orca/OpenOrca | |
sample_size: 100000 | |
source_name: huggingface | |
embeddings: | |
- path: question | |
embedding: gte-small | |
- path: response | |
embedding: gte-small | |
settings: | |
ui: | |
media_paths: | |
- question | |
- response | |
tags: | |
- machine-learning | |
- namespace: lilac | |
name: dolphin | |
tags: | |
- datasets | |
source: | |
dataset_name: cognitivecomputations/dolphin | |
config_name: flan1m-alpaca-uncensored | |
source_name: huggingface | |
settings: | |
ui: | |
media_paths: | |
- instruction | |
- input | |
- output | |
- - input__cluster | |
- text | |
markdown_paths: [] | |
use_garden: true | |
signals: | |
- signal_name: text_statistics | |
- signal_name: lang_detection | |
concept_model_cache_embeddings: | |
- gte-small | |
- gte-base | |
- sbert | |
- openai | |
- cohere | |
clusters: | |
- dataset_namespace: lilac | |
dataset_name: Capybara | |
input_path: !!python/tuple | |
- conversation | |
- '*' | |
- input | |
- dataset_namespace: lilac | |
dataset_name: glaive-code-assistant | |
input_path: !!python/tuple | |
- question | |
- dataset_namespace: lilac | |
dataset_name: open-assistant-conversations-2 | |
input_path: !!python/tuple | |
- text | |
- dataset_namespace: lilac | |
dataset_name: lmsys-chat-1m | |
input_selector: | |
format: openai_json | |
selector: user | |
output_path: !!python/tuple | |
- conversation__clusters | |
- dataset_namespace: lilac | |
dataset_name: OpenOrca | |
input_path: !!python/tuple | |
- question | |
- dataset_namespace: lilac | |
dataset_name: databricks-dolly-15k-curated-en | |
input_path: !!python/tuple | |
- original-instruction | |
- dataset_namespace: lilac | |
dataset_name: OpenOrca-100k | |
input_path: !!python/tuple | |
- question | |
- dataset_namespace: lilac | |
dataset_name: dolphin | |
input_path: !!python/tuple | |
- input | |