# lilac / data / lilac.yml
# nsthorat-lilac's picture
# Push to HF space
# 96e9569 verified
# raw
# history blame
# 4.31 kB
# Dataset entries. Each entry gives a namespace/name pair, a `source`
# mapping, optional `embeddings` (field path + embedding name) and `settings`.
datasets:
# lilac/Capybara -- source_name huggingface, dataset_name LDJnr/Capybara.
- namespace: lilac
  name: Capybara
  source:
    dataset_name: LDJnr/Capybara
    source_name: huggingface
  # The `input` and `output` fields under `conversation` each get gte-small.
  # NOTE(review): '*' presumably matches every element of `conversation` --
  # confirm against the consumer.
  embeddings:
  - path: [conversation, '*', input]
    embedding: gte-small
  - path: [conversation, '*', output]
    embedding: gte-small
  settings:
    ui:
      # Same two paths as the embeddings above.
      media_paths:
      - [conversation, '*', input]
      - [conversation, '*', output]
      markdown_paths: []
    # NOTE(review): indentation was lost in this copy; `tags` is placed under
    # `settings` (sibling of `ui`) based on key order -- confirm against the
    # consumer's schema.
    tags: [datasets]
# lilac/glaive-code-assistant -- source_name huggingface,
# dataset_name glaiveai/glaive-code-assistant.
- namespace: lilac
  name: glaive-code-assistant
  source:
    dataset_name: glaiveai/glaive-code-assistant
    source_name: huggingface
  # `question` and `answer` each get a gte-small embedding.
  embeddings:
  - path: question
    embedding: gte-small
  - path: answer
    embedding: gte-small
  settings:
    ui:
      media_paths: [question, answer]
      markdown_paths: []
    # NOTE(review): indentation was lost in this copy; `tags` is placed under
    # `settings` (sibling of `ui`) based on key order -- confirm against the
    # consumer's schema.
    tags: [datasets]
# lilac/open-assistant-conversations-2 -- source_name huggingface,
# dataset_name OpenAssistant/oasst2.
- namespace: lilac
  name: open-assistant-conversations-2
  source:
    dataset_name: OpenAssistant/oasst2
    source_name: huggingface
  # Single embedded field: `text`, with gte-small.
  embeddings:
  - path: text
    embedding: gte-small
  settings:
    ui:
      media_paths: [text]
    # NOTE(review): indentation was lost in this copy; `tags` is placed under
    # `settings` (sibling of `ui`) based on key order -- confirm against the
    # consumer's schema.
    tags: [datasets]
# lilac/lmsys-chat-1m -- source_name huggingface.
- namespace: lilac
  name: lmsys-chat-1m
  source:
    # The source id must match this entry: OpenAssistant/oasst2 is the source
    # of the open-assistant-conversations-2 entry (which uses a `text` path),
    # whereas this entry's paths are `conversation/*/content`.
    dataset_name: lmsys/lmsys-chat-1m
    source_name: huggingface
  # The `content` field under `conversation` gets a gte-small embedding.
  # NOTE(review): '*' presumably matches every element of `conversation` --
  # confirm against the consumer.
  embeddings:
  - path: [conversation, '*', content]
    embedding: gte-small
  settings:
    ui:
      media_paths:
      - [conversation, '*', content]
    # NOTE(review): indentation was lost in this copy; `tags` is placed under
    # `settings` (sibling of `ui`) based on key order -- confirm against the
    # consumer's schema.
    tags: [logs]
# lilac/databricks-dolly-15k-curated-en -- source_name huggingface,
# dataset_name argilla/databricks-dolly-15k-curated-en.
- namespace: lilac
  name: databricks-dolly-15k-curated-en
  source:
    dataset_name: argilla/databricks-dolly-15k-curated-en
    source_name: huggingface
  # Only the three `original-*` fields get a gte-small embedding.
  embeddings:
  - path: original-instruction
    embedding: gte-small
  - path: original-context
    embedding: gte-small
  - path: original-response
    embedding: gte-small
  settings:
    ui:
      # Three plain field names followed by three nested `new-*` paths.
      media_paths:
      - original-instruction
      - original-context
      - original-response
      - [new-instruction, value, '*']
      - [new-context, value, '*']
      - [new-response, value, '*']
    # NOTE(review): indentation was lost in this copy; `tags` is placed under
    # `settings` (sibling of `ui`) based on key order -- confirm against the
    # consumer's schema.
    tags: [machine-learning]
# lilac/OpenOrca-100k -- source_name huggingface,
# dataset_name Open-Orca/OpenOrca, with sample_size 100000.
- namespace: lilac
  name: OpenOrca-100k
  source:
    dataset_name: Open-Orca/OpenOrca
    sample_size: 100000
    source_name: huggingface
  # `question` and `response` each get a gte-small embedding.
  embeddings:
  - path: question
    embedding: gte-small
  - path: response
    embedding: gte-small
  settings:
    ui:
      media_paths: [question, response]
    # NOTE(review): indentation was lost in this copy; `tags` is placed under
    # `settings` (sibling of `ui`) based on key order -- confirm against the
    # consumer's schema.
    tags: [machine-learning]
# lilac/dolphin -- source_name huggingface,
# dataset_name cognitivecomputations/dolphin,
# config_name flan1m-alpaca-uncensored.
# This entry has no `embeddings` key, and its `tags` sits at entry level
# (it appears before `source`), unlike the other entries in this list.
- namespace: lilac
  name: dolphin
  tags: [datasets]
  source:
    dataset_name: cognitivecomputations/dolphin
    config_name: flan1m-alpaca-uncensored
    source_name: huggingface
  settings:
    ui:
      # Three plain field names plus one nested path (input__cluster -> text).
      media_paths:
      - instruction
      - input
      - output
      - [input__cluster, text]
      markdown_paths: []
# Project-level keys (siblings of `datasets`).
# NOTE(review): indentation was lost in this copy; these are treated as
# top-level keys because they follow the last dataset entry -- confirm.
use_garden: true

# Signals, referenced by name.
signals:
- signal_name: text_statistics
- signal_name: lang_detection

# Embedding names listed for the concept-model cache.
concept_model_cache_embeddings: [gte-small, gte-base, sbert, openai, cohere]
# Cluster entries, one per dataset listed under `datasets`. Each entry names
# its dataset and gives either an `input_path` or an `input_selector`
# (the latter together with an `output_path`).
#
# Paths are written as plain YAML sequences, the same form the `path` and
# `media_paths` values under `datasets` use. The Python-specific
# `!!python/tuple` tag is avoided because safe YAML loaders reject it.
# Single-field paths stay one-element sequences, not bare strings.
# NOTE(review): assumes the consumer accepts a list wherever it previously
# received a tuple -- confirm.
clusters:
- dataset_namespace: lilac
  dataset_name: Capybara
  input_path: [conversation, '*', input]
- dataset_namespace: lilac
  dataset_name: glaive-code-assistant
  input_path: [question]
- dataset_namespace: lilac
  dataset_name: open-assistant-conversations-2
  input_path: [text]
# The only entry that uses a selector instead of a field path.
- dataset_namespace: lilac
  dataset_name: lmsys-chat-1m
  input_selector:
    format: openai_json
    selector: user
  output_path: [conversation__clusters]
- dataset_namespace: lilac
  dataset_name: databricks-dolly-15k-curated-en
  input_path: [original-instruction]
- dataset_namespace: lilac
  dataset_name: OpenOrca-100k
  input_path: [question]
- dataset_namespace: lilac
  dataset_name: dolphin
  input_path: [input]