---
# Dataset, signal and clustering configuration.
#
# Top-level keys:
#   datasets                        - datasets to load, with per-path embeddings
#                                     and UI settings.
#   use_garden                      - boolean flag (consumer-defined meaning).
#   signals                         - signals listed by `signal_name`.
#   concept_model_cache_embeddings  - embedding names listed for the concept
#                                     model cache.
#   clusters                        - one clustering entry per dataset.
#
# Path convention used throughout: a path is either a single field name
# (`question`) or a sequence of segments (`[conversation, '*', input]`).
# '*' is quoted because a bare `*` is YAML alias syntax.
#
# NOTE(review): this file was reconstructed from a whitespace-collapsed
# source. Where `tags` followed `settings.ui` it has been nested under
# `settings` (sibling of `ui`); only `dolphin` declares `tags` directly on the
# dataset. Nested `media_paths` entries were read as one multi-segment path
# each. Confirm both against the consumer's schema.

datasets:
  - namespace: lilac
    name: Capybara
    source:
      dataset_name: LDJnr/Capybara
      source_name: huggingface
    embeddings:
      - path:
          - conversation
          - '*'
          - input
        embedding: gte-small
      - path:
          - conversation
          - '*'
          - output
        embedding: gte-small
    settings:
      ui:
        media_paths:
          - - conversation
            - '*'
            - input
          - - conversation
            - '*'
            - output
        markdown_paths: []
      tags:
        - datasets

  - namespace: lilac
    name: glaive-code-assistant
    source:
      dataset_name: glaiveai/glaive-code-assistant
      source_name: huggingface
    embeddings:
      - path: question
        embedding: gte-small
      - path: answer
        embedding: gte-small
    settings:
      ui:
        media_paths:
          - question
          - answer
        markdown_paths: []
      tags:
        - datasets

  - namespace: lilac
    name: glaive-function-calling-v2
    source:
      dataset_name: lilacai/glaive-function-calling-v2-sharegpt
      source_name: huggingface
    embeddings:
      - path:
          - conversations
          - '*'
          - value
        embedding: gte-small
    settings:
      ui:
        media_paths:
          - - conversations
            - '*'
            - value
        markdown_paths: []
      tags:
        - datasets

  - namespace: lilac
    name: open-assistant-conversations-2
    source:
      dataset_name: OpenAssistant/oasst2
      source_name: huggingface
    embeddings:
      - path: text
        embedding: gte-small
    settings:
      ui:
        media_paths:
          - text
      tags:
        - datasets

  - namespace: lilac
    name: lmsys-chat-1m
    source:
      dataset_name: lmsys/lmsys-chat-1m
      source_name: huggingface
    embeddings:
      - path:
          - conversation
          - '*'
          - content
        embedding: gte-small
    settings:
      ui:
        media_paths:
          - - conversation
            - '*'
            - content
      tags:
        - logs

  - namespace: lilac
    name: databricks-dolly-15k-curated-en
    source:
      dataset_name: argilla/databricks-dolly-15k-curated-en
      source_name: huggingface
    embeddings:
      - path: original-instruction
        embedding: gte-small
      - path: original-context
        embedding: gte-small
      - path: original-response
        embedding: gte-small
    settings:
      ui:
        media_paths:
          - original-instruction
          - original-context
          - original-response
          - - new-instruction
            - value
            - '*'
          - - new-context
            - value
            - '*'
          - - new-response
            - value
            - '*'
      tags:
        - machine-learning

  - namespace: lilac
    name: OpenOrca
    source:
      dataset_name: Open-Orca/OpenOrca
      source_name: huggingface
    embeddings:
      - path: question
        embedding: gte-small
    settings:
      ui:
        media_paths:
          - question
          - response
      tags:
        - datasets

  # Unlike the entries above, this dataset declares `tags` at the dataset
  # level and lists no embeddings.
  - namespace: lilac
    name: dolphin
    tags:
      - datasets
    source:
      dataset_name: cognitivecomputations/dolphin
      config_name: flan1m-alpaca-uncensored
      source_name: huggingface
    settings:
      ui:
        media_paths:
          - instruction
          - input
          - output
          - - input__cluster
            - text
        markdown_paths: []

use_garden: true

signals:
  - signal_name: text_statistics
  - signal_name: lang_detection

concept_model_cache_embeddings:
  - gte-small
  - gte-base
  - sbert
  - openai
  - cohere

# Each entry names its input either with `input_path` or with an
# `input_selector` (format + selector) plus an explicit `output_path`.
# Paths are written as plain sequences rather than `!!python/tuple`, because
# language-specific tags are rejected by safe YAML loaders; this also matches
# how `embeddings[*].path` is written above.
# NOTE(review): assumes the consumer accepts a list wherever it previously
# received a tuple - confirm.
clusters:
  - dataset_namespace: lilac
    dataset_name: Capybara
    input_path:
      - conversation
      - '*'
      - input

  - dataset_namespace: lilac
    dataset_name: glaive-code-assistant
    input_path:
      - question

  - dataset_namespace: lilac
    dataset_name: glaive-function-calling-v2
    input_selector:
      format: sharegpt
      selector: human
    # NOTE(review): single underscore here, whereas lmsys-chat-1m below uses
    # `conversation__clusters` - confirm which spelling is intended. Left
    # unchanged because renaming would alter the output column.
    output_path:
      - conversation_clusters

  - dataset_namespace: lilac
    dataset_name: open-assistant-conversations-2
    input_path:
      - text

  - dataset_namespace: lilac
    dataset_name: lmsys-chat-1m
    input_selector:
      format: openai_json
      selector: user
    output_path:
      - conversation__clusters

  - dataset_namespace: lilac
    dataset_name: OpenOrca
    input_path:
      - question

  - dataset_namespace: lilac
    dataset_name: databricks-dolly-15k-curated-en
    input_path:
      - original-instruction

  - dataset_namespace: lilac
    dataset_name: dolphin
    input_path:
      - input