---
# Lilac project configuration.
#
# `datasets` lists every dataset to load. Each entry declares:
#   source      where the rows come from (HuggingFace hub or CSV files)
#   embeddings  which paths get an embedding index
#   signals     which signals are computed on which path
#   settings    UI media/markdown paths and the preferred embedding
# A `path` is either a single field name or a list of path components
# ('*' appears as a component in repeated/nested fields).
datasets:
# imdb (HuggingFace); single `text` field.
- namespace: lilac
  name: imdb
  tags: []
  source:
    dataset_name: imdb
    config_name: null
    split: null
    sample_size: null
    revision: null
    load_from_disk: false
    source_name: huggingface
  embeddings:
  - path: text
    embedding: gte-small
  signals:
  - path: text
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path: text
    signal:
      signal_name: pii
  - path: text
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - text
      markdown_paths: []
    preferred_embedding: gte-small

# OpenAssistant/oasst1 (HuggingFace); single `text` field.
# NOTE(review): `asssistant` in the name has three s's. It is kept
# byte-for-byte because the name is an identifier that consumers may
# already depend on - confirm before renaming.
- namespace: lilac
  name: open-asssistant-conversations
  tags: []
  source:
    dataset_name: OpenAssistant/oasst1
    config_name: null
    split: null
    sample_size: null
    revision: null
    load_from_disk: false
    source_name: huggingface
  embeddings:
  - path: text
    embedding: gte-small
  signals:
  - path: text
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path: text
    signal:
      signal_name: pii
  - path: text
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - text
      markdown_paths: []
    preferred_embedding: gte-small

# wikitext, config `wikitext-2-raw-v1` (HuggingFace); single `text` field.
- namespace: lilac
  name: wikitext-2-raw-v1
  tags: []
  source:
    dataset_name: wikitext
    config_name: wikitext-2-raw-v1
    split: null
    sample_size: null
    revision: null
    load_from_disk: false
    source_name: huggingface
  embeddings:
  - path: text
    embedding: gte-small
  signals:
  - path: text
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path: text
    signal:
      signal_name: pii
  - path: text
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path: text
    signal:
      signal_name: text_statistics
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  settings:
    ui:
      media_paths:
      - text
      markdown_paths: []
    preferred_embedding: gte-small

# argilla/databricks-dolly-15k-curated-en (HuggingFace); three
# `original-*` fields plus three nested `new-*` / value / '*' paths.
# NOTE(review): only the two context paths have an explicit embedding
# entry, while concept_score signals below reference gte-small on all
# six paths - confirm the remaining embeddings are produced implicitly.
- namespace: lilac
  name: databricks-dolly-15k-curated-en
  tags: []
  source:
    dataset_name: argilla/databricks-dolly-15k-curated-en
    config_name: null
    split: null
    sample_size: null
    revision: null
    load_from_disk: false
    source_name: huggingface
  embeddings:
  - path: original-context
    embedding: gte-small
  - path:
    - new-context
    - value
    - '*'
    embedding: gte-small
  signals:
  - path: original-instruction
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path: original-instruction
    signal:
      signal_name: pii
  - path: original-instruction
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path: original-instruction
    signal:
      signal_name: text_statistics
  - path: original-context
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path: original-context
    signal:
      signal_name: pii
  - path: original-context
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path: original-context
    signal:
      signal_name: text_statistics
  - path: original-response
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path: original-response
    signal:
      signal_name: pii
  - path: original-response
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path: original-response
    signal:
      signal_name: text_statistics
  - path:
    - new-instruction
    - value
    - '*'
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path:
    - new-instruction
    - value
    - '*'
    signal:
      signal_name: pii
  - path:
    - new-instruction
    - value
    - '*'
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path:
    - new-instruction
    - value
    - '*'
    signal:
      signal_name: text_statistics
  - path:
    - new-context
    - value
    - '*'
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path:
    - new-context
    - value
    - '*'
    signal:
      signal_name: pii
  - path:
    - new-context
    - value
    - '*'
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path:
    - new-context
    - value
    - '*'
    signal:
      signal_name: text_statistics
  - path:
    - new-response
    - value
    - '*'
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path:
    - new-response
    - value
    - '*'
    signal:
      signal_name: pii
  - path:
    - new-response
    - value
    - '*'
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path:
    - new-response
    - value
    - '*'
    signal:
      signal_name: text_statistics
  - path: original-instruction
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path: original-instruction
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path: original-instruction
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path: original-instruction
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path: original-instruction
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path: original-instruction
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path: original-instruction
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path: original-instruction
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  - path: original-context
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path: original-context
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path: original-context
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path: original-context
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path: original-context
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path: original-context
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path: original-context
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path: original-context
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  - path: original-response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path: original-response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path: original-response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path: original-response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path: original-response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path: original-response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path: original-response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path: original-response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  - path:
    - new-instruction
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path:
    - new-instruction
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path:
    - new-instruction
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path:
    - new-instruction
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path:
    - new-instruction
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path:
    - new-instruction
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path:
    - new-instruction
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path:
    - new-instruction
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  - path:
    - new-context
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path:
    - new-context
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path:
    - new-context
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path:
    - new-context
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path:
    - new-context
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path:
    - new-context
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path:
    - new-context
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path:
    - new-context
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  - path:
    - new-response
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path:
    - new-response
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path:
    - new-response
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path:
    - new-response
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path:
    - new-response
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path:
    - new-response
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path:
    - new-response
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path:
    - new-response
    - value
    - '*'
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  settings:
    ui:
      media_paths:
      - original-instruction
      - original-context
      - original-response
      - - new-instruction
        - value
        - '*'
      - - new-context
        - value
        - '*'
      - - new-response
        - value
        - '*'
      markdown_paths: []
    preferred_embedding: gte-small

# piqa (HuggingFace); fields `goal`, `sol1`, `sol2`.
- namespace: lilac
  name: piqa
  tags: []
  source:
    dataset_name: piqa
    config_name: null
    split: null
    sample_size: null
    revision: null
    load_from_disk: false
    source_name: huggingface
  embeddings:
  - path: goal
    embedding: gte-small
  - path: sol1
    embedding: gte-small
  - path: sol2
    embedding: gte-small
  signals:
  - path: goal
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path: goal
    signal:
      signal_name: pii
  - path: goal
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path: goal
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path: goal
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path: goal
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  - path: goal
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path: goal
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path: goal
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path: goal
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path: goal
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path: goal
    signal:
      signal_name: text_statistics
  - path: sol1
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path: sol1
    signal:
      signal_name: pii
  - path: sol1
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path: sol1
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path: sol1
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path: sol1
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  - path: sol1
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path: sol1
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path: sol1
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path: sol1
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path: sol1
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path: sol1
    signal:
      signal_name: text_statistics
  - path: sol2
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path: sol2
    signal:
      signal_name: pii
  - path: sol2
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path: sol2
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path: sol2
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path: sol2
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  - path: sol2
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path: sol2
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path: sol2
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path: sol2
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path: sol2
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path: sol2
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - sol1
      - sol2
      - goal
      markdown_paths: []
    preferred_embedding: gte-small

# Open-Orca/OpenOrca (HuggingFace) with sample_size 100000; fields
# `question` and `response`.
- namespace: lilac
  name: OpenOrca-100k
  tags: []
  source:
    dataset_name: Open-Orca/OpenOrca
    config_name: null
    split: null
    sample_size: 100000
    revision: null
    load_from_disk: false
    source_name: huggingface
  embeddings:
  - path: question
    embedding: gte-small
  - path: response
    embedding: gte-small
  signals:
  - path: question
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path: question
    signal:
      signal_name: pii
  - path: question
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path: question
    signal:
      signal_name: text_statistics
  - path: response
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path: response
    signal:
      signal_name: pii
  - path: response
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path: response
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - question
      - response
      markdown_paths: []
    preferred_embedding: gte-small

# opus100, config `en-es`, split `validation` (HuggingFace); nested
# `translation` / `en` and `translation` / `es` paths.
- namespace: lilac
  name: opus100-en-es-validation
  tags: []
  source:
    dataset_name: opus100
    config_name: en-es
    split: validation
    sample_size: null
    revision: null
    load_from_disk: false
    source_name: huggingface
  embeddings:
  - path:
    - translation
    - en
    embedding: gte-small
  - path:
    - translation
    - es
    embedding: gte-small
  signals:
  - path:
    - translation
    - en
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path:
    - translation
    - en
    signal:
      signal_name: pii
  - path:
    - translation
    - en
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path:
    - translation
    - en
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path:
    - translation
    - en
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path:
    - translation
    - en
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  - path:
    - translation
    - en
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path:
    - translation
    - en
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path:
    - translation
    - en
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path:
    - translation
    - en
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path:
    - translation
    - en
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path:
    - translation
    - en
    signal:
      signal_name: text_statistics
  - path:
    - translation
    - es
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path:
    - translation
    - es
    signal:
      signal_name: pii
  - path:
    - translation
    - es
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path:
    - translation
    - es
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path:
    - translation
    - es
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path:
    - translation
    - es
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  - path:
    - translation
    - es
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path:
    - translation
    - es
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path:
    - translation
    - es
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path:
    - translation
    - es
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path:
    - translation
    - es
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path:
    - translation
    - es
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - - translation
        - es
      - - translation
        - en
      markdown_paths: []
    preferred_embedding: gte-small

# derek-thomas/ScienceQA (HuggingFace); `lecture` field.
- namespace: lilac
  name: science-qa-derek-thomas
  tags: []
  source:
    dataset_name: derek-thomas/ScienceQA
    config_name: null
    split: null
    sample_size: null
    revision: null
    load_from_disk: false
    source_name: huggingface
  embeddings:
  - path: lecture
    embedding: gte-small
  signals:
  - path: lecture
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path: lecture
    signal:
      signal_name: pii
  - path: lecture
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path: lecture
    signal:
      signal_name: text_statistics
  - path: lecture
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path: lecture
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path: lecture
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path: lecture
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path: lecture
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path: lecture
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path: lecture
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path: lecture
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  settings:
    ui:
      media_paths:
      - lecture
      markdown_paths: []
    preferred_embedding: gte-small

# EleutherAI/pile, config `enron_emails` (HuggingFace) with sample_size
# 100000; single `text` field.
- namespace: lilac
  name: enron-emails
  tags: []
  source:
    dataset_name: EleutherAI/pile
    config_name: enron_emails
    split: null
    sample_size: 100000
    revision: null
    load_from_disk: false
    source_name: huggingface
  embeddings:
  - path: text
    embedding: gte-small
  signals:
  - path: text
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path: text
    signal:
      signal_name: pii
  - path: text
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path: text
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - text
      markdown_paths: []
    preferred_embedding: gte-small

# CSV source fetched by URL (comma-delimited, with a header row);
# `overview` field.
- namespace: lilac
  name: the_movies_dataset
  tags: []
  source:
    filepaths:
    - https://storage.googleapis.com/lilac-data/datasets/the_movies_dataset/the_movies_dataset.csv
    delim: ','
    header: true
    names: null
    source_name: csv
  embeddings:
  - path: overview
    embedding: gte-small
  signals:
  - path: overview
    signal:
      threshold: 0.85
      signal_name: near_dup
  - path: overview
    signal:
      signal_name: pii
  - path: overview
    signal:
      split_by_paragraph: false
      signal_name: lang_detection
  - path: overview
    signal:
      signal_name: text_statistics
  - path: overview
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      draft: main
      signal_name: concept_score
  - path: overview
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      draft: main
      signal_name: concept_score
  - path: overview
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      draft: main
      signal_name: concept_score
  - path: overview
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      draft: main
      signal_name: concept_score
  - path: overview
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      draft: main
      signal_name: concept_score
  - path: overview
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      draft: main
      signal_name: concept_score
  - path: overview
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      draft: main
      signal_name: concept_score
  - path: overview
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      draft: main
      signal_name: concept_score
  settings:
    ui:
      media_paths:
      - overview
      markdown_paths: []
    preferred_embedding: gte-small

# vikp/textbook_quality_programming (HuggingFace); embeddings only, no
# signals. `markdown` is the one path listed under markdown_paths.
- namespace: lilac
  name: textbook_quality_programming
  tags: []
  source:
    dataset_name: vikp/textbook_quality_programming
    config_name: null
    split: null
    sample_size: null
    revision: null
    load_from_disk: false
    source_name: huggingface
  embeddings:
  - path:
    - outline
    - '*'
    embedding: gte-small
  - path:
    - concepts
    - '*'
    embedding: gte-small
  - path: markdown
    embedding: gte-small
  signals: []
  settings:
    ui:
      media_paths:
      - - outline
        - '*'
      - - concepts
        - '*'
      - markdown
      markdown_paths:
      - markdown
    preferred_embedding: gte-small

# Top-level (not per-dataset) settings; both are empty here.
signals: []
concept_model_cache_embeddings: []