# Dataset configuration.
#
# Each entry under `datasets` carries:
#   namespace / name - identifier of the dataset
#   source           - where the data is loaded from (all entries here use
#                      the `huggingface` source)
#   embeddings       - embedding to compute, per path
#   signals          - signals to compute, per path
#   settings         - UI settings and tags
#
# Every (path, signal) pair is listed exactly once per dataset. The earlier
# revision repeated identical `concept_score` entries several times within a
# dataset; the repeats were exact copies and have been dropped, keeping the
# first occurrence of each entry in its original position.
#
# NOTE(review): nesting was reconstructed from a whitespace-collapsed
# original. `tags` is placed under `settings` (after `ui`) based on key
# order - confirm against the consumer's schema.
datasets:
  - namespace: lilac
    name: imdb
    source:
      dataset_name: imdb
      source_name: huggingface
    embeddings:
      - path: text
        embedding: gte-small
    signals:
      - path: text
        signal:
          signal_name: near_dup
      - path: text
        signal:
          signal_name: pii
      - path: text
        signal:
          signal_name: lang_detection
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: positive-sentiment
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: non-english
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: toxicity
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: question
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: legal-termination
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: source-code
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: negative-sentiment
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: profanity
          signal_name: concept_score
      - path: text
        signal:
          signal_name: text_statistics
      - path: text
        signal:
          signal_name: cluster_dbscan
      - path: text
        signal:
          embedding: gte-small
          signal_name: cluster_hdbscan
    settings:
      ui:
        media_paths:
          - text
        markdown_paths: []
      tags:
        - machine-learning

  # NOTE(review): "asssistant" is spelled with three s's. The name is an
  # identifier, so it is left untouched here - confirm before renaming.
  - namespace: lilac
    name: open-asssistant-conversations
    source:
      dataset_name: OpenAssistant/oasst1
      source_name: huggingface
    embeddings:
      - path: text
        embedding: gte-small
    signals:
      - path: text
        signal:
          signal_name: near_dup
      - path: text
        signal:
          signal_name: pii
      - path: text
        signal:
          signal_name: lang_detection
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: positive-sentiment
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: non-english
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: toxicity
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: question
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: legal-termination
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: source-code
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: negative-sentiment
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: profanity
          signal_name: concept_score
      - path: text
        signal:
          signal_name: text_statistics
      - path: text
        signal:
          signal_name: cluster_dbscan
      - path: text
        signal:
          embedding: gte-small
          signal_name: cluster_hdbscan
    settings:
      ui:
        media_paths:
          - text
        markdown_paths: []
      tags:
        - machine-learning

  - namespace: lilac
    name: wikitext-2-raw-v1
    source:
      dataset_name: wikitext
      config_name: wikitext-2-raw-v1
      source_name: huggingface
    embeddings:
      - path: text
        embedding: gte-small
    signals:
      - path: text
        signal:
          signal_name: near_dup
      - path: text
        signal:
          signal_name: pii
      - path: text
        signal:
          signal_name: lang_detection
      - path: text
        signal:
          signal_name: text_statistics
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: legal-termination
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: negative-sentiment
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: non-english
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: positive-sentiment
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: profanity
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: question
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: source-code
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: toxicity
          signal_name: concept_score
      - path: text
        signal:
          signal_name: cluster_dbscan
      - path: text
        signal:
          embedding: gte-small
          signal_name: cluster_hdbscan
    settings:
      ui:
        media_paths:
          - text
        markdown_paths: []
      tags:
        - machine-learning

  # NOTE(review): the next entry continues past the end of this block, so its
  # text is carried through exactly as found and has not been re-indented.
  - namespace: lilac name: databricks-dolly-15k-curated-en source: dataset_name: argilla/databricks-dolly-15k-curated-en source_name: huggingface embeddings: - path: original-context embedding: gte-small - path: - new-context - value - '*' embedding: gte-small - path: original-instruction embedding: gte-small - path: original-response embedding: gte-small - path: - new-instruction - value - '*' embedding: gte-small - path: - new-response - value - '*' embedding: gte-small signals: - path: original-instruction signal: signal_name: near_dup - path: original-instruction signal: signal_name: pii - path: original-instruction signal: signal_name: lang_detection - path: original-instruction signal: signal_name: text_statistics - path: original-context signal: signal_name: near_dup - path: original-context signal: signal_name: pii - path: original-context signal: signal_name: lang_detection - path: original-context signal: signal_name: text_statistics - path: original-response signal: signal_name: near_dup - path: original-response signal: signal_name: pii - path: original-response signal: signal_name: lang_detection - path: original-response signal: signal_name: text_statistics - path: - new-instruction - value - '*' signal: signal_name: near_dup - path: - new-instruction - value - '*' signal: signal_name: pii - path: - new-instruction - value - '*' signal: signal_name: lang_detection - path: - new-instruction - value - '*' signal: signal_name: text_statistics - path: - new-context - value - '*' signal: signal_name: near_dup - path: - new-context - value - '*' signal: signal_name: pii - path: - new-context - value - '*' signal: signal_name: lang_detection - path: - new-context - value - '*' signal: signal_name: 
text_statistics - path: - new-response - value - '*' signal: signal_name: near_dup - path: - new-response - value - '*' signal: signal_name: pii - path: - new-response - value - '*' signal: signal_name: lang_detection - path: - new-response - value - '*' signal: signal_name: text_statistics - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: original-context signal: 
embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small 
namespace: lilac concept_name: profanity signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - new-context - value - '*' signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - new-context - value - '*' signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - new-context - value - '*' signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - new-context - value - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - new-context - value - '*' signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - new-context - value - '*' signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - new-context - value - '*' signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - new-context - value - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: 
- new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: original-context 
signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: original-context signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment 
signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - new-context - value - '*' signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - new-context - value - '*' signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - new-context - value - '*' signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - new-context - value - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - new-context - value - '*' signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - new-context - value - '*' signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - new-context - value - '*' signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - new-context - value - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - new-response - value - '*' signal: 
embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: original-instruction 
signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - new-instruction - value - '*' 
signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - 
path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - new-instruction - value - 
'*' signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: 
concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - new-instruction - value - '*' 
signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: legal-termination 
signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: 
gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: 
source-code signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: original-instruction signal: signal_name: cluster_dbscan - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: original-context signal: signal_name: cluster_dbscan - path: original-response signal: signal_name: cluster_dbscan - path: original-response signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac 
concept_name: profanity signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - new-instruction - value - '*' signal: signal_name: cluster_dbscan - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - new-context - value - '*' signal: signal_name: cluster_dbscan - path: - new-response - value - '*' signal: signal_name: cluster_dbscan - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac 
concept_name: negative-sentiment signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: original-instruction signal: embedding: gte-small signal_name: cluster_hdbscan - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: original-instruction signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - 
path: original-context signal: embedding: gte-small signal_name: cluster_hdbscan - path: original-response signal: embedding: gte-small signal_name: cluster_hdbscan - path: original-response signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: original-response signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small signal_name: cluster_hdbscan - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - 
new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - new-instruction - value - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - new-context - value - '*' signal: embedding: gte-small signal_name: cluster_hdbscan - path: - new-response - value - '*' signal: embedding: gte-small signal_name: cluster_hdbscan - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - new-response - value - '*' signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score settings: ui: media_paths: - original-instruction - original-context - original-response - - new-instruction - value - '*' - - new-context - value - '*' - - new-response - value - '*' markdown_paths: [] tags: - machine-learning - namespace: lilac name: piqa source: dataset_name: piqa source_name: huggingface 
embeddings: - path: goal embedding: gte-small - path: sol1 embedding: gte-small - path: sol2 embedding: gte-small signals: - path: goal signal: signal_name: near_dup - path: goal signal: signal_name: pii - path: goal signal: signal_name: lang_detection - path: goal signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: goal signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: goal signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: goal signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: goal signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: goal signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: goal signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: goal signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: goal signal: signal_name: text_statistics - path: sol1 signal: signal_name: near_dup - path: sol1 signal: signal_name: pii - path: sol1 signal: signal_name: lang_detection - path: sol1 signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: sol1 signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: sol1 signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: sol1 signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: sol1 signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: sol1 signal: embedding: gte-small namespace: lilac concept_name: source-code 
signal_name: concept_score - path: sol1 signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: sol1 signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: sol1 signal: signal_name: text_statistics - path: sol2 signal: signal_name: near_dup - path: sol2 signal: signal_name: pii - path: sol2 signal: signal_name: lang_detection - path: sol2 signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: sol2 signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: sol2 signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: sol2 signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: sol2 signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: sol2 signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: sol2 signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: sol2 signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: sol2 signal: signal_name: text_statistics settings: ui: media_paths: - sol1 - sol2 - goal markdown_paths: [] - namespace: lilac name: OpenOrca-100k source: dataset_name: Open-Orca/OpenOrca sample_size: 100000 source_name: huggingface embeddings: - path: question embedding: gte-small - path: response embedding: gte-small signals: - path: question signal: signal_name: near_dup - path: question signal: signal_name: pii - path: question signal: signal_name: lang_detection - path: question signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: question signal: embedding: gte-small 
namespace: lilac concept_name: non-english signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: question signal: signal_name: text_statistics - path: response signal: signal_name: near_dup - path: response signal: signal_name: pii - path: response signal: signal_name: lang_detection - path: response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: response signal: signal_name: text_statistics - path: question signal: embedding: 
gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: question signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: response signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: question signal: signal_name: cluster_dbscan - path: response signal: signal_name: cluster_dbscan - path: question signal: embedding: gte-small 
signal_name: cluster_hdbscan - path: response signal: embedding: gte-small signal_name: cluster_hdbscan settings: ui: media_paths: - question - response markdown_paths: [] tags: - machine-learning - namespace: lilac name: opus100-en-es-validation source: dataset_name: opus100 config_name: en-es split: validation source_name: huggingface embeddings: - path: - translation - en embedding: gte-small - path: - translation - es embedding: gte-small signals: - path: - translation - en signal: signal_name: near_dup - path: - translation - en signal: signal_name: pii - path: - translation - en signal: signal_name: lang_detection - path: - translation - en signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - translation - en signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - translation - en signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - translation - en signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - translation - en signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - translation - en signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - translation - en signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - translation - en signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - translation - en signal: signal_name: text_statistics - path: - translation - es signal: signal_name: near_dup - path: - translation - es signal: signal_name: pii - path: - translation - es signal: signal_name: lang_detection - path: - translation - es signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment 
signal_name: concept_score - path: - translation - es signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - translation - es signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - translation - es signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - translation - es signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - translation - es signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - translation - es signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - translation - es signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - translation - es signal: signal_name: text_statistics - path: - translation - es signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - translation - es signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - translation - es signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - translation - es signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - translation - es signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - translation - es signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - translation - es signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - translation - es signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - 
path: - translation - en signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: - translation - en signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: - translation - en signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: - translation - en signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: - translation - en signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: - translation - en signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: - translation - en signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: - translation - en signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: - translation - es signal: signal_name: cluster_dbscan - path: - translation - en signal: signal_name: cluster_dbscan - path: - translation - es signal: embedding: gte-small signal_name: cluster_hdbscan - path: - translation - en signal: embedding: gte-small signal_name: cluster_hdbscan settings: ui: media_paths: - - translation - es - - translation - en markdown_paths: [] tags: - machine-learning - namespace: lilac name: science-qa-derek-thomas source: dataset_name: derek-thomas/ScienceQA source_name: huggingface embeddings: - path: lecture embedding: gte-small signals: - path: lecture signal: signal_name: near_dup - path: lecture signal: signal_name: pii - path: lecture signal: signal_name: lang_detection - path: lecture signal: signal_name: text_statistics - path: lecture signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: 
negative-sentiment signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: legal-termination signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: negative-sentiment signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: non-english signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: positive-sentiment signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: profanity signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: question signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: source-code signal_name: concept_score - path: lecture signal: embedding: gte-small namespace: lilac concept_name: toxicity signal_name: concept_score - path: lecture signal: signal_name: cluster_dbscan - path: lecture signal: embedding: gte-small signal_name: cluster_hdbscan settings: ui: media_paths: - lecture markdown_paths: [] tags: - science
  # NOTE(review): nesting below is inferred from key order, because the text
  # this was rebuilt from carried no indentation. In particular `tags` is
  # placed under `settings` (sibling of `ui`); confirm against the consumer's
  # schema before relying on it.

  # enron-emails: huggingface source EleutherAI/pile, config enron_emails.
  # Every signal on path `text` is listed exactly once.
  - namespace: lilac
    name: enron-emails
    source:
      dataset_name: EleutherAI/pile
      config_name: enron_emails
      sample_size: 100000
      source_name: huggingface
    embeddings:
      - path: text
        embedding: gte-small
    signals:
      - path: text
        signal:
          signal_name: near_dup
      - path: text
        signal:
          signal_name: pii
      - path: text
        signal:
          signal_name: lang_detection
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: positive-sentiment
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: non-english
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: toxicity
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: question
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: legal-termination
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: source-code
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: negative-sentiment
          signal_name: concept_score
      - path: text
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: profanity
          signal_name: concept_score
      - path: text
        signal:
          signal_name: text_statistics
      - path: text
        signal:
          signal_name: cluster_dbscan
      - path: text
        signal:
          embedding: gte-small
          signal_name: cluster_hdbscan
    settings:
      ui:
        media_paths:
          - text
        markdown_paths: []
      tags:
        - business

  # the_movies_dataset: csv source loaded from a single remote file.
  # Every signal on path `overview` is listed exactly once.
  - namespace: lilac
    name: the_movies_dataset
    source:
      filepaths:
        - https://storage.googleapis.com/lilac-data/datasets/the_movies_dataset/the_movies_dataset.csv
      source_name: csv
    embeddings:
      - path: overview
        embedding: gte-small
    signals:
      - path: overview
        signal:
          signal_name: near_dup
      - path: overview
        signal:
          signal_name: pii
      - path: overview
        signal:
          signal_name: lang_detection
      - path: overview
        signal:
          signal_name: text_statistics
      - path: overview
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: legal-termination
          signal_name: concept_score
      - path: overview
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: negative-sentiment
          signal_name: concept_score
      - path: overview
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: non-english
          signal_name: concept_score
      - path: overview
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: positive-sentiment
          signal_name: concept_score
      - path: overview
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: profanity
          signal_name: concept_score
      - path: overview
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: question
          signal_name: concept_score
      - path: overview
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: source-code
          signal_name: concept_score
      - path: overview
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: toxicity
          signal_name: concept_score
      - path: overview
        signal:
          signal_name: cluster_dbscan
      - path: overview
        signal:
          embedding: gte-small
          signal_name: cluster_hdbscan
    settings:
      ui:
        media_paths:
          - overview
        markdown_paths: []
      tags:
        - other

  # textbook_quality_programming: huggingface source with three enriched
  # paths: [outline, '*'], [concepts, '*'] and markdown.
  - namespace: lilac
    name: textbook_quality_programming
    source:
      dataset_name: vikp/textbook_quality_programming
      source_name: huggingface
    embeddings:
      - path: [outline, '*']
        embedding: gte-small
      - path: [concepts, '*']
        embedding: gte-small
      - path: markdown
        embedding: gte-small
    signals:
      # Signals on [outline, '*'].
      - path: [outline, '*']
        signal:
          signal_name: pii
      - path: [outline, '*']
        signal:
          signal_name: text_statistics
      - path: [outline, '*']
        signal:
          signal_name: near_dup
      - path: [outline, '*']
        signal:
          signal_name: lang_detection
      - path: [outline, '*']
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: legal-termination
          signal_name: concept_score
      - path: [outline, '*']
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: negative-sentiment
          signal_name: concept_score
      - path: [outline, '*']
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: non-english
          signal_name: concept_score
      - path: [outline, '*']
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: positive-sentiment
          signal_name: concept_score
      - path: [outline, '*']
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: profanity
          signal_name: concept_score
      - path: [outline, '*']
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: question
          signal_name: concept_score
      - path: [outline, '*']
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: source-code
          signal_name: concept_score
      - path: [outline, '*']
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: toxicity
          signal_name: concept_score
      # Signals on [concepts, '*'].
      - path: [concepts, '*']
        signal:
          signal_name: pii
      - path: [concepts, '*']
        signal:
          signal_name: text_statistics
      - path: [concepts, '*']
        signal:
          signal_name: near_dup
      - path: [concepts, '*']
        signal:
          signal_name: lang_detection
      - path: [concepts, '*']
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: legal-termination
          signal_name: concept_score
      - path: [concepts, '*']
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: negative-sentiment
          signal_name: concept_score
      - path: [concepts, '*']
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: non-english
          signal_name: concept_score
      - path: [concepts, '*']
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: positive-sentiment
          signal_name: concept_score
      - path: [concepts, '*']
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: profanity
          signal_name: concept_score
      - path: [concepts, '*']
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: question
          signal_name: concept_score
      - path: [concepts, '*']
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: source-code
          signal_name: concept_score
      - path: [concepts, '*']
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: toxicity
          signal_name: concept_score
      # Signals on markdown.
      - path: markdown
        signal:
          signal_name: pii
      - path: markdown
        signal:
          signal_name: text_statistics
      - path: markdown
        signal:
          signal_name: near_dup
      - path: markdown
        signal:
          signal_name: lang_detection
      - path: markdown
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: legal-termination
          signal_name: concept_score
      - path: markdown
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: negative-sentiment
          signal_name: concept_score
      - path: markdown
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: non-english
          signal_name: concept_score
      - path: markdown
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: positive-sentiment
          signal_name: concept_score
      - path: markdown
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: profanity
          signal_name: concept_score
      - path: markdown
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: question
          signal_name: concept_score
      - path: markdown
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: source-code
          signal_name: concept_score
      - path: markdown
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: toxicity
          signal_name: concept_score
      # Clustering signals for all three paths.
      - path: [outline, '*']
        signal:
          signal_name: cluster_dbscan
      - path: [concepts, '*']
        signal:
          signal_name: cluster_dbscan
      - path: markdown
        signal:
          signal_name: cluster_dbscan
      - path: [outline, '*']
        signal:
          embedding: gte-small
          signal_name: cluster_hdbscan
      - path: [concepts, '*']
        signal:
          embedding: gte-small
          signal_name: cluster_hdbscan
      - path: markdown
        signal:
          embedding: gte-small
          signal_name: cluster_hdbscan
    settings:
      ui:
        media_paths:
          - [outline, '*']
          - [concepts, '*']
          - markdown
        markdown_paths:
          - markdown
      tags:
        - machine-learning

  # stanford-alpaca: json source loaded from a single remote file, with
  # three enriched paths: output, instruction and input. No tags are set.
  - namespace: lilac
    name: stanford-alpaca
    source:
      filepaths:
        - https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json
      source_name: json
    embeddings:
      - path: output
        embedding: gte-small
      - path: instruction
        embedding: gte-small
      - path: input
        embedding: gte-small
    signals:
      # Signals on output.
      - path: output
        signal:
          signal_name: pii
      - path: output
        signal:
          signal_name: text_statistics
      - path: output
        signal:
          signal_name: near_dup
      - path: output
        signal:
          signal_name: lang_detection
      - path: output
        signal:
          embedding: gte-small
          signal_name: cluster_hdbscan
      - path: output
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: legal-termination
          signal_name: concept_score
      - path: output
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: negative-sentiment
          signal_name: concept_score
      - path: output
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: non-english
          signal_name: concept_score
      - path: output
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: positive-sentiment
          signal_name: concept_score
      - path: output
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: profanity
          signal_name: concept_score
      - path: output
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: question
          signal_name: concept_score
      - path: output
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: source-code
          signal_name: concept_score
      - path: output
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: toxicity
          signal_name: concept_score
      # Signals on instruction.
      - path: instruction
        signal:
          signal_name: pii
      - path: instruction
        signal:
          signal_name: text_statistics
      - path: instruction
        signal:
          signal_name: near_dup
      - path: instruction
        signal:
          signal_name: lang_detection
      - path: instruction
        signal:
          embedding: gte-small
          signal_name: cluster_hdbscan
      - path: instruction
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: legal-termination
          signal_name: concept_score
      - path: instruction
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: negative-sentiment
          signal_name: concept_score
      - path: instruction
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: non-english
          signal_name: concept_score
      - path: instruction
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: positive-sentiment
          signal_name: concept_score
      - path: instruction
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: profanity
          signal_name: concept_score
      - path: instruction
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: question
          signal_name: concept_score
      - path: instruction
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: source-code
          signal_name: concept_score
      - path: instruction
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: toxicity
          signal_name: concept_score
      # Signals on input.
      - path: input
        signal:
          signal_name: pii
      - path: input
        signal:
          signal_name: text_statistics
      - path: input
        signal:
          signal_name: near_dup
      - path: input
        signal:
          signal_name: lang_detection
      - path: input
        signal:
          embedding: gte-small
          signal_name: cluster_hdbscan
      - path: input
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: legal-termination
          signal_name: concept_score
      - path: input
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: negative-sentiment
          signal_name: concept_score
      - path: input
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: non-english
          signal_name: concept_score
      - path: input
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: positive-sentiment
          signal_name: concept_score
      - path: input
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: profanity
          signal_name: concept_score
      - path: input
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: question
          signal_name: concept_score
      - path: input
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: source-code
          signal_name: concept_score
      - path: input
        signal:
          embedding: gte-small
          namespace: lilac
          concept_name: toxicity
          signal_name: concept_score
    settings:
      ui:
        media_paths:
          - output
          - instruction
          - input
        markdown_paths: []

  # glaive: huggingface source with UI settings only; no embeddings or
  # signals are configured for this entry.
  - namespace: lilac
    name: glaive
    source:
      dataset_name: glaiveai/glaive-code-assistant
      source_name: huggingface
    settings:
      ui:
        media_paths:
          - question
          - answer
          - [answer_formatted, answer]
        markdown_paths: []
      tags:
        - machine-learning