# File: nikhil_staging/data/lilac.yml
# Last commit: 59cf6d6 ("Push", nsthorat) - 6.43 kB
# Lilac project config.
# See https://lilacml.com/api_reference/index.html#lilac.Config for details.
datasets:
# local/glue: the `ax` config of the `glue` dataset, loaded through the
# `huggingface` source.
- namespace: local
  name: glue
  source:
    dataset_name: glue
    config_name: ax
    source_name: huggingface
  # Only `premise` has an embedding configured for this entry.
  embeddings:
  - path: premise
    embedding: gte-small
  # `pii` is declared on `premise` and `hypothesis`.
  signals:
  - path: premise
    signal:
      signal_name: pii
  - path: hypothesis
    signal:
      signal_name: pii
  settings:
    ui:
      media_paths:
      - premise
# local/glue_ax: same `glue` / `ax` HuggingFace source as `local/glue`, but
# with the embedding and concept scores attached to `hypothesis`.
- namespace: local
  name: glue_ax
  source:
    dataset_name: glue
    config_name: ax
    source_name: huggingface
  embeddings:
  - path: hypothesis
    embedding: gte-small
  signals:
  # Signals declared on `premise`.
  - path: premise
    signal:
      signal_name: text_statistics
  - path: premise
    signal:
      signal_name: pii
  - path: premise
    signal:
      signal_name: near_dup
  # Concept scores on `hypothesis`; each entry must name a concept by
  # `namespace` and `concept_name`, and uses the `gte-small` embedding
  # configured above.
  - path: hypothesis
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      signal_name: concept_score
  - path: hypothesis
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      signal_name: concept_score
  settings:
    ui:
      media_paths:
      - hypothesis
# local/imdb3: `imdb` from the `huggingface` source; no embeddings or signals
# are configured for this entry.
- namespace: local
  name: imdb3
  source:
    dataset_name: imdb
    source_name: huggingface
  settings:
    ui:
      media_paths:
      - text
# local/imdb: `imdb` from the `huggingface` source, with an embedding and two
# signals on the `text` field.
- namespace: local
  name: imdb
  source:
    dataset_name: imdb
    source_name: huggingface
  embeddings:
  - path: text
    embedding: gte-small
  signals:
  - path: text
    signal:
      signal_name: pii
  - path: text
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - text
# local/imdb2: `imdb` from the `huggingface` source; no embeddings or signals
# are configured for this entry.
- namespace: local
  name: imdb2
  source:
    dataset_name: imdb
    source_name: huggingface
  settings:
    ui:
      media_paths:
      - text
# lilac/OpenOrca-100k: `Open-Orca/OpenOrca` from the `huggingface` source.
# `sample_size` is 100000, matching the `-100k` suffix in the name.
- namespace: lilac
  name: OpenOrca-100k
  source:
    dataset_name: Open-Orca/OpenOrca
    sample_size: 100000
    source_name: huggingface
  # Both `question` and `response` are embedded with `gte-small`.
  embeddings:
  - path: question
    embedding: gte-small
  - path: response
    embedding: gte-small
  signals:
  # --- Signals on `question` ---
  - path: question
    signal:
      signal_name: near_dup
  - path: question
    signal:
      signal_name: pii
  - path: question
    signal:
      signal_name: lang_detection
  # Concept scores on `question`, all from the `lilac` concept namespace.
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      signal_name: concept_score
  - path: question
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      signal_name: concept_score
  - path: question
    signal:
      signal_name: text_statistics
  # --- Signals on `response` (same set, same order as `question`) ---
  - path: response
    signal:
      signal_name: near_dup
  - path: response
    signal:
      signal_name: pii
  - path: response
    signal:
      signal_name: lang_detection
  # Concept scores on `response`, all from the `lilac` concept namespace.
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: positive-sentiment
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: non-english
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: toxicity
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: question
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: legal-termination
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: source-code
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: negative-sentiment
      signal_name: concept_score
  - path: response
    signal:
      embedding: gte-small
      namespace: lilac
      concept_name: profanity
      signal_name: concept_score
  - path: response
    signal:
      signal_name: text_statistics
  settings:
    ui:
      media_paths:
      - question
      - response
# local/the_movies_dataset: a single CSV file read through the `csv` source.
- namespace: local
  name: the_movies_dataset
  source:
    filepaths:
    - gs://lilac-data/datasets/the_movies_dataset/the_movies_dataset.csv
    # Explicit empty list, not null.
    # NOTE(review): presumably column names then come from the CSV header -
    # confirm against the csv source's documentation.
    names: []
    source_name: csv
  settings:
    ui:
      media_paths:
      - overview
# local/glue_ax_parquet: a single Parquet file read through the `parquet`
# source; no embeddings or signals are configured for this entry.
- namespace: local
  name: glue_ax_parquet
  source:
    filepaths:
    - gs://lilac-data/datasets/glue_ax_parquet/glue_ax.parquet
    source_name: parquet
  settings:
    ui:
      media_paths:
      - premise