# DVC pipeline for the KAUNO Annif projects: set up the Python environment,
# load the vocabulary, train the individual projects and the ensemble, then
# evaluate every project against the Kirjasampo test set.
# (Captured from revision dcb9011; size at capture time was 2,338 bytes.)
stages:
  # Ensure Annif is installed.
  # Creates a virtualenv in ./venv and installs requirements.txt into it
  # (presumably this is what provides venv/bin/annif - confirm against
  # requirements.txt). The copied file "venv-installed" is a marker that the
  # other stages depend on, so they rerun when requirements.txt changes.
  install:
    cmd:
      - python3 -m venv venv
      - . venv/bin/activate && pip install -U pip wheel setuptools && pip install -r requirements.txt
      - cp requirements.txt venv-installed
    deps:
      - requirements.txt
    outs:
      # Marker file only; keep it out of the DVC cache.
      - venv-installed:
          cache: false

  # Load KAUNO vocabulary from the reduced SKOS Turtle file.
  loadvoc:
    cmd: venv/bin/annif load-vocab --force kauno corpora/kauno-skos-reduced.ttl
    deps:
      - venv-installed
      - corpora/kauno-skos-reduced.ttl
    outs:
      - data/vocabs/kauno

  # Train MLLM project on the Kirjasampo training set.
  train-mllm:
    cmd: venv/bin/annif train kauno-mllm-fi -j 8 -d 2000 corpora/kirjasampo/kirjasampo-kauno-train.tsv
    deps:
      - venv-installed
      - corpora/kirjasampo/kirjasampo-kauno-train.tsv
      - data/vocabs/kauno
    outs:
      - data/projects/kauno-mllm-fi

  # Train Omikuji project using Kirjasampo data.
  train-omikuji-ks:
    cmd: venv/bin/annif train kauno-ks-bonsai-fi -j 8 corpora/kirjasampo/kirjasampo-kauno-train.tsv
    deps:
      - venv-installed
      - corpora/kirjasampo/kirjasampo-kauno-train.tsv
      - data/vocabs/kauno
    outs:
      - data/projects/kauno-ks-bonsai-fi

  # Train Omikuji project using Finna data.
  train-omikuji-finna:
    cmd: venv/bin/annif train kauno-finna-bonsai-fi -j 8 corpora/finna/kauno-finna-fin.tsv.gz
    deps:
      - venv-installed
      - corpora/finna/kauno-finna-fin.tsv.gz
      - data/vocabs/kauno
    outs:
      - data/projects/kauno-finna-bonsai-fi

  # Train NN ensemble on the Kirjasampo validation set.
  # Depends on the three project directories produced by the training stages
  # above, so it is rebuilt whenever any of them changes.
  train-nn-ensemble:
    cmd: venv/bin/annif train -j 8 kauno-fi corpora/kirjasampo/kirjasampo-kauno-validate.tsv
    deps:
      - venv-installed
      - corpora/kirjasampo/kirjasampo-kauno-validate.tsv
      - data/vocabs/kauno
      - data/projects/kauno-mllm-fi
      - data/projects/kauno-ks-bonsai-fi
      - data/projects/kauno-finna-bonsai-fi
    outs:
      - data/projects/kauno-fi

  # Evaluate projects.
  # "foreach" generates one stage per listed suffix; ${item} is replaced by
  # that suffix, giving project kauno-<item> and report reports/kauno-<item>.json.
  eval:
    foreach:
      - mllm-fi
      - ks-bonsai-fi
      - finna-bonsai-fi
      - fi
    do:
      cmd:
        - venv/bin/annif eval kauno-${item} -j 8 -m F1@5 -m NDCG --metrics-file reports/kauno-${item}.json corpora/kirjasampo/kirjasampo-kauno-test.tsv
      deps:
        - venv-installed
        - corpora/kirjasampo/kirjasampo-kauno-test.tsv
        - data/projects/kauno-${item}
      metrics:
        # Metrics JSON is kept out of the DVC cache.
        - reports/kauno-${item}.json:
            cache: false