File size: 2,338 Bytes
dcb9011
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
stages:
  # Set up the tooling: create a virtual environment in ./venv and install
  # everything listed in requirements.txt into it. Later stages invoke
  # venv/bin/annif from this environment.
  install:
    cmd:
    - python3 -m venv venv
    # Upgrade the packaging tools first, then install the requirements.
    - >-
      . venv/bin/activate &&
      pip install -U pip wheel setuptools &&
      pip install -r requirements.txt
    # Marker file: a copy of requirements.txt that downstream stages list as
    # a dependency, so they are tied to the state of the installation.
    - cp requirements.txt venv-installed
    deps:
    - requirements.txt
    outs:
    # The marker is declared as an output but kept out of the DVC cache.
    - venv-installed:
        cache: false
  # Load the KAUNO vocabulary into Annif from
  # corpora/kauno-skos-reduced.ttl; --force is passed to load-vocab.
  # The result is tracked as data/vocabs/kauno.
  loadvoc:
    cmd: >-
      venv/bin/annif load-vocab --force kauno
      corpora/kauno-skos-reduced.ttl
    deps:
    - venv-installed
    - corpora/kauno-skos-reduced.ttl
    outs:
    - data/vocabs/kauno
  # Train the MLLM project (kauno-mllm-fi) on the Kirjasampo training set.
  # Per the Annif CLI, -j sets the number of parallel jobs and -d limits the
  # number of training documents (here 8 jobs, 2000 documents).
  # NOTE(review): the Annif project configuration is not listed under deps,
  # so editing it will not trigger retraining -- confirm this is intended.
  train-mllm:
    cmd: >-
      venv/bin/annif train kauno-mllm-fi -j 8 -d 2000
      corpora/kirjasampo/kirjasampo-kauno-train.tsv
    deps:
    - venv-installed
    - corpora/kirjasampo/kirjasampo-kauno-train.tsv
    - data/vocabs/kauno
    outs:
    - data/projects/kauno-mllm-fi
  # Train the Omikuji project kauno-ks-bonsai-fi on the Kirjasampo training
  # set, using 8 parallel jobs (-j 8). Requires the loaded KAUNO vocabulary.
  train-omikuji-ks:
    cmd: >-
      venv/bin/annif train kauno-ks-bonsai-fi -j 8
      corpora/kirjasampo/kirjasampo-kauno-train.tsv
    deps:
    - venv-installed
    - corpora/kirjasampo/kirjasampo-kauno-train.tsv
    - data/vocabs/kauno
    outs:
    - data/projects/kauno-ks-bonsai-fi
  # Train the Omikuji project kauno-finna-bonsai-fi on the gzipped Finna
  # corpus, using 8 parallel jobs (-j 8). Requires the loaded KAUNO
  # vocabulary.
  train-omikuji-finna:
    cmd: >-
      venv/bin/annif train kauno-finna-bonsai-fi -j 8
      corpora/finna/kauno-finna-fin.tsv.gz
    deps:
    - venv-installed
    - corpora/finna/kauno-finna-fin.tsv.gz
    - data/vocabs/kauno
    outs:
    - data/projects/kauno-finna-bonsai-fi
  # Train the NN ensemble project (kauno-fi). It is trained on the Kirjasampo
  # validation set rather than the training set used by the other projects,
  # and depends on the three trained projects listed below, so it runs only
  # after they have been built.
  train-nn-ensemble:
    cmd: >-
      venv/bin/annif train -j 8 kauno-fi
      corpora/kirjasampo/kirjasampo-kauno-validate.tsv
    deps:
    - venv-installed
    - corpora/kirjasampo/kirjasampo-kauno-validate.tsv
    - data/vocabs/kauno
    # Outputs of train-mllm, train-omikuji-ks and train-omikuji-finna.
    - data/projects/kauno-mllm-fi
    - data/projects/kauno-ks-bonsai-fi
    - data/projects/kauno-finna-bonsai-fi
    outs:
    - data/projects/kauno-fi
  # Evaluate every trained project against the Kirjasampo test set.
  # DVC expands this into one stage per entry of `foreach`; ${item} is the
  # project ID suffix, so the evaluated projects are kauno-mllm-fi,
  # kauno-ks-bonsai-fi, kauno-finna-bonsai-fi and kauno-fi.
  eval:
    foreach:
    - mllm-fi
    - ks-bonsai-fi
    - finna-bonsai-fi
    - fi
    do:
      cmd:
      # Report F1@5 and NDCG, writing them to a per-project JSON file.
      - >-
        venv/bin/annif eval kauno-${item} -j 8 -m F1@5 -m NDCG
        --metrics-file reports/kauno-${item}.json
        corpora/kirjasampo/kirjasampo-kauno-test.tsv
      deps:
      - venv-installed
      - corpora/kirjasampo/kirjasampo-kauno-test.tsv
      - data/projects/kauno-${item}
      metrics:
      # Registered as a DVC metrics file but kept out of the DVC cache.
      - reports/kauno-${item}.json:
          cache: false