File size: 3,814 Bytes
14c9181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env python
# Copyright (c) OpenMMLab. All rights reserved.
import functools as func
import re
from os.path import basename, splitext

import numpy as np
import titlecase
from weight_list import gen_weight_list


def title2anchor(name):
    return re.sub(r'-+', '-', re.sub(r'[^a-zA-Z0-9]', '-',
                                     name.strip().lower())).strip('-')


# Count algorithms

files = [
    'backbones.md', 'textdet_models.md', 'textrecog_models.md', 'kie_models.md'
]

stats = []

for f in files:
    with open(f) as content_file:
        content = content_file.read()

    # Remove the blackquote notation from the paper link under the title
    # for better layout in readthedocs
    expr = r'(^## \s*?.*?\s+?)>\s*?(\[.*?\]\(.*?\))'
    content = re.sub(expr, r'\1\2', content, flags=re.MULTILINE)
    with open(f, 'w') as content_file:
        content_file.write(content)

    # title
    title = content.split('\n')[0].replace('#', '')

    # count papers
    exclude_papertype = ['ABSTRACT', 'IMAGE']
    exclude_expr = ''.join(f'(?!{s})' for s in exclude_papertype)
    expr = rf'<!-- \[{exclude_expr}([A-Z]+?)\] -->'\
        r'\s*\n.*?\btitle\s*=\s*{(.*?)}'
    papers = {(papertype, titlecase.titlecase(paper.lower().strip()))
              for (papertype, paper) in re.findall(expr, content, re.DOTALL)}
    print(papers)
    # paper links
    revcontent = '\n'.join(list(reversed(content.splitlines())))
    paperlinks = {}
    for _, p in papers:
        q = p.replace('\\', '\\\\').replace('?', '\\?')
        paper_link = title2anchor(
            re.search(
                rf'\btitle\s*=\s*{{\s*{q}\s*}}.*?\n## (.*?)\s*[,;]?\s*\n',
                revcontent, re.DOTALL | re.IGNORECASE).group(1))
        paperlinks[p] = f'[{p}]({splitext(basename(f))[0]}.md#{paper_link})'
    paperlist = '\n'.join(
        sorted(f'    - [{t}] {paperlinks[x]}' for t, x in papers))
    # count configs
    configs = {
        x.lower().strip()
        for x in re.findall(r'https.*configs/.*\.py', content)
    }

    # count ckpts
    ckpts = {
        x.lower().strip()
        for x in re.findall(r'https://download.*\.pth', content)
        if 'mmocr' in x
    }

    statsmsg = f"""
### [{title}]({f})

* Number of checkpoints: {len(ckpts)}
* Number of configs: {len(configs)}
* Number of papers: {len(papers)}
{paperlist}

    """

    stats.append((papers, configs, ckpts, statsmsg))

allpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _, _ in stats])
allconfigs = func.reduce(lambda a, b: a.union(b), [c for _, c, _, _ in stats])
allckpts = func.reduce(lambda a, b: a.union(b), [c for _, _, c, _ in stats])
msglist = '\n'.join(x for _, _, _, x in stats)

papertypes, papercounts = np.unique([t for t, _ in allpapers],
                                    return_counts=True)
countstr = '\n'.join(
    [f'   - {t}: {c}' for t, c in zip(papertypes, papercounts)])

# get model list
weight_list = gen_weight_list()

modelzoo = f"""
# Overview

## Weights

Here are the list of weights available for
[Inference](user_guides/inference.md).

For the ease of reference, some weights may have shorter aliases, which will be
separated by `/` in the table.
For example, "`DB_r18 / dbnet_resnet18_fpnc_1200e_icdar2015`" means that you can
use either `DB_r18` or `dbnet_resnet18_fpnc_1200e_icdar2015`
to initialize the Inferencer:

```python
>>> from mmocr.apis import TextDetInferencer
>>> inferencer = TextDetInferencer(model='DB_r18')
>>> # equivalent to
>>> inferencer = TextDetInferencer(model='dbnet_resnet18_fpnc_1200e_icdar2015')
```

{weight_list}

## Statistics

* Number of checkpoints: {len(allckpts)}
* Number of configs: {len(allconfigs)}
* Number of papers: {len(allpapers)}
{countstr}

{msglist}
"""  # noqa

with open('modelzoo.md', 'w') as f:
    f.write(modelzoo)