#!/usr/bin/env python
# Copyright (c) OpenMMLab. All rights reserved.
import functools as func
import re
from os.path import basename, splitext
import numpy as np
import titlecase
from weight_list import gen_weight_list
def title2anchor(name):
    """Convert a section title into its readthedocs-style anchor slug."""
    lowered = name.strip().lower()
    # Replace every non-alphanumeric character with a dash, then collapse
    # runs of dashes and trim any leading/trailing ones.
    dashed = re.sub(r'[^a-zA-Z0-9]', '-', lowered)
    collapsed = re.sub(r'-+', '-', dashed)
    return collapsed.strip('-')
# Count algorithms: scan each model-index markdown file and collect
# per-file statistics (papers, configs, checkpoints) plus a markdown
# summary snippet for later aggregation into modelzoo.md.
files = [
    'backbones.md', 'textdet_models.md', 'textrecog_models.md', 'kie_models.md'
]
stats = []
for f in files:
    with open(f) as content_file:
        content = content_file.read()

    # Remove the blockquote notation from the paper link under the title
    # for better layout in readthedocs, then rewrite the file in place.
    expr = r'(^## \s*?.*?\s+?)>\s*?(\[.*?\]\(.*?\))'
    content = re.sub(expr, r'\1\2', content, flags=re.MULTILINE)
    with open(f, 'w') as content_file:
        content_file.write(content)

    # Page title: first line of the file with the markdown '#' marks removed.
    title = content.split('\n')[0].replace('#', '')

    # Count papers: match `<!-- [TYPE] -->` markers followed by a BibTeX
    # `title = {...}` field, excluding the ABSTRACT/IMAGE marker types.
    # Titles are normalized via titlecase so duplicates collapse in the set.
    exclude_papertype = ['ABSTRACT', 'IMAGE']
    exclude_expr = ''.join(f'(?!{s})' for s in exclude_papertype)
    expr = rf'<!-- \[{exclude_expr}([A-Z]+?)\] -->'\
        r'\s*\n.*?\btitle\s*=\s*{(.*?)}'
    papers = {(papertype, titlecase.titlecase(paper.lower().strip()))
              for (papertype, paper) in re.findall(expr, content, re.DOTALL)}
    print(papers)  # NOTE(review): looks like leftover debug output — consider removing

    # Paper links: search the file bottom-up (lines reversed) so that for
    # each BibTeX title the nearest preceding `## ` heading — i.e. the
    # paper's own section — is the first heading the regex encounters.
    revcontent = '\n'.join(list(reversed(content.splitlines())))
    paperlinks = {}
    for _, p in papers:
        # Escape backslashes and '?' so the title can be embedded in a regex.
        q = p.replace('\\', '\\\\').replace('?', '\\?')
        paper_link = title2anchor(
            re.search(
                rf'\btitle\s*=\s*{{\s*{q}\s*}}.*?\n## (.*?)\s*[,;]?\s*\n',
                revcontent, re.DOTALL | re.IGNORECASE).group(1))
        # Link into the per-file page, e.g. textdet_models.md#dbnet.
        paperlinks[p] = f'[{p}]({splitext(basename(f))[0]}.md#{paper_link})'
    paperlist = '\n'.join(
        sorted(f' - [{t}] {paperlinks[x]}' for t, x in papers))

    # Count config files referenced by URL anywhere in the page.
    configs = {
        x.lower().strip()
        for x in re.findall(r'https.*configs/.*\.py', content)
    }

    # Count checkpoint (.pth) download links belonging to mmocr.
    ckpts = {
        x.lower().strip()
        for x in re.findall(r'https://download.*\.pth', content)
        if 'mmocr' in x
    }

    # Markdown summary snippet for this file, aggregated later.
    statsmsg = f"""
### [{title}]({f})
* Number of checkpoints: {len(ckpts)}
* Number of configs: {len(configs)}
* Number of papers: {len(papers)}
{paperlist}
"""
    stats.append((papers, configs, ckpts, statsmsg))
# Aggregate across all scanned files: union the per-file sets of papers,
# configs and checkpoints, and concatenate the per-file markdown snippets.
allpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _, _ in stats])
allconfigs = func.reduce(lambda a, b: a.union(b), [c for _, c, _, _ in stats])
allckpts = func.reduce(lambda a, b: a.union(b), [c for _, _, c, _ in stats])
msglist = '\n'.join(x for _, _, _, x in stats)

# Per-paper-type counts (the [TYPE] marker, e.g. ALGORITHM) over all papers.
papertypes, papercounts = np.unique([t for t, _ in allpapers],
                                    return_counts=True)
countstr = '\n'.join(
    [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)])

# get model list from the project-local weight_list helper
weight_list = gen_weight_list()

# Assemble the full overview page and write it out as modelzoo.md.
modelzoo = f"""
# Overview
## Weights
Here are the list of weights available for
[Inference](user_guides/inference.md).
For the ease of reference, some weights may have shorter aliases, which will be
separated by `/` in the table.
For example, "`DB_r18 / dbnet_resnet18_fpnc_1200e_icdar2015`" means that you can
use either `DB_r18` or `dbnet_resnet18_fpnc_1200e_icdar2015`
to initialize the Inferencer:
```python
>>> from mmocr.apis import TextDetInferencer
>>> inferencer = TextDetInferencer(model='DB_r18')
>>> # equivalent to
>>> inferencer = TextDetInferencer(model='dbnet_resnet18_fpnc_1200e_icdar2015')
```
{weight_list}
## Statistics
* Number of checkpoints: {len(allckpts)}
* Number of configs: {len(allconfigs)}
* Number of papers: {len(allpapers)}
{countstr}
{msglist}
""" # noqa

with open('modelzoo.md', 'w') as f:
    f.write(modelzoo)