Upload 140 files
This view is limited to 50 files because it contains too many changes. See the raw diff for the full change list.
- .gitattributes +16 -0
- custom_nodes/ComfyUI-N-Nodes/.github/ISSUE_TEMPLATE/bug_report.md +37 -0
- custom_nodes/ComfyUI-N-Nodes/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
- custom_nodes/ComfyUI-N-Nodes/.github/workflows/publish.yml +21 -0
- custom_nodes/ComfyUI-N-Nodes/.gitignore +322 -0
- custom_nodes/ComfyUI-N-Nodes/LICENSE +21 -0
- custom_nodes/ComfyUI-N-Nodes/README.md +369 -0
- custom_nodes/ComfyUI-N-Nodes/__init__.py +74 -0
- custom_nodes/ComfyUI-N-Nodes/__pycache__/__init__.cpython-311.pyc +0 -0
- custom_nodes/ComfyUI-N-Nodes/__pycache__/__init__.cpython-312.pyc +0 -0
- custom_nodes/ComfyUI-N-Nodes/__pycache__/nnodes.cpython-311.pyc +0 -0
- custom_nodes/ComfyUI-N-Nodes/__pycache__/nnodes.cpython-312.pyc +0 -0
- custom_nodes/ComfyUI-N-Nodes/config.json +4 -0
- custom_nodes/ComfyUI-N-Nodes/img/image-1.png +0 -0
- custom_nodes/ComfyUI-N-Nodes/img/image-10.png +0 -0
- custom_nodes/ComfyUI-N-Nodes/img/image-13.png +0 -0
- custom_nodes/ComfyUI-N-Nodes/img/image-14.png +0 -0
- custom_nodes/ComfyUI-N-Nodes/img/image-3.png +0 -0
- custom_nodes/ComfyUI-N-Nodes/img/image-4.png +0 -0
- custom_nodes/ComfyUI-N-Nodes/img/image-5.png +3 -0
- custom_nodes/ComfyUI-N-Nodes/img/image-6.png +3 -0
- custom_nodes/ComfyUI-N-Nodes/img/image-7.png +3 -0
- custom_nodes/ComfyUI-N-Nodes/img/image-8.png +0 -0
- custom_nodes/ComfyUI-N-Nodes/img/image-9.png +0 -0
- custom_nodes/ComfyUI-N-Nodes/img/image.png +0 -0
- custom_nodes/ComfyUI-N-Nodes/img/image11.png +0 -0
- custom_nodes/ComfyUI-N-Nodes/install_extra.bat +15 -0
- custom_nodes/ComfyUI-N-Nodes/js/cte_advanced.js +213 -0
- custom_nodes/ComfyUI-N-Nodes/js/dynamicPrompt.js +44 -0
- custom_nodes/ComfyUI-N-Nodes/js/extended_widgets.js +329 -0
- custom_nodes/ComfyUI-N-Nodes/js/gptSampler.js +43 -0
- custom_nodes/ComfyUI-N-Nodes/js/include_css.js +22 -0
- custom_nodes/ComfyUI-N-Nodes/js/styles.css +20 -0
- custom_nodes/ComfyUI-N-Nodes/js/videoLoadAdvanced.js +142 -0
- custom_nodes/ComfyUI-N-Nodes/js/videoSave.js +87 -0
- custom_nodes/ComfyUI-N-Nodes/libs/__pycache__/joytag_models.cpython-311.pyc +0 -0
- custom_nodes/ComfyUI-N-Nodes/libs/joytag_models.py +1109 -0
- custom_nodes/ComfyUI-N-Nodes/libs/migrate.py +43 -0
- custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/.gitignore +2 -0
- custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/README.md +68 -0
- custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/_sample.py +26 -0
- custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-1.jpg +3 -0
- custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-2.jpg +3 -0
- custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-3.jpg +0 -0
- custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-4.jpg +0 -0
- custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-5.jpg +0 -0
- custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/gradio_demo.py +36 -0
- custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/moondream/.gitattributes +35 -0
- custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/moondream/README.md +6 -0
- custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/moondream/__init__.py +2 -0
.gitattributes
CHANGED
@@ -1,2 +1,18 @@
 /web/assets/** linguist-generated
 /web/** linguist-vendored
+custom_nodes/ComfyUI-N-Nodes/img/image-5.png filter=lfs diff=lfs merge=lfs -text
+custom_nodes/ComfyUI-N-Nodes/img/image-6.png filter=lfs diff=lfs merge=lfs -text
+custom_nodes/ComfyUI-N-Nodes/img/image-7.png filter=lfs diff=lfs merge=lfs -text
+custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-1.jpg filter=lfs diff=lfs merge=lfs -text
+custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-2.jpg filter=lfs diff=lfs merge=lfs -text
+custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/moondream/assets/demo-1.jpg filter=lfs diff=lfs merge=lfs -text
+custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/moondream/assets/demo-2.jpg filter=lfs diff=lfs merge=lfs -text
+custom_nodes/ComfyUI-N-Nodes/libs/rifle/demo/I0_0.png filter=lfs diff=lfs merge=lfs -text
+custom_nodes/ComfyUI-N-Nodes/libs/rifle/demo/I0_1.png filter=lfs diff=lfs merge=lfs -text
+custom_nodes/ComfyUI-N-Nodes/libs/rifle/demo/I0_slomo_clipped.gif filter=lfs diff=lfs merge=lfs -text
+custom_nodes/ComfyUI-N-Nodes/libs/rifle/demo/i0.png filter=lfs diff=lfs merge=lfs -text
+custom_nodes/ComfyUI-N-Nodes/libs/rifle/demo/i1.png filter=lfs diff=lfs merge=lfs -text
+custom_nodes/ComfyUI-N-Nodes/libs/rifle/demo/I2_0.png filter=lfs diff=lfs merge=lfs -text
+custom_nodes/ComfyUI-N-Nodes/libs/rifle/demo/I2_1.png filter=lfs diff=lfs merge=lfs -text
+custom_nodes/ComfyUI-N-Nodes/libs/rifle/demo/I2_slomo_clipped.gif filter=lfs diff=lfs merge=lfs -text
+custom_nodes/ComfyUI-N-Nodes/libs/rifle/train_log/flownet.pkl filter=lfs diff=lfs merge=lfs -text
custom_nodes/ComfyUI-N-Nodes/.github/ISSUE_TEMPLATE/bug_report.md
ADDED
@@ -0,0 +1,37 @@
---
name: Bug report
about: Create a report to help us improve
title: "[BUG]"
labels: bug
assignees: ''

---

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error

**Expected behavior**
A clear and concise description of what you expected to happen.

**Full log**
This is MANDATORY. By log I mean all the text in the console from the time ComfyUI was started until the time of the reported bug.
>>Bug reports that do not have this log will be closed.<<

**Screenshots**
If applicable, add screenshots to help explain your problem.

**Desktop (please complete the following information):**
- OS: [e.g. iOS]
- Browser [e.g. chrome, safari]
- Version [e.g. 22]

**Additional context**
Add any other context about the problem here.
custom_nodes/ComfyUI-N-Nodes/.github/ISSUE_TEMPLATE/feature_request.md
ADDED
@@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: enhancement
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.
custom_nodes/ComfyUI-N-Nodes/.github/workflows/publish.yml
ADDED
@@ -0,0 +1,21 @@
name: Publish to Comfy registry
on:
  workflow_dispatch:
  push:
    branches:
      - main
    paths:
      - "pyproject.toml"

jobs:
  publish-node:
    name: Publish Custom Node to registry
    runs-on: ubuntu-latest
    steps:
      - name: Check out code
        uses: actions/checkout@v4
      - name: Publish Custom Node
        uses: Comfy-Org/publish-node-action@main
        with:
          ## Add your own personal access token to your Github Repository secrets and reference it here.
          personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }}
custom_nodes/ComfyUI-N-Nodes/.gitignore
ADDED
@@ -0,0 +1,322 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
libs/moondream_repo
# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
*.pyc
custom_nodes/ComfyUI-N-Nodes/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 pythongosssss

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
custom_nodes/ComfyUI-N-Nodes/README.md
ADDED
@@ -0,0 +1,369 @@
[](https://ko-fi.com/C0C0AJECJ)

# ComfyUI-N-Suite
A suite of custom nodes for ComfyUI that includes integer, string and float variable nodes, GPT nodes and video nodes.

> [!IMPORTANT]
> These nodes were tested primarily on Windows, in the default environment provided by ComfyUI and in the environment created by the [notebook](https://github.com/comfyanonymous/ComfyUI/blob/master/notebooks/comfyui_colab.ipynb) for Paperspace, specifically with the cyberes/gradient-base-py3.10:latest Docker image.
> **Any other environment has not been tested.**

# Installation

1. Clone the repository:
   `git clone https://github.com/Nuked88/ComfyUI-N-Nodes.git`
   into your ComfyUI `custom_nodes` directory.

2. ~~IMPORTANT: If you want the GPT nodes on GPU, you'll need to run the **install_dependency bat files**.
   There are 2 versions: ***install_dependency_ggml_models.bat*** for the old GGMLv3 models and ***install_dependency_gguf_models.bat*** for all the new models (GGUF).
   YOU CAN ONLY USE ONE OF THEM AT A TIME!
   Since _llama-cpp-python_ needs to be compiled from source code to enable GPU use, you will first need to have [CUDA](https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64) and Visual Studio 2019 or 2022 (in the case of my bat files) installed to compile it. For details and the full guide, see [HERE](https://github.com/abetlen/llama-cpp-python).~~

3. If you intend to use GPTLoaderSimple with the Moondream model, you'll need to execute the 'install_extra.bat' script, which will install transformers version 4.36.2.
4. Reboot ComfyUI.

In case you need to revert these changes (due to incompatibility with other nodes), you can use the 'remove_extra.bat' script.

ComfyUI will automatically load all custom scripts and nodes at startup.

> [!NOTE]
> The llama-cpp-python installation will be done automatically by the script. If you have an NVIDIA GPU, NO CUDA BUILD IS NECESSARY anymore, thanks to the [jllllll](https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/) repo. I've also dropped support for GGMLv3 models, since all notable models should have switched to the latest version of GGUF by now.

> [!NOTE]
> Since 14/02/2024, the node pack has undergone a massive rewrite, which also led to the renaming of all nodes in order to avoid conflicts with other extensions in the future (or at least I hope so). Consequently, the old workflows are no longer compatible and would require manual replacement of each node.
> To avoid this, I have created a tool that performs the replacement automatically.
> On Windows, simply drag any *.json workflow onto the migrate.bat file located in custom_nodes/ComfyUI-N-Nodes, and another workflow with the suffix _migrated will be created in the same folder as the current workflow.
> On Linux, you can use the script in the following way: python libs/migrate.py path/to/original/workflow/.
> For safety reasons, the original workflow will not be deleted.
> To install the last version of this repository from before these changes, execute **git checkout 29b2e43baba81ee556b2930b0ca0a9c978c47083**

- For uninstallation:
  - Delete the `ComfyUI-N-Nodes` folder in `custom_nodes`
  - Delete the `comfyui-n-nodes` folder in `ComfyUI\web\extensions`
  - Delete the `n-styles.csv` and `n-styles.csv.backup` files in `ComfyUI\styles`
  - Delete the `GPTcheckpoints` folder in `ComfyUI\models`

# Update
1. Navigate to the cloned repo, e.g. `custom_nodes/ComfyUI-N-Nodes`
2. `git pull`

# Features

## 📽️ Video Nodes 📽️

### LoadVideo

The LoadVideoAdvanced node allows loading a video file and extracting frames from it.
The name has been changed from `LoadVideo` to `LoadVideoAdvanced` in order to avoid conflicts with the `LoadVideo` animatediff node.

#### Input Fields
- `video`: Select the video file to load.
- `framerate`: Choose whether to keep the original framerate or reduce it to half or quarter speed.
- `resize_by`: Select how to resize frames - 'none', 'height', or 'width'.
- `size`: Target size if resizing by height or width.
- `images_limit`: Limit the number of frames to extract.
- `batch_size`: Batch size for encoding frames.
- `starting_frame`: Select which frame to start from.
- `autoplay`: Select whether to autoplay the video.
- `use_ram`: Use RAM instead of disk for decompressing video frames.

#### Output

- `IMAGES`: Extracted frame images as PyTorch tensors.
- `LATENT`: Empty latent vectors.
- `METADATA`: Video metadata - FPS and number of frames.
- `WIDTH`: Frame width.
- `HEIGHT`: Frame height.
- `META_FPS`: Frame rate.
- `META_N_FRAMES`: Number of frames.

The node extracts frames from the input video at the specified framerate. It resizes frames if requested and returns them as batches of PyTorch image tensors along with latent vectors, metadata, and frame dimensions.
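For intuition, here is a minimal standalone sketch of this kind of frame extraction using OpenCV (which this node pack installs as a dependency). It is not the node's actual implementation; the file path and the half-rate option are illustrative assumptions:

```python
import cv2

# Minimal sketch: read frames from a video, optionally keeping every
# other frame to halve the framerate, conceptually like the node's
# "framerate" option. "example.mp4" is a placeholder path.
cap = cv2.VideoCapture("example.mp4")
fps = cap.get(cv2.CAP_PROP_FPS)
frames, keep_every, i = [], 2, 0  # keep_every=2 ~ "half" framerate
while True:
    ok, frame = cap.read()
    if not ok:
        break
    if i % keep_every == 0:
        frames.append(frame)
    i += 1
cap.release()
print(f"kept {len(frames)} frames at ~{fps / keep_every:.1f} fps")
```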
### SaveVideo
The SaveVideo node takes in extracted frames and saves them back as a video file.

#### Input Fields
- `images`: Frame images as tensors.
- `METADATA`: Metadata from the LoadVideo node.
- `SaveVideo`: Toggle saving the output video file.
- `SaveFrames`: Toggle saving frames to a folder.
- `CompressionLevel`: PNG compression level for saving frames.
#### Output
Saves the output video file and/or the extracted frames.

The node takes extracted frames and metadata and can save them as a new video file and/or individual frame images. Video compression and frame PNG compression can be configured.
NOTE: If you are using **LoadVideo** as the source of the frames, the audio of the original file will be maintained, but only if **images_limit** and **starting_frame** are equal to zero.

### LoadFramesFromFolder

The LoadFramesFromFolder node allows loading image frames from a folder and returning them as a batch.

#### Input Fields
- `folder`: Path to the folder containing the frame images. They must be in PNG format, named with a number (e.g. 1.png or even 0001.png). The images will be loaded sequentially.
- `fps`: Frames per second to assign to the loaded frames.

#### Output
- `IMAGES`: Batch of loaded frame images as PyTorch tensors.
- `METADATA`: Metadata containing the set FPS value.
- `MAX_WIDTH`: Maximum frame width.
- `MAX_HEIGHT`: Maximum frame height.
- `FRAME COUNT`: Number of frames in the folder.
- `PATH`: Path to the folder containing the frame images.
- `IMAGE LIST`: List of frame images in the folder (not a real list, just a string divided by \n).

The node loads all image files from the specified folder, converts them to PyTorch tensors, and returns them as a batched tensor along with simple metadata containing the set FPS value.

This allows easily loading a set of frames that were extracted and saved previously, for example, to reload and process them again. By setting the FPS value, the frames can be properly interpreted as a video sequence.
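The numeric naming requirement matters because plain lexicographic sorting would put 10.png before 2.png. A small sketch of listing frames in numeric order (assumed logic, not the node's exact code):

```python
from pathlib import Path

def list_frames(folder: str) -> list:
    # Frames must be PNGs named with a number (1.png, 0001.png, ...);
    # sorting by the integer value keeps 2.png ahead of 10.png.
    return sorted(Path(folder).glob("*.png"), key=lambda p: int(p.stem))

# Example: list_frames("output/frames") -> [1.png, 2.png, ..., 10.png]
```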
### SetMetadataForSaveVideo

The SetMetadataForSaveVideo node allows setting metadata for the SaveVideo node.

### FrameInterpolator

The FrameInterpolator node allows interpolating between extracted video frames to increase the frame rate and smooth motion.

#### Input Fields

- `images`: Extracted frame images as tensors.
- `METADATA`: Metadata from the video - FPS and number of frames.
- `multiplier`: Factor by which to increase the frame rate.

#### Output

- `IMAGES`: Interpolated frames as image tensors.
- `METADATA`: Updated metadata with the new frame rate.

The node takes extracted frames and metadata as input. It uses an interpolation model (RIFE) to generate additional in-between frames at a higher frame rate.

The original frame rate in the metadata is multiplied by the `multiplier` value to get the new interpolated frame rate.

The interpolated frames are returned as a batch of image tensors, along with updated metadata containing the new frame rate.

This allows increasing the frame rate of an existing video to achieve smoother motion and slower playback. The interpolation model creates new, realistic frames to fill in the gaps rather than just duplicating existing frames.
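As a quick worked example of the metadata update (the values are illustrative, not from the node):

```python
# A 24 fps clip of 120 frames with multiplier 2 becomes a 48 fps clip;
# generated frames fill the gaps between consecutive originals, so the
# frame count grows by roughly the same factor.
metadata = {"fps": 24.0, "n_frames": 120}
multiplier = 2
new_metadata = {
    "fps": metadata["fps"] * multiplier,            # 48.0
    "n_frames": metadata["n_frames"] * multiplier,  # ~240
}
```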
The original code was taken from [HERE](https://github.com/hzwer/Practical-RIFE/tree/main)

## Variables
Since the primitive node has limitations in links (for example, at the time I'm writing you cannot link "start_at_step" and "steps" of another KSampler together), I decided to create these simple node-variables to bypass this limitation.
The node-variables are:
- Integer
- Float
- String

## 🤖 GPTLoaderSimple and GPTSampler 🤖

These custom nodes are designed to enhance the capabilities of the ComfyUI framework by enabling text generation using GGUF GPT models. This README provides an overview of the two custom nodes and their usage within ComfyUI.

You can add the path where your GGUF models are located to _extra_model_paths.yaml_ in this way (example):

`other_ui:
  base_path: I:\\text-generation-webui
  GPTcheckpoints: models/`

Otherwise, it will create a GPTcheckpoints folder in the model folder of ComfyUI where you can place your .gguf models.

Two folders have also been created within the 'Llava' directory in the 'GPTcheckpoints' folder for the LLava model:

`clips`: This folder is designated for storing the clips for your LLava models (usually, files that start with **mm** in the repository).
`models`: This folder is designated for storing the LLava models (a sketch of the resulting layout follows below).
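To make the layout concrete, here is a sketch of how the directory tree is expected to look, assuming the default ComfyUI model folder (file names are placeholders):

```
ComfyUI/models/GPTcheckpoints/
├── some-model.gguf
└── Llava/
    ├── clips/     <- mm*-style clip files
    └── models/    <- LLava .gguf models
```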
This node pack currently supports 4 different model types:
- All the GGUF models supported by [llama.cpp](https://github.com/ggerganov/llama.cpp)
- Llava
- Moondream
- Joytag

#### GGUF LLM

The GGUF models can be downloaded from the [Huggingface Hub](https://huggingface.co/models?search=gguf)

[HERE](https://www.youtube.com/watch?v=gzTqXbF0S-w) is a video example of how to use the GGUF models, by [boricuapab](https://github.com/boricuapab)

#### Llava
Here is a small list of the models supported by these nodes:

[LlaVa 1.5 7B](https://huggingface.co/mys/ggml_llava-v1.5-7b/)
[LlaVa 1.5 13B](https://huggingface.co/mys/ggml_llava-v1.5-13b)
[LlaVa 1.6 Mistral 7B](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/)
[BakLLaVa](https://huggingface.co/mys/ggml_bakllava-1)
[Nous Hermes 2 Vision](https://huggingface.co/billborkowski/llava-NousResearch_Nous-Hermes-2-Vision-GGUF)

#### Example with Llava model:

#### Moondream
The model will be downloaded automatically the first time you run it.
In any case, it is available [HERE](https://huggingface.co/vikhyatk/moondream1/tree/main)
The code was taken from [this repository](https://github.com/vikhyat/moondream)

#### Example with Moondream model:

#### Joytag
The model will be downloaded automatically the first time you run it.
In any case, it is available [HERE](https://huggingface.co/fancyfeast/joytag/tree/main)
The code was taken from [this repository](https://github.com/fpgaminer/joytag)

#### Example with Joytag model:

### GPTLoaderSimple

The `GPTLoaderSimple` node is responsible for loading GPT model checkpoints and creating an instance of the Llama library for text generation. It provides an interface to configure GPU layers, the number of threads, and the maximum context for text generation.

#### Input Fields

- `ckpt_name`: Select the GPT checkpoint name from the available options (joytag and moondream will be downloaded automatically when used the first time).
- `gpu_layers`: Specify the number of GPU layers to use (default: 27).
- `n_threads`: Specify the number of threads for text generation (default: 8).
- `max_ctx`: Specify the maximum context length for text generation (default: 2048).

#### Output

The node returns an instance of the Llama library (MODEL) and the path to the loaded checkpoint (STRING).
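For reference, loading a GGUF checkpoint through llama-cpp-python with the same parameters looks roughly like this. This is a minimal sketch with a placeholder path, not the node's actual code:

```python
from llama_cpp import Llama

# Mirror of GPTLoaderSimple's inputs: gpu_layers -> n_gpu_layers,
# n_threads -> n_threads, max_ctx -> n_ctx. The path is a placeholder.
llm = Llama(
    model_path="ComfyUI/models/GPTcheckpoints/some-model.gguf",
    n_gpu_layers=27,
    n_threads=8,
    n_ctx=2048,
)
```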
### GPTSampler

The `GPTSampler` node facilitates text generation using GPT models based on the input prompt and various generation parameters. It allows you to control aspects like temperature, top-p sampling, penalties, and more.

#### Input Fields

- `prompt`: Enter the input prompt for text generation.
- `image`: Image input for the Joytag, Moondream and Llava models.
- `model`: Choose the GPT model to use for text generation.
- `max_tokens`: Set the maximum number of tokens in the generated text (default: 128).
- `temperature`: Set the temperature parameter for randomness (default: 0.7).
- `top_p`: Set the top-p probability for nucleus sampling (default: 0.5).
- `logprobs`: Specify the number of log probabilities to output (default: 0).
- `echo`: Enable or disable printing the input prompt alongside the generated text.
- `stop_token`: Specify the token at which text generation stops.
- `frequency_penalty`, `presence_penalty`, `repeat_penalty`: Control word generation penalties.
- `top_k`: Set the top-k tokens to consider during generation (default: 40).
- `tfs_z`: Set the temperature scaling factor for top frequent samples (default: 1.0).
- `print_output`: Enable or disable printing the generated text to the console.
- `cached`: Choose whether to use cached generation (default: NO).
- `prefix`, `suffix`: Specify text to prepend and append to the prompt.
- `max_tags`: This only affects the maximum number of tags generated by Joytag.

#### Output

The node returns the generated text along with a UI-friendly representation.
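For GGUF models, these sampling parameters map directly onto a llama-cpp-python completion call. A minimal sketch, continuing the `llm` instance from the loader example above, with an illustrative prompt and penalty values:

```python
# Same parameter names as the node where llama-cpp-python exposes them.
out = llm(
    "Describe a sunset over the sea in one sentence.",
    max_tokens=128,
    temperature=0.7,
    top_p=0.5,
    top_k=40,
    repeat_penalty=1.1,     # illustrative value
    frequency_penalty=0.0,
    presence_penalty=0.0,
    tfs_z=1.0,
)
print(out["choices"][0]["text"])
```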
## Image Pad For Outpainting Advanced

The `ImagePadForOutpaintingAdvanced` node is an alternative to the `ImagePadForOutpainting` node that applies the technique seen in [this video](https://www.youtube.com/@robadams2451) under the outpainting mask.
The color correction part was taken from [this](https://github.com/sipherxyz/comfyui-art-venture) custom node from Sipherxyz.

#### Input Fields

- `image`: Image input.
- `left`: pixels to extend from the left.
- `top`: pixels to extend from the top.
- `right`: pixels to extend from the right.
- `bottom`: pixels to extend from the bottom.
- `feathering`: feathering strength.
- `noise`: blend strength between the noise and the copied border.
- `pixel_size`: how big the pixels will be in the pixelated effect.
- `pixel_to_copy`: how many pixels to copy (from each side).
- `temperature`: color correction setting that is only applied to the mask part.
- `hue`: color correction setting that is only applied to the mask part.
- `brightness`: color correction setting that is only applied to the mask part.
- `contrast`: color correction setting that is only applied to the mask part.
- `saturation`: color correction setting that is only applied to the mask part.
- `gamma`: color correction setting that is only applied to the mask part.

#### Output

The node returns the processed image and the mask.

## Dynamic Prompt

The `DynamicPrompt` node generates prompts by combining a fixed prompt with a random selection of tags from a variable prompt. This enables flexible and dynamic prompt generation for various use cases.

#### Input Fields

- `variable_prompt`: Enter the variable prompt for tag selection.
- `cached`: Choose whether to cache the generated prompt (default: NO).
- `number_of_random_tag`: Choose between "Fixed" and "Random" for the number of random tags to include.
- `fixed_number_of_random_tag`: If `number_of_random_tag` is "Fixed", specify the number of random tags to include (default: 1).
- `fixed_prompt` (Optional): Enter the fixed prompt for generating the final prompt.

#### Output

The node returns the generated prompt, which is a combination of the fixed prompt and the selected random tags.

#### Example Usage

- Just fill the `variable_prompt` field with comma-separated tags; the `fixed_prompt` is optional. A sketch of the combination logic follows below.
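A minimal Python sketch of what the node does conceptually, with assumed logic and illustrative inputs (not the node's actual implementation):

```python
import random

def dynamic_prompt(fixed_prompt: str, variable_prompt: str, n_tags: int = 1) -> str:
    # Split the variable prompt into tags and pick a random subset,
    # then append the chosen tags to the fixed prompt.
    tags = [t.strip() for t in variable_prompt.split(",") if t.strip()]
    chosen = random.sample(tags, min(n_tags, len(tags)))
    parts = ([fixed_prompt] if fixed_prompt else []) + chosen
    return ", ".join(parts)

print(dynamic_prompt("portrait of a woman", "red hair, blue eyes, freckles", n_tags=2))
# e.g. "portrait of a woman, freckles, red hair"
```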
## CLIP Text Encode Advanced (Experimental)

The `CLIP Text Encode Advanced` node is an alternative to the standard `CLIP Text Encode` node. It offers support for Add/Replace/Delete styles, allowing for the inclusion of both positive and negative prompts within a single node.

The base style file is called `n-styles.csv` and is located in the `ComfyUI\styles` folder.
The styles file follows the same format as the current `styles.csv` file utilized in A1111 (at the time of writing).
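For orientation, an A1111-style row layout looks like this (the style row is a hypothetical example, not a shipped style):

```
name,prompt,negative_prompt
cinematic,"cinematic still, shallow depth of field","blurry, low quality"
```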
NOTE: this node is experimental and still has a lot of bugs.

#### Input Fields

- `clip`: clip input
- `style`: it will automatically fill the positive and negative prompts based on the chosen style

#### Output
- `positive`: positive conditions
- `negative`: negative conditions

## Troubleshooting

- ~~**SaveVideo - Preview not working**: this is related to a conflict with animateDiff; I've already opened a [PR](https://github.com/ArtVentureX/comfyui-animatediff/pull/64) to solve this issue. Meanwhile, you can download my patched version from [here](https://github.com/Nuked88/comfyui-animatediff)~~ The pull request has been merged, so this problem should be fixed now!

## Contributing

Feel free to contribute to this project by reporting issues or suggesting improvements. Open an issue or submit a pull request on the GitHub repository.

## License

This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
custom_nodes/ComfyUI-N-Nodes/__init__.py
ADDED
@@ -0,0 +1,74 @@
import importlib.util
import os
import sys
from .nnodes import init, get_ext_dir, check_and_install, downloader, get_commit, color
import folder_paths
import traceback
from pathlib import Path
NODE_CLASS_MAPPINGS = {}
NODE_DISPLAY_NAME_MAPPINGS = {}


if init():
    print("------------------------------------------")
    print(f"{color.BLUE}### N-Suite Revision:{color.END} {color.GREEN}{get_commit()} {color.END}")
    py = Path(get_ext_dir("py"))
    files = list(py.glob("*.py"))
    check_and_install('packaging')
    check_and_install('py-cpuinfo', "cpuinfo")
    check_and_install('gitpython', 'git')
    check_and_install('moviepy')
    check_and_install("opencv-python", "cv2")
    check_and_install('scikit-build', "skbuild")
    # LLAMA DEPENDENCIES
    check_and_install('typing')
    check_and_install('diskcache')
    check_and_install('llama_cpp')
    check_and_install('timm', "timm", "0.9.12", reboot=True)
    #check_and_install('gitpython',"git")
    #check_and_install('sentencepiece')
    #check_and_install("accelerate")
    #check_and_install('transformers','transformers',"4.36.2")


    # git clone https://github.com/hzwer/Practical-RIFE.git
    from git import Repo
    if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libs", "rifle")):
        Repo.clone_from("https://github.com/hzwer/Practical-RIFE.git", os.path.join(os.path.dirname(os.path.realpath(__file__)), "libs", "rifle"))

    if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libs", "moondream_repo")):
        repo = Repo.clone_from("https://github.com/Nuked88/moondream.git", os.path.join(os.path.dirname(os.path.realpath(__file__)), "libs", "moondream_repo"))

    #commit_hash = "38af98596e59f2a6c25c6b52b2bd5a672dab4144"
    #repo.git.checkout(commit_hash)

    # if the file moondream.py does not exist
    #if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)),"libs","moondream_repo","moondream","moondream.py")):
    #    # delete moondream_repo and download the repo again
    #    shutil.rmtree(os.path.join(os.path.dirname(os.path.realpath(__file__)),"libs","moondream_repo"))
    #    repo = Repo.clone_from("https://github.com/Nuked88/moondream.git", os.path.join(os.path.dirname(os.path.realpath(__file__)),"libs","moondream_repo"))

    # if the train_log folder does not exist
    if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)), "libs", "rifle", "train_log")):
        downloader("https://github.com/Nuked88/DreamingAI/raw/main/RIFE_trained_model_v4.7.zip")

    # code based on the pysssss repo
    for file in files:
        try:
            name = os.path.splitext(file)[0]
            spec = importlib.util.spec_from_file_location(name, os.path.join(py, file))
            module = importlib.util.module_from_spec(spec)
            sys.modules[name] = module
            spec.loader.exec_module(module)
            if hasattr(module, "NODE_CLASS_MAPPINGS") and getattr(module, "NODE_CLASS_MAPPINGS") is not None:
                NODE_CLASS_MAPPINGS.update(module.NODE_CLASS_MAPPINGS)
            if hasattr(module, "NODE_DISPLAY_NAME_MAPPINGS") and getattr(module, "NODE_DISPLAY_NAME_MAPPINGS") is not None:
                NODE_DISPLAY_NAME_MAPPINGS.update(module.NODE_DISPLAY_NAME_MAPPINGS)
        except Exception as e:
            traceback.print_exc()


__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
WEB_DIRECTORY = "./js"
custom_nodes/ComfyUI-N-Nodes/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (4.69 kB)
custom_nodes/ComfyUI-N-Nodes/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (4.32 kB)
custom_nodes/ComfyUI-N-Nodes/__pycache__/nnodes.cpython-311.pyc
ADDED
Binary file (20.1 kB)
custom_nodes/ComfyUI-N-Nodes/__pycache__/nnodes.cpython-312.pyc
ADDED
Binary file (17.8 kB)
custom_nodes/ComfyUI-N-Nodes/config.json
ADDED
@@ -0,0 +1,4 @@
{
  "name": "N-Suite",
  "logging": false
}
custom_nodes/ComfyUI-N-Nodes/img/image-1.png
ADDED
custom_nodes/ComfyUI-N-Nodes/img/image-10.png
ADDED
custom_nodes/ComfyUI-N-Nodes/img/image-13.png
ADDED
custom_nodes/ComfyUI-N-Nodes/img/image-14.png
ADDED
custom_nodes/ComfyUI-N-Nodes/img/image-3.png
ADDED
custom_nodes/ComfyUI-N-Nodes/img/image-4.png
ADDED
custom_nodes/ComfyUI-N-Nodes/img/image-5.png
ADDED (Git LFS)
custom_nodes/ComfyUI-N-Nodes/img/image-6.png
ADDED (Git LFS)
custom_nodes/ComfyUI-N-Nodes/img/image-7.png
ADDED (Git LFS)
custom_nodes/ComfyUI-N-Nodes/img/image-8.png
ADDED
custom_nodes/ComfyUI-N-Nodes/img/image-9.png
ADDED
custom_nodes/ComfyUI-N-Nodes/img/image.png
ADDED
custom_nodes/ComfyUI-N-Nodes/img/image11.png
ADDED
custom_nodes/ComfyUI-N-Nodes/install_extra.bat
ADDED
@@ -0,0 +1,15 @@
@echo off
set "python_exec=..\..\..\python_embeded\python.exe"

echo Installing dependency for moondream_repo...
if exist "%python_exec%" (
    echo Installing with ComfyUI Portable
    "%python_exec%" -s -m pip install transformers==4.36.2
    echo Done. Please reboot ComfyUI.
) else (
    echo Installing with system Python
    pip install transformers==4.36.2
    echo Done. Please reboot ComfyUI.
)

pause
custom_nodes/ComfyUI-N-Nodes/js/cte_advanced.js
ADDED
@@ -0,0 +1,213 @@
import { app } from "/scripts/app.js";
import { api } from "/scripts/api.js"
const MultilineSymbol = Symbol();
const MultilineResizeSymbol = Symbol();

function getStyles(name) {
    //console.log("getStyles called " + name);

    return api.fetchApi('/nsuite/styles')
        .then(response => response.json())
        .then(data => {
            // Process the returned data
            const styles = data.styles;
            //console.log('Styles:', styles);
            let positive_prompt = "";
            let negative_prompt = "";

            // Look up positive_prompt and negative_prompt for the given name
            for (let i = 0; i < styles[0].length; i++) {
                const style = styles[0][i];
                if (style.name === name) {
                    positive_prompt = style.prompt;
                    negative_prompt = style.negative_prompt;
                    //console.log('Style:', style.name);
                    break;
                }
            }

            if (positive_prompt !== "") {
                //console.log("Positive prompt:", positive_prompt);
                //console.log("Negative prompt:", negative_prompt);
                return { positive_prompt: positive_prompt, negative_prompt: negative_prompt };
            } else {
                return { positive_prompt: "", negative_prompt: "" };
            }
        })
        .catch(error => {
            console.error('Error:', error);
            throw error; // Re-throw the error so the caller can handle it
        });
}

function addStyles(name, positive_prompt, negative_prompt) {
    return api.fetchApi('/nsuite/styles/add', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            name: name,
            positive_prompt: positive_prompt,
            negative_prompt: negative_prompt
        }),
    })
}

function updateStyles(name, positive_prompt, negative_prompt) {
    return api.fetchApi('/nsuite/styles/update', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            name: name,
            positive_prompt: positive_prompt,
            negative_prompt: negative_prompt
        }),
    })
}

function removeStyles(name) {
    // Ask for confirmation first
    let ok = confirm("Are you sure you want to remove this style?");
    if (!ok) {
        return;
    }

    return api.fetchApi('/nsuite/styles/remove', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            name: name
        }),
    })
}

app.registerExtension({
    name: "n.CLIPTextEncodeAdvancedNSuite",
    async beforeRegisterNodeDef(nodeType, nodeData, app) {

        const onAdded = nodeType.prototype.onAdded;
        if (nodeData.name === "CLIPTextEncodeAdvancedNSuite [n-suite]") {
            nodeType.prototype.onAdded = function () {
                onAdded?.apply(this, arguments);
                const styles = this.widgets.find((w) => w.name === "styles");
                const p_prompt = this.widgets.find((w) => w.name === "positive_prompt");
                const n_prompt = this.widgets.find((w) => w.name === "negative_prompt");
                const cb = nodeData.callback;
                let addedd_positive_prompt = "";
                let addedd_negative_prompt = "";
                styles.callback = function () {
                    let index = styles.options.values.indexOf(styles.value);

                    if (addedd_positive_prompt == "" && addedd_negative_prompt == "") {
                        getStyles(styles.options.values[index - 1]).then(style_prompts => {
                            console.log(style_prompts);

                            addedd_positive_prompt = style_prompts.positive_prompt;
                            addedd_negative_prompt = style_prompts.negative_prompt;
                            //alert("Addedd positive prompt: " + addedd_positive_prompt + "\nAddedd negative prompt: " + addedd_negative_prompt);
                        })
                    }

                    let current_positive_prompt = p_prompt.value;
                    let current_negative_prompt = n_prompt.value;

                    getStyles(styles.value).then(style_prompts => {
                        //console.log(style_prompts)

                        if ((current_positive_prompt.trim() != addedd_positive_prompt.trim() || current_negative_prompt.trim() != addedd_negative_prompt.trim())) {

                            let ok = confirm("Style has been changed. Do you want to change style without saving?");

                            if (!ok) {
                                if (styles.value === styles.options.values[0]) {
                                    value = styles.options.values[0];
                                }
                                styles.value = styles.options.values[index - 1];

                                return;
                            }
                        }

                        // Apply the selected style's prompts to the node
                        p_prompt.value = style_prompts.positive_prompt;
                        n_prompt.value = style_prompts.negative_prompt;

                        addedd_positive_prompt = style_prompts.positive_prompt;
                        addedd_negative_prompt = style_prompts.negative_prompt;
                        if (cb) {
                            return cb.apply(this, arguments);
                        }
                    })
                    .catch(error => {
                        console.error('Error:', error);
                    });

                };

                let savestyle;
                let replacestyle;
                let deletestyle;

                // Create the button widgets for managing styles
                savestyle = this.addWidget("button", "New", "image", () => {
                    // Ask for a name for the new style
                    let inputName = prompt("Enter a name for the style:", styles.value);
                    if (inputName === null) {
                        return;
                    }

                    addStyles(inputName, p_prompt.value, n_prompt.value);
                    // Add the style to the dropdown list and update the widget value
                    if (!styles.options.values.includes(inputName)) {
                        styles.options.values.push(inputName);
                    }
                }, {
                    cursor: "grab",
                });
                replacestyle = this.addWidget("button", "Replace", "image", () => {
                    updateStyles(styles.value, p_prompt.value, n_prompt.value);
                }, {
                    cursor: "grab",
                });
                deletestyle = this.addWidget("button", "Delete", "image", () => {
                    removeStyles(styles.value);

                    // Remove the style from the dropdown list
                    styles.options.values = styles.options.values.filter((value) => value !== styles.value);
                }, {
                    cursor: "grab",
                });
                savestyle.serialize = false;

            }

        };

    },
});
custom_nodes/ComfyUI-N-Nodes/js/dynamicPrompt.js
ADDED
@@ -0,0 +1,44 @@
import { app } from "/scripts/app.js";
import { ComfyWidgets } from "/scripts/widgets.js";

app.registerExtension({
    name: "n.DynamicPrompt",
    async beforeRegisterNodeDef(nodeType, nodeData, app) {

        if (nodeData.name === "DynamicPrompt") {
            console.warn("DynamicPrompt detected")

            const onExecuted = nodeType.prototype.onExecuted;

            nodeType.prototype.onExecuted = function (message) {
                onExecuted?.apply(this, arguments);

                const pos_cached = this.widgets.findIndex((w) => w.name === "cached");
                console.warn("value:" + pos_cached)

                if (this.widgets) {
                    const pos_text = this.widgets.findIndex((w) => w.name === "text");
                    if (pos_text !== -1) {
                        for (let i = pos_text; i < this.widgets.length; i++) {
                            this.widgets[i].onRemove?.();
                        }
                        this.widgets.length = pos_text;
                    }
                }

                if (this.widgets[pos_cached].value === "NO") {

                    const w = ComfyWidgets["STRING"](this, "text", ["STRING", { multiline: true }], app);
                    // random seed
                    var rnm = Math.floor(Math.random() * 10000)
                    w.widget.value = rnm;

                }

            };
        }
    },
});
custom_nodes/ComfyUI-N-Nodes/js/extended_widgets.js
ADDED
@@ -0,0 +1,329 @@
//extended_widgets.js
import { app } from "/scripts/app.js";
import { api } from "/scripts/api.js"
import { ComfyWidgets } from "/scripts/widgets.js";

const MultilineSymbol = Symbol();
const MultilineResizeSymbol = Symbol();
async function uploadFile(file, updateNode, node, pasted = false) {
    const videoWidget = node.widgets.find((w) => w.name === "video");

    try {
        // Wrap the file in FormData so it includes the filename
        const body = new FormData();
        body.append("image", file);
        if (pasted) {
            body.append("subfolder", "pasted");
        }
        else {
            body.append("subfolder", "n-suite");
        }

        const resp = await api.fetchApi("/upload/image", {
            method: "POST",
            body,
        });

        if (resp.status === 200) {
            const data = await resp.json();
            // Add the file to the dropdown list and update the widget value
            let path = data.name;

            if (!videoWidget.options.values.includes(path)) {
                videoWidget.options.values.push(path);
            }

            if (updateNode) {
                videoWidget.value = path;
                if (data.subfolder) path = data.subfolder + "/" + path;
                showVideoInput(path, node);
            }
        } else {
            alert(resp.status + " - " + resp.statusText);
        }
    } catch (error) {
        alert(error);
    }
}

function addVideo(node, name, src, app, autoplay_value) {
    const MIN_SIZE = 50;

    function computeSize(size) {
        try {
            if (node.widgets[0].last_y == null) return;

            let y = node.widgets[0].last_y;
            let freeSpace = size[1] - y;

            // Compute the height of all non-customvideo widgets
            let widgetHeight = 0;
            const multi = [];
            for (let i = 0; i < node.widgets.length; i++) {
                const w = node.widgets[i];
                if (w.type === "customvideo") {
                    multi.push(w);
                } else {
                    if (w.computeSize) {
                        widgetHeight += w.computeSize()[1] + 4;
                    } else {
                        widgetHeight += LiteGraph.NODE_WIDGET_HEIGHT + 4;
                    }
                }
            }

            // See how large each text input can be
            freeSpace -= widgetHeight;
            freeSpace /= multi.length + (!!node.imgs?.length);

            if (freeSpace < MIN_SIZE) {
                // There isn't enough space for all the widgets; increase the size of the node
                freeSpace = MIN_SIZE;
                node.size[1] = y + widgetHeight + freeSpace * (multi.length + (!!node.imgs?.length));
                node.graph.setDirtyCanvas(true);
            }

            // Position each of the widgets
            for (const w of node.widgets) {
                w.y = y;
                if (w.type === "customvideo") {
                    y += freeSpace;
                    w.computedHeight = freeSpace - multi.length * 4;
                } else if (w.computeSize) {
                    y += w.computeSize()[1] + 4;
                } else {
                    y += LiteGraph.NODE_WIDGET_HEIGHT + 4;
                }
            }

            node.inputHeight = freeSpace;
        } catch (e) {

        }
    }
    const widget = {
        type: "customvideo",
        name,
        get value() {
            return this.inputEl.value;
        },
        set value(x) {
            this.inputEl.value = x;
        },
        draw: function (ctx, _, widgetWidth, y, widgetHeight) {
            if (!this.parent.inputHeight) {
                // If we are initially offscreen when created we won't have received a resize event
                // Calculate it here instead
                node.setSizeForImage?.();
            }
            const visible = app.canvas.ds.scale > 0.5 && this.type === "customvideo";
            const margin = 10;
            let top_offset = 5;
            // Hack for the top menu
            if (localStorage.getItem("Comfy.Settings.Comfy.UseNewMenu") === '"Top"') {
                top_offset = 40;
            }

            const elRect = ctx.canvas.getBoundingClientRect();
            const transform = new DOMMatrix()
                .scaleSelf(elRect.width / ctx.canvas.width, elRect.height / ctx.canvas.height)
                .multiplySelf(ctx.getTransform())
                .translateSelf(margin, margin + y);

            const scale = new DOMMatrix().scaleSelf(transform.a, transform.d);
            Object.assign(this.inputEl.style, {
                transformOrigin: "0 0",
                transform: scale,
                left: `${transform.a + transform.e}px`,
                top: `${transform.d + top_offset + transform.f}px`,
                width: `${widgetWidth - (margin * 2)}px`,
                height: `${this.parent.inputHeight - (margin * 2)}px`,
                position: "absolute",
                background: (!node.color) ? '' : node.color,
                color: (!node.color) ? '' : 'white',
                zIndex: app.graph._nodes.indexOf(node),
            });
            this.inputEl.hidden = !visible;
        },
    };

    widget.inputEl = document.createElement("video");

    // Set the video attributes
    Object.assign(widget.inputEl, {
        controls: true,
        src: src,
        poster: "",
        width: 400,
        height: 300,
        loop: true,
        muted: true,
        autoplay: autoplay_value,
        type: "video/mp4"
    });

    // Add the video element to the body
    document.body.appendChild(widget.inputEl);

    widget.parent = node;

    node.addCustomWidget(widget);

    app.canvas.onDrawBackground = function () {
        // Draw isn't fired once the node is off the screen;
        // if it goes off screen quickly, the input may not be removed.
        // This shifts it off screen so it can be moved back if the node is visible.
        for (let n in app.graph._nodes) {
            n = app.graph._nodes[n];
            for (let w in n.widgets) {
                let wid = n.widgets[w];
                if (Object.hasOwn(wid, "inputEl")) {
                    wid.inputEl.style.left = -8000 + "px";
                    wid.inputEl.style.position = "absolute";
                }
            }
        }
    };

    node.onRemoved = function () {
        // When removing this node we need to remove the input from the DOM
        for (let y in this.widgets) {
            if (this.widgets[y].inputEl) {
                this.widgets[y].inputEl.remove();
            }
        }
    };

    widget.onRemove = () => {
        widget.inputEl?.remove();

        // Restore the original size handler if we are the last
        if (!--node[MultilineSymbol]) {
            node.onResize = node[MultilineResizeSymbol];
            delete node[MultilineSymbol];
            delete node[MultilineResizeSymbol];
        }
    };

    if (node[MultilineSymbol]) {
        node[MultilineSymbol]++;
    } else {
        node[MultilineSymbol] = 1;
        const onResize = (node[MultilineResizeSymbol] = node.onResize);

        node.onResize = function (size) {
            computeSize(size);
            // Call the original resize handler
            if (onResize) {
                onResize.apply(this, arguments);
            }
        };
    }

    return { minWidth: 400, minHeight: 200, widget };
}


export function showVideoInput(name, node) {
    const videoWidget = node.widgets.find((w) => w.name === "videoWidget");
    const temp_web_url = node.widgets.find((w) => w.name === "local_url");

    let folder_separator = name.lastIndexOf("/");
    let subfolder = "n-suite";
    if (folder_separator > -1) {
        subfolder = name.substring(0, folder_separator);
        name = name.substring(folder_separator + 1);
    }

    let url_video = api.apiURL(`/view?filename=${encodeURIComponent(name)}&type=input&subfolder=${subfolder}${app.getPreviewFormatParam()}`);
    videoWidget.inputEl.src = url_video;
    temp_web_url.value = url_video;
}

export function showVideoOutput(name, node) {
    const videoWidget = node.widgets.find((w) => w.name === "videoOutWidget");

    let folder_separator = name.lastIndexOf("/");
    let subfolder = "n-suite/videos";
    if (folder_separator > -1) {
        subfolder = name.substring(0, folder_separator);
        name = name.substring(folder_separator + 1);
    }

    let url_video = api.apiURL(`/view?filename=${encodeURIComponent(name)}&type=output&subfolder=${subfolder}${app.getPreviewFormatParam()}`);
    videoWidget.inputEl.src = url_video;

    return url_video;
}


export const ExtendedComfyWidgets = {
    ...ComfyWidgets, // Copy all the functions from ComfyWidgets

    VIDEO(node, inputName, inputData, src, app, type = "input", autoplay_value = true) {
        try {
            const videoWidget = node.widgets.find((w) => w.name === "video");
            const autoplay = node.widgets.find((w) => w.name === "autoplay");
            let res;
            res = addVideo(node, inputName, src, app, autoplay_value);

            if (type == "input") {

                const cb = node.callback;
                videoWidget.callback = function () {
                    showVideoInput(videoWidget.value, node);
                    if (cb) {
                        return cb.apply(this, arguments);
                    }
                };
                autoplay.callback = function () {
                    const videoWidgetz = node.widgets.find((w) => w.name === "videoWidget");

                    videoWidgetz.inputEl.autoplay = autoplay.value;
                    showVideoInput(videoWidget.value, node);
                    if (cb) {
                        return cb.apply(this, arguments);
                    }
                }
            }

            return res;
        }
        catch (error) {
            console.error("Error in extended_widgets.js:", error);
            throw error;
        }
    },

};

custom_nodes/ComfyUI-N-Nodes/js/gptSampler.js
ADDED
@@ -0,0 +1,43 @@
import { app } from "/scripts/app.js";
import { ComfyWidgets } from "/scripts/widgets.js";

app.registerExtension({
    name: "n.GPTSampler",
    async beforeRegisterNodeDef(nodeType, nodeData, app) {

        if (nodeData.name === "GPT Sampler [n-suite]") {
            console.warn("GPTSampler detected")

            const onExecuted = nodeType.prototype.onExecuted;

            nodeType.prototype.onExecuted = function (message) {
                onExecuted?.apply(this, arguments);

                const pos_cached = this.widgets.findIndex((w) => w.name === "cached");
                console.warn("value:" + pos_cached)

                if (this.widgets) {
                    const pos_text = this.widgets.findIndex((w) => w.name === "text");
                    if (pos_text !== -1) {
                        for (let i = pos_text; i < this.widgets.length; i++) {
                            this.widgets[i].onRemove?.();
                        }
                        this.widgets.length = pos_text;
                    }
                }

                if (this.widgets[pos_cached].value === "NO") {
                    const w = ComfyWidgets["STRING"](this, "text", ["STRING", { multiline: true }], app);
                    // Random seed
                    var rnm = Math.floor(Math.random() * 10000)
                    w.widget.value = rnm;
                }

            };
        }
    },
});

custom_nodes/ComfyUI-N-Nodes/js/include_css.js
ADDED
@@ -0,0 +1,22 @@
import { $el } from "../../../scripts/ui.js";

function addStylesheet(url) {
    if (url.endsWith(".js")) {
        url = url.substr(0, url.length - 2) + "css";
    }
    $el("link", {
        parent: document.head,
        rel: "stylesheet",
        type: "text/css",
        href: url.startsWith("http") ? url : getUrl(url),
    });
}
function getUrl(path, baseUrl) {
    if (baseUrl) {
        return new URL(path, baseUrl).toString();
    } else {
        return new URL("../" + path, import.meta.url).toString();
    }
}

addStylesheet(getUrl("styles.css", import.meta.url));

custom_nodes/ComfyUI-N-Nodes/js/styles.css
ADDED
@@ -0,0 +1,20 @@
textarea[placeholder="positive_prompt"] {
    border: 1px solid #64d509;
}
textarea[placeholder="positive_prompt"]:focus-visible {
    border: 1px solid #72eb0f;
}


textarea[placeholder="negative_prompt"] {
    border: 1px solid #a94442;
    border-color: #a94442;
}

textarea[placeholder="negative_prompt"]:focus-visible {
    border: 1px solid #de5755;
    border-color: #de5755;
}

custom_nodes/ComfyUI-N-Nodes/js/videoLoadAdvanced.js
ADDED
@@ -0,0 +1,142 @@
import { app } from "/scripts/app.js";
import { api } from "/scripts/api.js"
import { ExtendedComfyWidgets, showVideoInput } from "./extended_widgets.js";
const MultilineSymbol = Symbol();
const MultilineResizeSymbol = Symbol();


async function uploadFile(file, updateNode, node, pasted = false) {
    const videoWidget = node.widgets.find((w) => w.name === "video");

    try {
        // Wrap the file in FormData so it includes the filename
        const body = new FormData();
        body.append("image", file);
        if (pasted) {
            body.append("subfolder", "pasted");
        }
        else {
            body.append("subfolder", "n-suite");
        }

        const resp = await api.fetchApi("/upload/image", {
            method: "POST",
            body,
        });

        if (resp.status === 200) {
            const data = await resp.json();
            // Add the file to the dropdown list and update the widget value
            let path = data.name;

            if (!videoWidget.options.values.includes(path)) {
                videoWidget.options.values.push(path);
            }

            if (updateNode) {
                videoWidget.value = path;
                if (data.subfolder) path = data.subfolder + "/" + path;
                showVideoInput(path, node);
            }
        } else {
            alert(resp.status + " - " + resp.statusText);
        }
    } catch (error) {
        alert(error);
    }
}


let uploadWidget = "";
app.registerExtension({
    name: "Comfy.VideoLoadAdvanced",
    async beforeRegisterNodeDef(nodeType, nodeData, app) {

        const onAdded = nodeType.prototype.onAdded;
        if (nodeData.name === "LoadVideo [n-suite]") {
            nodeType.prototype.onAdded = function () {
                onAdded?.apply(this, arguments);
                const temp_web_url = this.widgets.find((w) => w.name === "local_url");
                const autoplay_value = this.widgets.find((w) => w.name === "autoplay");

                let uploadWidget;
                const fileInput = document.createElement("input");
                Object.assign(fileInput, {
                    type: "file",
                    accept: "video/mp4,image/gif,video/webm",
                    style: "display: none",
                    onchange: async () => {
                        if (fileInput.files.length) {
                            await uploadFile(fileInput.files[0], true, this);
                        }
                    },
                });
                document.body.append(fileInput);
                // Create the button widget for selecting the files
                uploadWidget = this.addWidget("button", "choose file to upload", "image", () => {
                    fileInput.click();
                }, {
                    cursor: "grab",
                });
                uploadWidget.serialize = false;

                setTimeout(() => {
                    ExtendedComfyWidgets["VIDEO"](this, "videoWidget", ["STRING"], temp_web_url.value, app, "input", autoplay_value.value);
                }, 100);

            };

            nodeType.prototype.onDragOver = function (e) {
                if (e.dataTransfer && e.dataTransfer.items) {
                    const image = [...e.dataTransfer.items].find((f) => f.kind === "file");
                    return !!image;
                }

                return false;
            };

            // On drop upload files
            nodeType.prototype.onDragDrop = function (e) {
                console.log("onDragDrop called");
                let handled = false;
                for (const file of e.dataTransfer.files) {
                    if (file.type.startsWith("video/mp4")) {

                        const filePath = file.path || (file.webkitRelativePath || '').split('/').slice(1).join('/');

                        uploadFile(file, !handled, this); // Don't await these; any order is fine, only update on the first one

                        handled = true;
                    }
                }

                return handled;
            };

            nodeType.prototype.pasteFile = function (file) {
                if (file.type.startsWith("video/mp4")) {
                    //uploadFile(file, true, is_pasted);
                    return true;
                }
                return false;
            }

        }

    },
});

custom_nodes/ComfyUI-N-Nodes/js/videoSave.js
ADDED
@@ -0,0 +1,87 @@
import { app } from "/scripts/app.js";
import { api } from "/scripts/api.js"
import { ExtendedComfyWidgets, showVideoOutput } from "./extended_widgets.js";
const MultilineSymbol = Symbol();
const MultilineResizeSymbol = Symbol();


async function uploadFile(file, updateNode, node, pasted = false) {
    const videoWidget = node.widgets.find((w) => w.name === "video");

    try {
        // Wrap the file in FormData so it includes the filename
        const body = new FormData();
        body.append("image", file);
        if (pasted) {
            body.append("subfolder", "pasted");
        }
        else {
            body.append("subfolder", "n-suite");
        }

        const resp = await api.fetchApi("/upload/image", {
            method: "POST",
            body,
        });

        if (resp.status === 200) {
            const data = await resp.json();
            // Add the file to the dropdown list and update the widget value
            let path = data.name;

            if (!videoWidget.options.values.includes(path)) {
                videoWidget.options.values.push(path);
            }

            if (updateNode) {
                videoWidget.value = path;
                if (data.subfolder) path = data.subfolder + "/" + path;
                showVideoOutput(path, node);
            }
        } else {
            alert(resp.status + " - " + resp.statusText);
        }
    } catch (error) {
        alert(error);
    }
}


let uploadWidget = "";
app.registerExtension({
    name: "Comfy.VideoSave",
    async beforeRegisterNodeDef(nodeType, nodeData, app) {

        const onExecuted = nodeType.prototype.onExecuted;

        const onAdded = nodeType.prototype.onAdded;
        if (nodeData.name === "SaveVideo [n-suite]") {
            nodeType.prototype.onAdded = function () {
                ExtendedComfyWidgets["VIDEO"](this, "videoOutWidget", ["STRING"], "", app, "output");
            };
            nodeType.prototype.onExecuted = function (message) {
                onExecuted?.apply(this, arguments);
                console.log(nodeData)

                let full_path = "";

                for (const list of message.text) {
                    full_path = list;
                }

                let fullweb = showVideoOutput(full_path, this);
            };
        }

    },
});

custom_nodes/ComfyUI-N-Nodes/libs/__pycache__/joytag_models.cpython-311.pyc
ADDED
Binary file (65 kB).

custom_nodes/ComfyUI-N-Nodes/libs/joytag_models.py
ADDED
@@ -0,0 +1,1109 @@
#from https://github.com/fpgaminer/joytag.git
import json
from pathlib import Path
from typing import Optional
import torch
import torch.backends.cuda
import torch.nn as nn
import torch.nn.functional as F
import torchvision

from transformers.activations import QuickGELUActivation
import math
from einops.layers.torch import Rearrange
import einops


class VisionModel(nn.Module):
    image_size: int
    n_tags: int

    def __init__(self, image_size: int, n_tags: int):
        super().__init__()

        self.image_size = image_size
        self.n_tags = n_tags

    @staticmethod
    def load_model(path: Path | str, device: str | None = None) -> 'VisionModel':
        """
        Load a model from a directory.
        :param path: The directory containing the model.
        :return: The loaded model.
        """
        with open(Path(path) / 'config.json', 'r') as f:
            config = json.load(f)

        if (Path(path) / 'model.safetensors').exists():
            from safetensors.torch import load_file
            resume = load_file(Path(path) / 'model.safetensors', device='cpu')
        else:
            resume = torch.load(Path(path) / 'model.pt', map_location=torch.device('cpu'))['model']

        model_classes = VisionModel.__subclasses__()
        model_cls = next(cls for cls in model_classes if cls.__name__ == config['class'])

        model = model_cls(**{k: v for k, v in config.items() if k != 'class'})
        model.load(resume)
        if device is not None:
            model = model.to(device)

        return model

    @staticmethod
    def from_config(config: dict) -> 'VisionModel':
        model_classes = VisionModel.__subclasses__()
        model_cls = next(cls for cls in model_classes if cls.__name__ == config['class'])
        return model_cls(**{k: v for k, v in config.items() if k != 'class'})

    def get_optimized_parameters(self, lr: float):
        raise NotImplementedError

    def save(self):
        raise NotImplementedError

    def load(self, state_dict):
        raise NotImplementedError


def basic_calculate_loss(preds: dict[str, torch.Tensor], batch: dict, pos_weight: torch.Tensor | None, loss_type: str):
    def asl_helper(preds, target):
        p = F.softmax(preds, dim=1)
        xs_pos = p.clamp(min=1e-6)
        xs_neg = (1 - p).clamp(min=1e-6)

        los_pos = torch.log(torch.gather(xs_pos, 1, target.unsqueeze(1))).sum()
        los_neg = torch.log(xs_neg)
        los_neg = los_neg.sum() - torch.gather(los_neg, 1, target.unsqueeze(1)).sum()
        loss = los_pos + los_neg

        return -loss

    if loss_type == "ce":
        loss = F.binary_cross_entropy_with_logits(preds['tags'], batch['tags'])
    elif loss_type == "weighted":
        loss = F.binary_cross_entropy_with_logits(preds['tags'], batch['tags'], pos_weight=pos_weight)
    elif loss_type == "focal":
        gamma = 2
        p = torch.sigmoid(preds['tags'])
        ce_loss = F.binary_cross_entropy_with_logits(preds['tags'], batch['tags'], reduction='none')
        p_t = p * batch['tags'] + (1 - p) * (1 - batch['tags'])
        loss = ce_loss * ((1 - p_t) ** gamma)
        loss = loss.mean()
    elif loss_type == "focal2":
        gamma = 2
        p = torch.sigmoid(preds['tags'])
        ce_loss = F.binary_cross_entropy_with_logits(preds['tags'], batch['tags'], reduction='none')
        p_t = p * batch['tags'] + (1 - p) * (1 - batch['tags'])
        loss = ce_loss * ((1 - p_t) ** gamma) * 256
        loss = loss.mean()
    elif loss_type == "asl":
        p = torch.sigmoid(preds['tags'])
        xs_pos = p
        xs_neg = 1 - p

        los_pos = batch['tags'] * torch.log(xs_pos.clamp(min=1e-6))
        los_neg = (1 - batch['tags']) * torch.log(xs_neg.clamp(min=1e-6))
        loss = los_pos + los_neg
        loss = -loss.sum()

        # Rating
        loss = loss + asl_helper(preds['rating'], batch['rating'])

        # Score
        loss = loss + asl_helper(preds['score'], batch['score'])
    elif loss_type == "asl2":
        p = torch.sigmoid(preds['tags'])
        xs_pos = p
        xs_neg = 1 - p

        los_pos = batch['tags'] * torch.log(xs_pos.clamp(min=1e-6))
        los_neg = (1 - batch['tags']) * torch.log(xs_neg.clamp(min=1e-6))
        loss = -los_pos - los_neg
        loss = loss.sum()
    elif loss_type == "asl3":
        p = torch.sigmoid(preds['tags'])
        xs_pos = p
        xs_neg = 1 - p

        los_pos = batch['tags'] * torch.log(xs_pos.clamp(min=1e-6))
        los_neg = (1 - batch['tags']) * torch.log(xs_neg.clamp(min=1e-6))
        loss = -los_pos - los_neg
        loss = loss.mean()
    elif loss_type == "asl4":
        p = torch.sigmoid(preds['tags'])
        xs_pos = p
        xs_neg = 1 - p

        los_pos = batch['tags'] * torch.log(xs_pos.clamp(min=1e-6))
        los_neg = (1 - batch['tags']) * torch.log(xs_neg.clamp(min=1e-6))
        loss = -los_pos - los_neg
        loss = loss.mean() * 128
    elif loss_type == "asl5":
        loss = F.binary_cross_entropy_with_logits(preds['tags'], batch['tags'], pos_weight=pos_weight) * 128
    elif loss_type == "asl6":
        loss = F.binary_cross_entropy_with_logits(preds['tags'], batch['tags'], pos_weight=pos_weight) * 256
    elif loss_type == "asl7":
        loss = F.binary_cross_entropy_with_logits(preds['tags'], batch['tags'], pos_weight=pos_weight) * 2
    else:
        raise ValueError(f"Invalid loss type: {loss_type}")

    return loss

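The "focal" branch above is a focal-style loss: per-tag binary cross-entropy, down-weighted by (1 - p_t)**gamma so that tags the model already classifies confidently contribute little. A minimal sketch of just that branch on a hand-made batch; the logits and targets below are illustrative values, not project data:

import torch
import torch.nn.functional as F

preds = {'tags': torch.tensor([[2.0, -1.0, 0.5]])}   # raw logits for 3 tags
batch = {'tags': torch.tensor([[1.0, 0.0, 1.0]])}    # binary targets

gamma = 2
p = torch.sigmoid(preds['tags'])
ce_loss = F.binary_cross_entropy_with_logits(preds['tags'], batch['tags'], reduction='none')
p_t = p * batch['tags'] + (1 - p) * (1 - batch['tags'])  # probability assigned to the true class
loss = (ce_loss * ((1 - p_t) ** gamma)).mean()           # confident predictions contribute ~0
print(loss)
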
class CLIPMlp(nn.Module):
    def __init__(self, hidden_size: int, intermediate_size: int, activation_cls):
        super().__init__()
        self.activation_fn = activation_cls()
        self.fc1 = nn.Linear(hidden_size, intermediate_size)
        self.fc2 = nn.Linear(intermediate_size, hidden_size)

    def forward(self, hidden_states: torch.Tensor):
        hidden_states = self.fc1(hidden_states)
        hidden_states = self.activation_fn(hidden_states)
        hidden_states = self.fc2(hidden_states)
        return hidden_states


class FastCLIPAttention2(nn.Module):
    """Fast Attention module for CLIP-like. This is NOT a drop-in replacement for CLIPAttention, since it adds additional flexibility. Mainly uses xformers."""
    def __init__(self, hidden_size: int, out_dim: int, num_attention_heads: int, out_seq_len: Optional[int] = None, norm_qk: bool = False):
        super().__init__()
        self.out_seq_len = out_seq_len
        self.embed_dim = hidden_size
        self.out_dim = out_dim
        self.norm_qk = norm_qk
        self.num_heads = num_attention_heads
        self.head_dim = hidden_size // num_attention_heads
        assert self.head_dim * num_attention_heads == self.embed_dim, "embed_dim must be divisible by num_attention_heads"

        self.q_proj = nn.Linear(self.embed_dim, self.embed_dim)
        self.kv_proj = nn.Linear(self.embed_dim, self.embed_dim * 2)
        self.out_proj = nn.Linear(self.embed_dim, self.out_dim)

        if self.norm_qk:
            self.query_norm = nn.LayerNorm(self.embed_dim)
            self.key_norm = nn.LayerNorm(self.embed_dim)

    #def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    #    return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).contiguous()

    def forward(self, query_states: torch.Tensor, kv_states: torch.Tensor) -> torch.Tensor:
        bsz, src_len, embed_dim = kv_states.size()
        if self.out_seq_len is not None:
            tgt_len = self.out_seq_len
        else:
            tgt_len = src_len

        kv_states = self.kv_proj(kv_states)  # (bsz, src_len, embed_dim * 2)
        q_states = self.q_proj(query_states[:, :tgt_len])  # (bsz, tgt_len, embed_dim)

        # NOTE: It is not clear if LayerNorm should be applied to the embed_dim, or to the head_dim
        if self.norm_qk:
            q_states = self.query_norm(q_states).type(q_states.dtype)
            k_states = self.key_norm(kv_states[:, :, :embed_dim]).type(kv_states.dtype)
            v_states = kv_states[:, :, embed_dim:]
        else:
            k_states = kv_states[:, :, :embed_dim]
            v_states = kv_states[:, :, embed_dim:]

        q_states = q_states.view(bsz, tgt_len, self.num_heads, self.head_dim).transpose(1, 2)  # (bsz, num_heads, tgt_len, head_dim)
        k_states = k_states.view(bsz, src_len, self.num_heads, self.head_dim).transpose(1, 2)  # (bsz, num_heads, src_len, head_dim)
        v_states = v_states.view(bsz, src_len, self.num_heads, self.head_dim).transpose(1, 2)  # (bsz, num_heads, src_len, head_dim)

        # Performs scale of query_states, attention, and softmax
        with torch.backends.cuda.sdp_kernel(enable_math=False):
            x = F.scaled_dot_product_attention(q_states, k_states, v_states)  # (bsz, num_heads, tgt_len, head_dim)
        x = x.transpose(1, 2).contiguous().view(bsz, tgt_len, embed_dim)  # (bsz, tgt_len, embed_dim)

        # Projection
        x = self.out_proj(x)  # (bsz, tgt_len, out_dim)

        return x

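The out_seq_len argument above truncates the queries, so the same attention module can pool a long key/value sequence down to a few output tokens (out_seq_len=1 effectively turns it into a pooling head). A minimal shape sketch of that idea using plain F.scaled_dot_product_attention; all sizes are arbitrary illustrations:

import torch
import torch.nn.functional as F

bsz, num_heads, src_len, head_dim, tgt_len = 2, 4, 16, 16, 1
q = torch.randn(bsz, num_heads, tgt_len, head_dim)  # queries truncated to tgt_len
k = torch.randn(bsz, num_heads, src_len, head_dim)
v = torch.randn(bsz, num_heads, src_len, head_dim)
out = F.scaled_dot_product_attention(q, k, v)
print(out.shape)  # torch.Size([2, 4, 1, 16]): one pooled token per head
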
class SkipInit(nn.Module):
    def __init__(self, hidden_size: int, channel_wise: bool, init_scale: float):
        super().__init__()
        self.hidden_size = hidden_size
        self.channel_wise = channel_wise
        self.init_scale = init_scale

        if self.channel_wise:
            self.scale = nn.Parameter(torch.ones(hidden_size) * init_scale)
        else:
            self.scale = nn.Parameter(torch.tensor(init_scale))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x * self.scale


class FastCLIPEncoderLayer(nn.Module):
    def __init__(
        self,
        hidden_size: int,
        num_attention_heads: int,
        out_seq_len: Optional[int],
        activation_cls = QuickGELUActivation,
        use_palm_alt: bool = False,
        norm_qk: bool = False,
        skip_init: Optional[float] = None,
        stochastic_depth: Optional[float] = None,
    ):
        super().__init__()

        self.use_palm_alt = use_palm_alt
        self.stochastic_depth = stochastic_depth

        self.self_attn = FastCLIPAttention2(
            hidden_size=hidden_size,
            out_dim=hidden_size,
            num_attention_heads=num_attention_heads,
            out_seq_len=out_seq_len,
            norm_qk=norm_qk,
        )
        self.mlp = CLIPMlp(hidden_size, 4 * hidden_size, activation_cls)
        self.layer_norm1 = nn.LayerNorm(hidden_size)
        if not use_palm_alt:
            self.layer_norm2 = nn.LayerNorm(hidden_size)

        if skip_init is not None:
            self.attn_skip_init = SkipInit(hidden_size, channel_wise=True, init_scale=skip_init)
            self.mlp_skip_init = SkipInit(hidden_size, channel_wise=True, init_scale=skip_init)
        else:
            self.attn_skip_init = nn.Identity()
            self.mlp_skip_init = nn.Identity()

    def forward(self, hidden_states: torch.Tensor):
        residual = hidden_states
        hidden_states = self.layer_norm1(hidden_states)

        if not self.use_palm_alt:
            hidden_states = self.self_attn(query_states=hidden_states, kv_states=hidden_states)
            hidden_states = self.attn_skip_init(hidden_states)
            hidden_states = hidden_states + residual[:, :hidden_states.size(1)]

            residual = hidden_states
            hidden_states = self.layer_norm2(hidden_states)
            hidden_states = self.mlp(hidden_states)
            hidden_states = self.mlp_skip_init(hidden_states)
            hidden_states = hidden_states + residual
        else:
            # An alternative implementation inspired by the PALM paper
            # By performing the attention and MLP in parallel it's possible to fuse the linear projections of the attention and MLP layers
            # We don't do that here yet, but that supposedly improves efficiency without hurting performance
            attn = self.self_attn(query_states=hidden_states, kv_states=hidden_states)
            attn = self.attn_skip_init(attn)
            mlp = self.mlp(hidden_states[:, :attn.size(1)])
            mlp = self.mlp_skip_init(mlp)

            if self.stochastic_depth is not None:
                attn = torchvision.ops.stochastic_depth(attn, self.stochastic_depth, mode='row', training=self.training)
                mlp = torchvision.ops.stochastic_depth(mlp, self.stochastic_depth, mode='row', training=self.training)

            hidden_states = residual[:, :attn.size(1)] + attn + mlp

        return hidden_states


def sinusoidal_position_embedding(width: int, height: int, depth: int, dtype, device, temperature = 10000):
    """
    Sinusoidal position embedding. Returns a flat tensor of shape (h * w, d).
    """
    assert depth % 4 == 0, "Embedding dimension must be divisible by 4."

    y, x = torch.meshgrid(torch.arange(height, device=device), torch.arange(width, device=device), indexing="ij")
    omega = torch.arange(depth // 4, device=device) / (depth // 4 - 1)
    omega = 1. / (temperature ** omega)

    y = y.flatten()[:, None] * omega[None, :]
    x = x.flatten()[:, None] * omega[None, :]
    embedding = torch.cat([x.sin(), x.cos(), y.sin(), y.cos()], dim=1)

    return embedding.type(dtype)

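A quick sanity check of what sinusoidal_position_embedding returns, assuming the function above is in scope: for a 14x14 patch grid and depth 64 it yields one 64-dimensional row per patch, with sin/cos components staying in [-1, 1]:

import torch

emb = sinusoidal_position_embedding(width=14, height=14, depth=64,
                                    dtype=torch.float32, device='cpu')
print(emb.shape)                                          # torch.Size([196, 64])
print(float(emb.min()) >= -1.0, float(emb.max()) <= 1.0)  # True True
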
class CLIPEmbeddingLayer(nn.Module):
    def __init__(self, hidden_size: int, num_channels: int, image_size: int, patch_size: int, patch_dropout: float = 0.0, good_dropout: bool = False, dpn: bool = False, sine_positional_embeddings: bool = False):
        super().__init__()

        assert image_size % patch_size == 0, "Image dimensions must be divisible by the patch size."

        seq_len = (image_size // patch_size) ** 2
        self.patch_dropout = patch_dropout
        self.hidden_size = hidden_size
        self.good_dropout = good_dropout
        self.dpn = dpn
        self.sine_positional_embeddings = sine_positional_embeddings
        self.patch_size = patch_size

        self.patch_embeddings = nn.Conv2d(
            in_channels=num_channels,
            out_channels=hidden_size,
            kernel_size=patch_size,
            stride=patch_size,
            bias=False,
        )
        if not self.sine_positional_embeddings:
            self.positional_embeddings = nn.Embedding(seq_len, hidden_size)
            self.register_buffer("position_ids", torch.arange(seq_len))

        if self.dpn:
            self.to_patch_embeddings = nn.Sequential(
                Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=patch_size, p2=patch_size),
                nn.LayerNorm(3 * patch_size * patch_size),
                nn.Linear(3 * patch_size * patch_size, hidden_size),
                nn.LayerNorm(hidden_size),
            )
        else:
            self.to_patch_embeddings = nn.Conv2d(
                in_channels=num_channels,
                out_channels=hidden_size,
                kernel_size=patch_size,
                stride=patch_size,
                bias=False,
            )

    def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
        B, C, H, W = pixel_values.shape
        assert H % self.patch_size == 0, f"Input image height ({H}) needs to be divisible by the patch size ({self.patch_size})."
        assert W % self.patch_size == 0, f"Input image width ({W}) needs to be divisible by the patch size ({self.patch_size})."

        if self.dpn:
            patches = self.to_patch_embeddings(pixel_values)
        else:
            patches = self.to_patch_embeddings(pixel_values)
            patches = patches.flatten(2).transpose(1, 2)

        seq_len = patches.shape[1]
        patch_dropout = int(math.ceil((1.0 - self.patch_dropout) * seq_len))

        if self.sine_positional_embeddings:
            position_embeddings = sinusoidal_position_embedding(W // self.patch_size, H // self.patch_size, self.hidden_size, pixel_values.dtype, pixel_values.device)
        else:
            position_embeddings = self.positional_embeddings(self.position_ids)

        if patch_dropout == seq_len or not self.training:
            embeddings = patches + position_embeddings
        elif self.good_dropout:
            # Pick random patches to drop out
            # The "good_dropout" variant uses random permutations for each batch item, but is slightly slower and involves more code

            # The below method is a nice trick to generate a batch of random permutations.
            # Torch (as of 1.13) doesn't have a built-in function to do this, and a for loop of torch.randperm is slow.
            # Based on some benchmarks I measured the generation of the mask and the fetching to be only 50% slower than the non-"good_dropout" variant.
            # And the time taken here is only a fraction of the time spent performing the embedding convolution.
            # Generate a matrix of random numbers between 0 and 1 of shape (B, seq_len)
            patch_mask = torch.rand(B, seq_len, device=patches.device)
            # For each batch tensor, use argsort to convert the random numbers into a permutation of the patch indices
            patch_mask = torch.argsort(patch_mask, dim=1)
            # Truncate
            patch_mask = patch_mask[:, :patch_dropout]

            embeddings = patches.gather(1, patch_mask.unsqueeze(-1).expand(-1, -1, self.hidden_size)) + position_embeddings[patch_mask]
        else:
            # The non-"good_dropout" variant uses a single random permutation for all batch items, but is faster and uses less code
            indices = torch.randperm(seq_len, device=pixel_values.device)[:patch_dropout]
            embeddings = patches[:, indices, :] + position_embeddings[indices.expand(1, -1)]

        return embeddings

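The rand-plus-argsort pattern in the forward above is worth spelling out: argsort over uniform noise yields an independent random permutation per batch row without a Python loop over torch.randperm, and truncating each row then selects which patches survive dropout. A standalone sketch with toy sizes:

import torch

B, seq_len, keep = 3, 8, 6
patch_mask = torch.rand(B, seq_len)
patch_mask = torch.argsort(patch_mask, dim=1)  # each row is a permutation of 0..seq_len-1
kept = patch_mask[:, :keep]                    # per-item indices of the surviving patches
print(kept.shape)                              # torch.Size([3, 6])
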
class MHAPoolingHead(nn.Module):
    def __init__(self, hidden_size: int, num_attention_heads: int, activation_cls, out_dim: int, alt_style: bool, norm_qk: bool):
        super().__init__()

        self.out_dim = out_dim if not alt_style else hidden_size

        self.probe = nn.Parameter(torch.randn(hidden_size))

        self.mlp = CLIPMlp(hidden_size, 4 * hidden_size, activation_cls)
        self.layer_norm = nn.LayerNorm(hidden_size)
        self.pooling_head = nn.Linear(hidden_size, 1)

        self.self_attn = FastCLIPAttention2(
            hidden_size=hidden_size,
            out_dim=self.out_dim,
            num_attention_heads=num_attention_heads,
            out_seq_len=1,
            norm_qk=norm_qk,
        )
        self.mlp = CLIPMlp(self.out_dim, 4 * self.out_dim, activation_cls)
        self.layer_norm1 = nn.LayerNorm(hidden_size)
        self.layer_norm2 = nn.LayerNorm(self.out_dim)

        if alt_style:
            self.final_proj = nn.Linear(hidden_size, out_dim)
        else:
            self.final_proj = nn.Identity()

    def forward(self, hidden_states: torch.Tensor):
        hidden_states = self.layer_norm1(hidden_states)
        query_states = self.probe.unsqueeze(0).unsqueeze(0).expand(hidden_states.size(0), 1, -1)

        hidden_states = self.self_attn(query_states=query_states, kv_states=hidden_states)
        # We don't use a residual connection here because the out_dim is different from the hidden_size

        residual = hidden_states
        hidden_states = self.layer_norm2(hidden_states)
        hidden_states = self.mlp(hidden_states)
        hidden_states = hidden_states + residual
        hidden_states = self.final_proj(hidden_states)

        return hidden_states.squeeze(1)


class GAPHead(nn.Module):
    def __init__(self, hidden_size: int, out_dim: int):
        super().__init__()

        self.norm = nn.LayerNorm(hidden_size)
        self.proj = nn.Linear(hidden_size, out_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x.mean(dim=1)
        x = self.norm(x)
        x = self.proj(x)
        return x

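Where MHAPoolingHead pools the token sequence through a single learned probe query (out_seq_len=1 in the attention above), GAPHead is plain global average pooling followed by LayerNorm and a linear projection. A minimal sketch of the pooling step itself; sizes are illustrative:

import torch

x = torch.randn(2, 196, 768)  # (batch, tokens, hidden)
pooled = x.mean(dim=1)        # average over the token axis
print(pooled.shape)           # torch.Size([2, 768]); norm + proj then map this to n_tags
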
class CLIPLikeModel(VisionModel):
    def __init__(
        self,
        n_tags: int,
        embedding_dim: int,
        num_attention_heads: int,
        activation_cls,
        num_channels: int,
        image_size: int,
        patch_size: int,
        patch_dropout: float,
        use_palm_alt: bool,
        num_layers: int,
        use_mha_alt: bool,
        loss_type: str,
        good_dropout: bool=False,
        dpn: bool=False,
        sine_positional_embeddings: bool=False,
        norm_qk: bool = False,
        no_wd_bias: bool = False,
        use_gap_head: bool = False,
        skip_init: Optional[float] = None,
        stochastic_depth: Optional[float] = None,
    ):
        super().__init__(image_size, n_tags)

        out_dim = n_tags
        self.n_tags = n_tags
        self.loss_type = loss_type
        self.no_wd_bias = no_wd_bias

        stochastic_depth_space = torch.linspace(0, stochastic_depth, num_layers) if stochastic_depth is not None else None

        self.embedding_layer = CLIPEmbeddingLayer(embedding_dim, num_channels, image_size, patch_size, patch_dropout, good_dropout, dpn, sine_positional_embeddings)
        self.pre_layer_norm = nn.LayerNorm(embedding_dim)
        self.encoder_layers = nn.ModuleList([FastCLIPEncoderLayer(
            hidden_size=embedding_dim,
            num_attention_heads=num_attention_heads,
            out_seq_len=None,
            activation_cls=activation_cls,
            use_palm_alt=use_palm_alt,
            norm_qk=norm_qk,
            skip_init=skip_init,
            stochastic_depth=stochastic_depth_space[i].item() if stochastic_depth_space is not None else None,
        ) for i in range(num_layers)])

        if use_gap_head:
            self.pooling_head = GAPHead(embedding_dim, out_dim)
        else:
            self.pooling_head = MHAPoolingHead(embedding_dim, num_attention_heads, activation_cls, out_dim, use_mha_alt, norm_qk=norm_qk)

    def forward(self, batch):
        hidden_states = self.embedding_layer(batch['image'])
        hidden_states = self.pre_layer_norm(hidden_states)

        for layer in self.encoder_layers:
            hidden_states = layer(hidden_states)

        preds = self.pooling_head(hidden_states)

        result = {
            'tags': preds,
        }

        return result

    def calculate_loss(self, preds, batch, pos_weight):
        return basic_calculate_loss(preds, batch, pos_weight, self.loss_type)

    def get_optimized_parameters(self, lr: float):
        if self.no_wd_bias:
            return self.get_optimized_parameters_no_wd_bias()
        else:
            return self.parameters()

    def get_optimized_parameters_no_wd_bias(self):
        decay = []
        no_decay = []

        for name, param in self.named_parameters():
            if not param.requires_grad:
                continue

            if len(param.shape) == 1 or name.endswith(".bias"):
                no_decay.append(param)
                print(f'No decay: {name}')
            else:
                decay.append(param)

        return [
            {'params': decay},
            {'params': no_decay, 'weight_decay': 0.},
        ]

    def save(self):
        return self.state_dict()

    def load(self, state_dict):
        self.load_state_dict(state_dict)

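For context on how these classes are consumed: VisionModel.load_model above reads config.json from a checkpoint directory, instantiates the subclass it names (such as CLIPLikeModel) with the remaining config entries as constructor kwargs, and then loads the weights. A hedged usage sketch; the module name mirrors this file and the checkpoint path is hypothetical:

from joytag_models import VisionModel

model = VisionModel.load_model('models/joytag', device='cpu')  # hypothetical checkpoint directory
model.eval()
print(model.image_size, model.n_tags)
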
571 |
+
class MaskedAutoEncoderViT(nn.Module):
|
572 |
+
def __init__(
|
573 |
+
self,
|
574 |
+
n_tags: int,
|
575 |
+
|
576 |
+
embedding_dim: int,
|
577 |
+
num_attention_heads: int,
|
578 |
+
activation_cls,
|
579 |
+
num_channels: int,
|
580 |
+
image_size: int,
|
581 |
+
patch_size: int,
|
582 |
+
num_layers: int,
|
583 |
+
loss_type: str,
|
584 |
+
sine_positional_embeddings: bool=False,
|
585 |
+
|
586 |
+
decoder_embedding_dim: int = 512,
|
587 |
+
decoder_num_attention_heads: int = 8,
|
588 |
+
decoder_num_layers: int = 6,
|
589 |
+
decoder_force_projection: bool = False,
|
590 |
+
|
591 |
+
masking_ratio: float = 0.75,
|
592 |
+
mae_loss_weight: float = 1.0,
|
593 |
+
mae_normalize_targets: bool = False,
|
594 |
+
mae_post_norm: bool = False,
|
595 |
+
):
|
596 |
+
super().__init__()
|
597 |
+
|
598 |
+
self.n_tags = n_tags
|
599 |
+
self.seq_len = (image_size // patch_size) ** 2
|
600 |
+
self.embedding_dim = embedding_dim
|
601 |
+
self.decoder_embedding_dim = decoder_embedding_dim
|
602 |
+
self.sine_positional_embeddings = sine_positional_embeddings
|
603 |
+
self.image_size = image_size
|
604 |
+
self.patch_size = patch_size
|
605 |
+
self.masking_ratio = masking_ratio
|
606 |
+
self.loss_type = loss_type
|
607 |
+
self.mae_loss_weight = mae_loss_weight
|
608 |
+
self.mae_normalize_targets = mae_normalize_targets
|
609 |
+
|
610 |
+
if not self.sine_positional_embeddings:
|
611 |
+
self.positional_embeddings = nn.Embedding(self.seq_len, embedding_dim)
|
612 |
+
self.decoder_positional_embeddings = nn.Embedding(self.seq_len, decoder_embedding_dim)
|
613 |
+
self.register_buffer("position_ids", torch.arange(self.seq_len))
|
614 |
+
|
615 |
+
self.to_patches = Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=patch_size, p2=patch_size)
|
616 |
+
self.patch_embedder = nn.Linear(num_channels * patch_size * patch_size, embedding_dim)
|
617 |
+
|
618 |
+
# Encoder
|
619 |
+
self.pre_layer_norm = nn.LayerNorm(embedding_dim)
|
620 |
+
self.encoder_layers = nn.ModuleList([FastCLIPEncoderLayer(
|
621 |
+
hidden_size=embedding_dim,
|
622 |
+
num_attention_heads=num_attention_heads,
|
623 |
+
out_seq_len=None,
|
624 |
+
activation_cls=activation_cls,
|
625 |
+
use_palm_alt=True,
|
626 |
+
norm_qk=False,
|
627 |
+
skip_init=None,
|
628 |
+
) for _ in range(num_layers)])
|
629 |
+
|
630 |
+
# Head for classification
|
631 |
+
self.pooling_head = GAPHead(embedding_dim, n_tags)
|
632 |
+
|
633 |
+
# Decoder
|
634 |
+
if embedding_dim != decoder_embedding_dim or decoder_force_projection:
|
635 |
+
self.encoder_to_decoder_proj = nn.Linear(embedding_dim, decoder_embedding_dim)
|
636 |
+
else:
|
637 |
+
self.encoder_to_decoder_proj = nn.Identity()
|
638 |
+
self.decoder_pre_layer_norm = nn.LayerNorm(decoder_embedding_dim)
|
639 |
+
self.decoder_layers = nn.ModuleList([FastCLIPEncoderLayer(
|
640 |
+
hidden_size=decoder_embedding_dim,
|
641 |
+
num_attention_heads=decoder_num_attention_heads,
|
642 |
+
out_seq_len=None,
|
643 |
+
activation_cls=activation_cls,
|
644 |
+
use_palm_alt=True,
|
645 |
+
norm_qk=False,
|
646 |
+
skip_init=None,
|
647 |
+
) for _ in range(decoder_num_layers)])
|
648 |
+
|
649 |
+
if mae_post_norm:
|
650 |
+
self.decoder_to_pixel_values = nn.Sequential(
|
651 |
+
nn.LayerNorm(decoder_embedding_dim),
|
652 |
+
nn.Linear(decoder_embedding_dim, num_channels * patch_size * patch_size)
|
653 |
+
)
|
654 |
+
else:
|
655 |
+
self.decoder_to_pixel_values = nn.Linear(decoder_embedding_dim, num_channels * patch_size * patch_size)
|
656 |
+
self.mask_token = nn.Parameter(torch.zeros(decoder_embedding_dim))
|
657 |
+
torch.nn.init.normal_(self.mask_token, std=0.02)
|
658 |
+
|
659 |
+
    def forward(self, batch):
        pixel_values = batch['image']
        device = pixel_values.device
        B, C, H, W = pixel_values.shape
        assert H % self.patch_size == 0, f"Input image height ({H}) needs to be divisible by the patch size ({self.patch_size})."
        assert W % self.patch_size == 0, f"Input image width ({W}) needs to be divisible by the patch size ({self.patch_size})."

        # Convert image to patches (B, seq_len, C * patch_size * patch_size)
        patches = self.to_patches(pixel_values)
        seq_len = patches.shape[1]
        num_masked = int(self.masking_ratio * seq_len)

        # For each batch tensor, use argsort to convert the random numbers into a permutation of the patch indices
        # From this we can get the masked and unmasked indices
        patch_mask = torch.rand(B, seq_len, device=device)
        patch_mask = torch.argsort(patch_mask, dim=1)
        masked_indices, unmasked_indices = patch_mask[:, :num_masked], patch_mask[:, num_masked:]
        batch_range = torch.arange(B, device=device)[:, None]

        # Masked and unmasked patches
        unmasked_patches = patches[batch_range, unmasked_indices]
        masked_patches = patches[batch_range, masked_indices]

        # Embed unmasked patches for the encoder (B, seq_len, embedding_dim)
        tokens = self.patch_embedder(unmasked_patches)

        if self.sine_positional_embeddings:
            position_embeddings = sinusoidal_position_embedding(W // self.patch_size, H // self.patch_size, self.embedding_dim, pixel_values.dtype, device)
            decoder_position_embeddings = sinusoidal_position_embedding(W // self.patch_size, H // self.patch_size, self.decoder_embedding_dim, pixel_values.dtype, device)
        else:
            position_embeddings = self.positional_embeddings(self.position_ids)
            decoder_position_embeddings = self.decoder_positional_embeddings(self.position_ids)

        # Add position embeddings
        tokens = tokens + position_embeddings[unmasked_indices]

        # Run the encoder
        encoded_tokens = self.pre_layer_norm(tokens)

        for layer in self.encoder_layers:
            encoded_tokens = layer(encoded_tokens)

        # Label predictions
        if self.training:
            preds = self.pooling_head(encoded_tokens)
        else:
            # During inference, classify using the entire image
            # But we'll do the usual for the MAE part, just so we can see how MAE is performing during validation
            tokens = self.patch_embedder(patches)
            tokens = tokens + position_embeddings
            tokens = self.pre_layer_norm(tokens)
            for layer in self.encoder_layers:
                tokens = layer(tokens)
            preds = self.pooling_head(tokens)

        # Projection for the decoder and position embeddings
        decoder_tokens = self.encoder_to_decoder_proj(encoded_tokens)
        decoder_tokens = decoder_tokens + decoder_position_embeddings[unmasked_indices]

        # Fill in the masked patches
        mask_tokens = einops.repeat(self.mask_token, 'd -> b n d', b=B, n=num_masked)
        mask_tokens = mask_tokens + decoder_position_embeddings[masked_indices]
        decoder_tokens = torch.cat([decoder_tokens, mask_tokens], dim=1)

        # Run the decoder
        decoded_tokens = self.decoder_pre_layer_norm(decoder_tokens)

        for layer in self.decoder_layers:
            decoded_tokens = layer(decoded_tokens)

        # Only predict the masked patches
        # All the masked patches are at the end of the sequence
        decoded_tokens = decoded_tokens[:, -num_masked:]
        pred_pixel_values = self.decoder_to_pixel_values(decoded_tokens)

        # Calculate the mae loss
        if self.mae_normalize_targets:
            # Normalize each patch by its mean and variance. The ViCHA paper says this provides better results
            means = masked_patches.mean(dim=-1, keepdim=True)
            vars = masked_patches.var(dim=-1, keepdim=True)
            target = (masked_patches - means) / (vars + 1e-6)**0.5
            mae_loss = F.mse_loss(pred_pixel_values, target)
        else:
            mae_loss = F.mse_loss(pred_pixel_values, masked_patches)
        mae_loss = mae_loss * self.mae_loss_weight

        return {
            'tags': preds,
            'mae_loss': mae_loss,
        }

    def calculate_loss(self, preds, batch, pos_weight):
        return basic_calculate_loss(preds, batch, pos_weight, self.loss_type) + preds['mae_loss']

    def get_optimized_parameters(self, lr: float):
        return self.parameters()

    def save(self):
        return self.state_dict()

    def load(self, state_dict):
        self.load_state_dict(state_dict)

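A minimal standalone sketch (not part of the uploaded file) of the argsort trick `MaskedAutoEncoderViT.forward` uses to split patches into masked and unmasked sets without replacement; all shapes and values here are illustrative:

```python
# Standalone sketch: argsort of i.i.d. noise gives a random permutation per row,
# so the first num_masked indices are a uniform sample without replacement.
import torch

B, seq_len, masking_ratio = 2, 16, 0.75
num_masked = int(masking_ratio * seq_len)

noise = torch.rand(B, seq_len)            # independent noise per patch
perm = torch.argsort(noise, dim=1)        # a random permutation per batch row
masked_indices = perm[:, :num_masked]     # first 75% of the permutation is masked
unmasked_indices = perm[:, num_masked:]   # the rest goes through the encoder

patches = torch.randn(B, seq_len, 48)
batch_range = torch.arange(B)[:, None]    # broadcasts against the index grids
unmasked_patches = patches[batch_range, unmasked_indices]
print(unmasked_patches.shape)             # torch.Size([2, 4, 48])
```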
class StochDepth(nn.Module):
    def __init__(self, drop_rate: float, scale_by_keep: bool = False):
        super().__init__()
        self.drop_rate = drop_rate
        self.scale_by_keep = scale_by_keep

    def forward(self, x):
        if not self.training:
            return x

        batch_size = x.shape[0]
        r = torch.rand((batch_size, 1, 1), device=x.device)
        keep_prob = 1 - self.drop_rate
        binary_tensor = torch.floor(keep_prob + r)
        if self.scale_by_keep:
            x = x / keep_prob

        return x * binary_tensor

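A quick standalone sketch of what `StochDepth` does at train time (seed and shapes are illustrative): `floor(keep_prob + U[0,1))` is 1 with probability `keep_prob`, so whole samples have the branch zeroed, and with `scale_by_keep` the survivors are rescaled so the expected output matches the input:

```python
# Standalone sketch of stochastic depth's per-sample gate.
import torch

torch.manual_seed(0)                  # illustrative seed
drop_rate = 0.25
keep_prob = 1 - drop_rate
x = torch.ones(8, 4, 16)              # (batch, seq, dim)
r = torch.rand((8, 1, 1))
mask = torch.floor(keep_prob + r)     # per-sample 0/1 gate, P(1) = keep_prob
out = (x / keep_prob) * mask          # inverted scaling keeps E[out] == x
print(mask.squeeze())                 # roughly 75% ones
```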
class SkipInitChannelwise(nn.Module):
    def __init__(self, channels, init_val=1e-6):
        super().__init__()
        self.channels = channels
        self.init_val = init_val
        self.skip = nn.Parameter(torch.ones(channels) * init_val)

    def forward(self, x):
        return x * self.skip

class PosEmbedding(nn.Module):
    def __init__(self, d_model: int, max_len: int, use_sine: bool, patch_size: int):
        super().__init__()
        self.d_model = d_model
        self.max_len = max_len
        self.use_sine = use_sine
        self.patch_size = patch_size

        if not self.use_sine:
            self.embedding = nn.Embedding(max_len, d_model)
            nn.init.trunc_normal_(self.embedding.weight, std=0.02)
            self.register_buffer("position_ids", torch.arange(max_len))

    def forward(self, x, width: int, height: int):
        if self.use_sine:
            position_embeddings = sinusoidal_position_embedding(width // self.patch_size, height // self.patch_size, self.d_model, x.dtype, x.device)
        else:
            position_embeddings = self.embedding(self.position_ids)

        return x + position_embeddings

class MLPBlock(nn.Module):
    def __init__(self, d_model: int, d_ff: int, stochdepth_rate: float):
        super().__init__()
        self.linear1 = nn.Linear(d_model, d_ff)
        self.linear2 = nn.Linear(d_ff, d_model)
        self.activation = nn.GELU()
        if stochdepth_rate > 0:
            self.stochdepth = StochDepth(stochdepth_rate, scale_by_keep=True)
        else:
            self.stochdepth = None

    def forward(self, x):
        x = self.linear1(x)
        x = self.activation(x)
        if self.stochdepth is not None:
            x = self.stochdepth(x)
        x = self.linear2(x)
        return x

class ViTBlock(nn.Module):
    def __init__(self, num_heads: int, d_model: int, d_ff: int, layerscale_init: float, stochdepth_rate: float):
        super().__init__()
        self.num_heads = num_heads
        self.d_model = d_model

        assert d_model % num_heads == 0, "d_model must be divisible by num_heads"

        # MHA
        self.norm1 = nn.LayerNorm(d_model)
        self.qkv_proj = nn.Linear(d_model, d_model * 3)
        self.out_proj = nn.Linear(d_model, d_model)
        self.skip_init1 = SkipInitChannelwise(channels=d_model, init_val=layerscale_init)
        self.stochdepth1 = StochDepth(stochdepth_rate, scale_by_keep=True) if stochdepth_rate > 0 else None

        # MLP
        self.norm2 = nn.LayerNorm(d_model)
        self.mlp = MLPBlock(d_model, d_ff, stochdepth_rate)
        self.skip_init2 = SkipInitChannelwise(channels=d_model, init_val=layerscale_init)
        self.stochdepth2 = StochDepth(stochdepth_rate, scale_by_keep=True) if stochdepth_rate > 0 else None

    def forward(self, x):
        bsz, src_len, embed_dim = x.shape

        out = x
        out = self.norm1(out)

        # MHA
        qkv_states = self.qkv_proj(out).split(self.d_model, dim=-1)
        q_states = qkv_states[0].view(bsz, src_len, self.num_heads, embed_dim // self.num_heads).transpose(1, 2)  # (bsz, num_heads, src_len, embed_dim // num_heads)
        k_states = qkv_states[1].view(bsz, src_len, self.num_heads, embed_dim // self.num_heads).transpose(1, 2)  # (bsz, num_heads, src_len, embed_dim // num_heads)
        v_states = qkv_states[2].view(bsz, src_len, self.num_heads, embed_dim // self.num_heads).transpose(1, 2)  # (bsz, num_heads, src_len, embed_dim // num_heads)

        with torch.backends.cuda.sdp_kernel(enable_math=False):
            out = F.scaled_dot_product_attention(q_states, k_states, v_states)  # (bsz, num_heads, tgt_len, head_dim)
            out = out.transpose(1, 2).contiguous().view(bsz, src_len, embed_dim)  # (bsz, tgt_len, embed_dim)

        out = self.out_proj(out)

        out = self.skip_init1(out)
        if self.stochdepth1 is not None:
            out = self.stochdepth1(out)
        x = out + x

        out = self.norm2(x)
        out = self.mlp(out)
        out = self.skip_init2(out)
        if self.stochdepth2 is not None:
            out = self.stochdepth2(out)

        out = out + x

        return out

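A standalone sketch (illustrative shapes, not part of the uploaded file) of the fused qkv projection and head reshape used in `ViTBlock.forward`: one linear layer produces q, k, and v together, and each is reshaped so the heads become a batch dimension that `scaled_dot_product_attention` can process in parallel:

```python
# Standalone sketch of the qkv split/reshape dance.
import torch
import torch.nn.functional as F

bsz, src_len, d_model, num_heads = 2, 9, 64, 8
head_dim = d_model // num_heads

qkv = torch.randn(bsz, src_len, 3 * d_model).split(d_model, dim=-1)
q, k, v = (t.view(bsz, src_len, num_heads, head_dim).transpose(1, 2) for t in qkv)

out = F.scaled_dot_product_attention(q, k, v)  # (bsz, num_heads, src_len, head_dim)
out = out.transpose(1, 2).contiguous().view(bsz, src_len, d_model)
print(out.shape)                               # torch.Size([2, 9, 64])
```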
def CaiT_LayerScale_init(network_depth):
    if network_depth <= 18:
        return 1e-1
    elif network_depth <= 24:
        return 1e-5
    else:
        return 1e-6

class CNNLayerNorm(nn.Module):
    def __init__(self, d_model: int):
        super().__init__()
        self.norm = nn.LayerNorm(d_model)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x.transpose(1, 3)
        x = self.norm(x)
        x = x.transpose(1, 3)
        return x

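A one-liner sketch (shapes illustrative) of why `CNNLayerNorm` transposes: `nn.LayerNorm` normalizes the trailing dimension, so the channel axis is moved there and back:

```python
# Standalone sketch: LayerNorm over channels of an NCHW tensor.
import torch
import torch.nn as nn

x = torch.randn(2, 32, 8, 8)   # (batch, channels, height, width)
norm = nn.LayerNorm(32)
y = norm(x.transpose(1, 3)).transpose(1, 3)
print(y.shape)                 # torch.Size([2, 32, 8, 8])
```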
class CNNStem(nn.Module):
    def __init__(self, config: str):
        super().__init__()
        self.config = config

        layers = []
        channels = 3

        for line in config.split(";"):
            ty, line = line.split(":") if ":" in line else (line, "")
            options = line.split(",")
            options = [o.split("=") for o in options] if line else []
            options = {k: v for k, v in options}

            if ty == 'conv':
                layers.append(nn.Conv2d(
                    in_channels=channels,
                    out_channels=int(options['c']),
                    kernel_size=int(options['k'] if 'k' in options else 3),
                    stride=int(options['s'] if 's' in options else 2),
                    bias=True,
                    padding=int(options['p'] if 'p' in options else 1),
                ))
                channels = int(options['c'])
            elif ty == 'bn':
                layers.append(nn.BatchNorm2d(channels))
            elif ty == 'ln':
                layers.append(CNNLayerNorm(channels))
            elif ty == 'relu':
                layers.append(nn.ReLU())
            elif ty == 'gelu':
                layers.append(nn.GELU())

        self.conv = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.conv(x)

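The `CNNStem` constructor parses a small semicolon-separated config string; the value below is illustrative, not a default shipped with the node pack:

```python
# Illustrative config (an assumption, not a shipped default): 'conv' takes
# c (out channels) plus optional k (kernel, default 3), s (stride, default 2),
# p (padding, default 1); 'bn', 'ln', 'relu', 'gelu' take no options.
stem = CNNStem("conv:c=64;bn;relu;conv:c=128;bn;relu")
# Each stride-2 conv halves the spatial size:
# (B, 3, H, W) -> (B, 128, H/4, W/4) when H and W are divisible by 4.
```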
class ViT(VisionModel):
    def __init__(self,
        n_tags: int,
        image_size: int,
        num_blocks: int,
        patch_size: int,
        d_model: int,
        mlp_dim: int,
        num_heads: int,
        stochdepth_rate: float,
        use_sine: bool,
        loss_type: str,
        layerscale_init: Optional[float] = None,
        head_mean_after: bool = False,
        cnn_stem: str | None = None,
        patch_dropout: float = 0.0,
    ):
        super().__init__(image_size, n_tags)

        #assert image_size % patch_size == 0, "image_size must be divisible by patch_size"
        assert d_model % num_heads == 0, "d_model must be divisible by num_heads"

        out_dim = n_tags
        self.n_tags = n_tags
        self.loss_type = loss_type
        self.patch_size = patch_size
        self.head_mean_after = head_mean_after
        self.patch_dropout = patch_dropout

        layerscale_init = CaiT_LayerScale_init(num_blocks) if layerscale_init is None else layerscale_init
        self.patch_embeddings = nn.Conv2d(
            in_channels=3,
            out_channels=d_model,
            kernel_size=patch_size,
            stride=patch_size,
            bias=True,
        ) if cnn_stem is None else CNNStem(cnn_stem)
        self.pos_embedding = PosEmbedding(d_model, (image_size // patch_size) ** 2, use_sine=use_sine, patch_size=patch_size)

        self.blocks = nn.ModuleList([
            ViTBlock(num_heads, d_model, mlp_dim, layerscale_init, stochdepth_rate)
            for _ in range(num_blocks)
        ])

        self.norm = nn.LayerNorm(d_model)
        self.head = nn.Linear(d_model, out_dim)

    def forward(self, batch, return_embeddings=False, return_loss: bool = False, pos_weight=None):
        B, C, H, W = batch['image'].shape
        assert H % self.patch_size == 0, f"Input image height ({H}) needs to be divisible by the patch size ({self.patch_size})."
        assert W % self.patch_size == 0, f"Input image width ({W}) needs to be divisible by the patch size ({self.patch_size})."

        x = self.patch_embeddings(batch['image'])  # (bsz, d_model, patch_num, patch_num)
        x = x.flatten(2).transpose(1, 2)  # (bsz, patch_num ** 2, d_model)
        x = self.pos_embedding(x, W, H)  # (bsz, patch_num ** 2, d_model)

        # Patch dropout
        seq_len = x.shape[1]
        patch_dropout = int(math.ceil((1.0 - self.patch_dropout) * seq_len))

        if patch_dropout != seq_len:
            # Generate a matrix of random numbers between 0 and 1 of shape (B, seq_len)
            patch_mask = torch.rand(B, seq_len, device=x.device)
            # For each batch tensor, use argsort to convert the random numbers into a permutation of the patch indices
            patch_mask = torch.argsort(patch_mask, dim=1)
            # Truncate
            patch_mask = patch_mask[:, :patch_dropout]

            x = x.gather(1, patch_mask.unsqueeze(-1).expand(-1, -1, x.shape[-1]))

            #indices = torch.randperm(seq_len, device=x.device)[:patch_dropout]
            #x = x[:, indices, :]

        # Transformer
        for block in self.blocks:
            x = block(x)

        # Head
        result = {}

        x = self.norm(x)
        if self.head_mean_after:
            x = self.head(x)
            x = x.mean(dim=1)
        else:
            x = x.mean(dim=1)
            if return_embeddings:
                result['embeddings'] = x
            x = self.head(x)

        result['tags'] = x

        if return_loss:
            result['loss'] = self.calculate_loss(result, batch, pos_weight)

        return result

    def calculate_loss(self, preds, batch, pos_weight):
        return basic_calculate_loss(preds, batch, pos_weight, self.loss_type)

    def get_optimized_parameters(self, lr: float):
        return self.parameters()

    def save(self):
        return self.state_dict()

    def load(self, state_dict):
        if 'head.weight' in state_dict and 'head.bias' in state_dict and state_dict['head.weight'].shape[0] == (self.n_tags + 9):
            # Support old models which included 3 rating and 6 score dimensions
            state_dict['head.weight'] = state_dict['head.weight'][:self.n_tags]
            state_dict['head.bias'] = state_dict['head.bias'][:self.n_tags]

        self.load_state_dict(state_dict)

from PIL import Image
import torchvision.transforms.functional as TVF
from torchvision import transforms


def prepare_image(image: Image.Image, target_size: int) -> torch.Tensor:
    # Pad image to square
    image_shape = image.size
    max_dim = max(image_shape)
    pad_left = (max_dim - image_shape[0]) // 2
    pad_top = (max_dim - image_shape[1]) // 2

    padded_image = Image.new('RGB', (max_dim, max_dim), (255, 255, 255))
    padded_image.paste(image, (pad_left, pad_top))

    # Resize image
    if max_dim != target_size:
        padded_image = padded_image.resize((target_size, target_size), Image.BICUBIC)

    # Convert to tensor
    image_tensor = TVF.pil_to_tensor(padded_image) / 255.0

    # Normalize
    image_tensor = TVF.normalize(image_tensor, mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])

    return image_tensor

@torch.no_grad()
def predict(image: Image.Image, model, top_tags, THRESHOLD=0.4):
    image = transforms.ToPILImage()(image.permute(2, 0, 1))
    image_tensor = prepare_image(image, model.image_size)
    batch = {
        'image': image_tensor.unsqueeze(0).to('cuda'),
    }

    with torch.amp.autocast_mode.autocast('cuda', enabled=True):
        preds = model(batch)
        tag_preds = preds['tags'].sigmoid().cpu()

    scores = {top_tags[i]: tag_preds[0][i] for i in range(len(top_tags))}
    predicted_tags = [tag for tag, score in scores.items() if score > THRESHOLD]
    tag_string = ', '.join(predicted_tags)

    return tag_string, scores
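A hypothetical usage sketch (the file name, checkpoint handling, and variable names here are assumptions, not part of the uploaded file): `predict` expects an HWC image tensor, a `ViT` instance on CUDA with weights already loaded via `model.load(...)`, and the ordered tag vocabulary:

```python
# Hypothetical usage sketch; 'example.jpg', model, and top_tags are assumed
# to exist and are not defined in the uploaded file.
import numpy as np
import torch
from PIL import Image

img = Image.open('example.jpg').convert('RGB')
hwc = torch.from_numpy(np.asarray(img))   # predict() permutes HWC -> CHW itself
tag_string, scores = predict(hwc, model, top_tags, THRESHOLD=0.4)
print(tag_string)
```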
custom_nodes/ComfyUI-N-Nodes/libs/migrate.py
ADDED
@@ -0,0 +1,43 @@
import sys
import os

def migrate_workflow(input_file_path):
    try:
        file_name, file_extension = os.path.splitext(input_file_path)

        output_file_path = f"{file_name}_migrated.json"

        pre_list = ('LoadVideo', 'SaveVideo', 'FrameInterpolator', 'LoadFramesFromFolder', 'SetMetadataForSaveVideo', 'GPT Loader Simple', 'GPTSampler', 'String Variable', 'Integer Variable', 'Float Variable', 'DynamicPrompt')
    post_list = ('LoadVideo [n-suite]', 'SaveVideo [n-suite]', 'FrameInterpolator [n-suite]', 'LoadFramesFromFolder [n-suite]', 'SetMetadataForSaveVideo [n-suite]', 'GPT Loader Simple [n-suite]', 'GPT Sampler [n-suite]', 'String Variable [n-suite]', 'Integer Variable [n-suite]', 'Float Variable [n-suite]', 'DynamicPrompt [n-suite]')
        replacements = list(zip(pre_list, post_list))

        with open(input_file_path, 'r') as input_file:
            content = input_file.read()

        # s&r
        for old, new in replacements:
            content = content.replace(f'"Node name for S&R": "{old}"', f'"Node name for S&R": "{new}"')
        # type
        for old, new in replacements:
            content = content.replace(f'"type": "{old}"', f'"type": "{new}"')

        with open(output_file_path, 'w') as output_file:
            output_file.write(content)

        print("Replacement completed successfully.")
    except Exception as e:
        print(f"An error occurred: {str(e)}")

if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Error: Provide the path of the text file to migrate.")
        sys.exit(1)

    file_path = sys.argv[1]

    if not os.path.isfile(file_path):
        print(f"Error: The file {file_path} does not exist.")
        sys.exit(1)

    migrate_workflow(file_path)
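The script takes a single positional argument (the workflow file name below is illustrative): it rewrites the legacy node names to their `[n-suite]` equivalents and writes a `_migrated.json` copy next to the input:

```python
# Illustrative invocation (the file name is an assumption):
#   python migrate.py my_workflow.json
# Equivalent call from Python:
migrate_workflow("my_workflow.json")   # writes my_workflow_migrated.json
```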
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/.gitignore
ADDED
@@ -0,0 +1,2 @@
.venv
__pycache__
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/README.md
ADDED
@@ -0,0 +1,68 @@
## commit 38af98596e59f2a6c25c6b52b2bd5a672dab4144

# 🌔 moondream

a tiny vision language model that kicks ass and runs anywhere

## moondream1

1.6B parameter model built using SigLIP, Phi-1.5 and the LLaVA training dataset.
Weights are licensed under CC-BY-SA due to using the LLaVA dataset. Try it out
on [Huggingface Spaces](https://huggingface.co/spaces/vikhyatk/moondream1)!

**Benchmarks**

| Model | Parameters | VQAv2 | GQA | VizWiz | TextVQA |
| --- | --- | --- | --- | --- | --- |
| LLaVA-1.5 | 13.3B | 80.0 | 63.3 | 53.6 | 61.3 |
| LLaVA-1.5 | 7.3B | 78.5 | 62.0 | 50.0 | 58.2 |
| [MC-LLaVA-3B](https://huggingface.co/visheratin/MC-LLaVA-3b) | 3B | 64.2 | 49.6 | 24.9 | 38.6 |
| [LLaVA-Phi](https://arxiv.org/pdf/2401.02330.pdf) | 3B | 71.4 | - | 35.9 | 48.6 |
| **moondream1** | 1.6B | 74.3 | 56.3 | 30.3 | 39.8 |

**Examples**

| Image | Examples |
| --- | --- |
|  | **What is the title of this book?**<br>The Little Book of Deep Learning<br><br>**What can you tell me about this book?**<br>The book appears to be a white booklet titled "The Little Book of Deep Learning." It is held in a person's hand, and it seems to be a personal possession. The book's content focuses on the basics of deep learning, which is a field of artificial intelligence that uses neural networks to process and analyze data. It is likely that the book provides an introduction to the concepts and techniques involved in deep learning, making it accessible for beginners and helping them understand the fundamentals of this advanced machine learning approach.<br><br>**Where is the person standing?**<br>The person is standing on a balcony or a deck, which is located outside the house. |
|  | **What type of food is the girl holding?**<br>The girl is holding a large hamburger or burger, which is a type of sandwich made from ground meat, typically consisting of a beef patty, and is usually served between two slices of bread.<br><br>**What color is the woman's hair?**<br>The woman's hair is white.<br><br>**What is the girl doing in the image?**<br>The girl in the image is eating a hamburger. |
|  | **What color is the train?**<br>The train is red.<br><br>**What is the bus doing in the image?**<br>The bus is driving down a street, passing through an intersection, and traveling on a train track.<br><br>**What kind of public transportation is in the image?**<br>The image features a red trolley or commuter train on a city street, which is a form of public transportation. |
|  | **What is the girl looking at?**<br>The girl is looking at her reflection in the mirror while adjusting her uniform.<br><br>**What color is the woman's hair?**<br>The woman's hair is brown.<br><br>**What is the woman doing in the mirror?**<br>The woman is adjusting her hair in the mirror. |
|  | **What might be the reason for the dog's aggressive behavior?**<br>The dog's aggressive behavior, with its teeth bared and growling, could be due to several reasons. It is possible that the dog is feeling threatened, scared, or defensive in its current environment, such as a room with a person it doesn't know well or a situation that provokes it. Alternatively, the dog might be reacting to a perceived threat or discomfort from the person holding it. It is essential to assess the situation and the dog's body language to determine the exact cause of its aggressive behavior and respond accordingly to ensure the safety and well-being of both the dog and the person involved.<br><br>**What kind of dog is in the picture?**<br>The picture features a small dog, possibly a Chihuahua, with red eyes and a mean, hungry-looking expression.<br><br>**What color is the dog?**<br>The dog is black and white. |

**Usage**

Clone this repository and install the dependencies:

```bash
pip install -r requirements.txt
```

Use the `sample.py` script to run the model on CPU:

```bash
python sample.py --image [IMAGE_PATH] --prompt [PROMPT]
```

When the `--prompt` argument is not provided, the script will allow you to ask
questions interactively.

**Gradio demo**

Use the `gradio_demo.py` script to run the gradio app:

```bash
python gradio_demo.py
```

**Limitations**

* The model may generate inaccurate statements.
* It may struggle to adhere to intricate or nuanced instructions.
* It is primarily designed to understand English. Informal English, slang, and
  non-English languages may not work well.
* The model may not be free from societal biases. Users should be aware of this
  and exercise caution and critical thinking when using the model.
* The model may generate offensive, inappropriate, or hurtful content if it is
  prompted to do so.
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/_sample.py
ADDED
@@ -0,0 +1,26 @@
import argparse
from PIL import Image
from moondream import VisionEncoder, TextModel
from huggingface_hub import snapshot_download

def main(image_path, prompt):
    model_path = snapshot_download("vikhyatk/moondream1")
    vision_encoder = VisionEncoder(model_path)
    text_model = TextModel(model_path)
    image = Image.open(image_path)
    image_embeds = vision_encoder(image)

    if prompt is None:
        while True:
            question = input("> ")
            print(text_model.answer_question(image_embeds, question))
    else:
        print(">", prompt)
        print(text_model.answer_question(image_embeds, prompt))

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--image", type=str, required=True)
    parser.add_argument("--prompt", type=str, required=False)
    args = parser.parse_args()
    main(args.image, args.prompt)
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-1.jpg
ADDED (stored with Git LFS)
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-2.jpg
ADDED (stored with Git LFS)
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-3.jpg
ADDED
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-4.jpg
ADDED
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-5.jpg
ADDED
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/gradio_demo.py
ADDED
@@ -0,0 +1,36 @@
import re
import gradio as gr
from moondream import VisionEncoder, TextModel
from huggingface_hub import snapshot_download
from threading import Thread
from transformers import TextIteratorStreamer

model_path = snapshot_download("vikhyatk/moondream1")
vision_encoder = VisionEncoder(model_path)
text_model = TextModel(model_path)

def moondream(img, prompt):
    image_embeds = vision_encoder(img)
    streamer = TextIteratorStreamer(text_model.tokenizer, skip_special_tokens=True)
    thread = Thread(target=text_model.answer_question, kwargs={
        "image_embeds": image_embeds, "question": prompt, "streamer": streamer})
    thread.start()

    buffer = ""
    for new_text in streamer:
        clean_text = re.sub("<$|END$", "", new_text)
        buffer += clean_text
        yield buffer.strip("<END")

with gr.Blocks() as demo:
    gr.Markdown("# 🌔 moondream \n ### A tiny vision language model. [GitHub](https://github.com/vikhyat/moondream)")
    with gr.Row():
        prompt = gr.Textbox(label='Input Prompt', placeholder='Type here...', scale=4)
        submit = gr.Button('Submit')
    with gr.Row():
        img = gr.Image(type='pil', label='Upload an Image')
        output = gr.TextArea(label="Response", info='Please wait for a few seconds..')
    submit.click(moondream, [img, prompt], output)
    prompt.submit(moondream, [img, prompt], output)

demo.queue().launch(debug=True)
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/moondream/.gitattributes
ADDED
@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/moondream/README.md
ADDED
@@ -0,0 +1,6 @@
---
language:
- en
---

Check out the [moondream repository on GitHub](https://github.com/vikhyat/moondream) for inference code and other details.
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/moondream/__init__.py
ADDED
@@ -0,0 +1,2 @@
from .vision_encoder import VisionEncoder
from .text_model import TextModel