yijin928 committed on
Commit 1b80e0f · verified · 1 parent: 05ae9dc

Upload 140 files

This view is limited to 50 files because it contains too many changes. See the raw diff for the rest.
Files changed (50)
  1. .gitattributes +16 -0
  2. custom_nodes/ComfyUI-N-Nodes/.github/ISSUE_TEMPLATE/bug_report.md +37 -0
  3. custom_nodes/ComfyUI-N-Nodes/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  4. custom_nodes/ComfyUI-N-Nodes/.github/workflows/publish.yml +21 -0
  5. custom_nodes/ComfyUI-N-Nodes/.gitignore +322 -0
  6. custom_nodes/ComfyUI-N-Nodes/LICENSE +21 -0
  7. custom_nodes/ComfyUI-N-Nodes/README.md +369 -0
  8. custom_nodes/ComfyUI-N-Nodes/__init__.py +74 -0
  9. custom_nodes/ComfyUI-N-Nodes/__pycache__/__init__.cpython-311.pyc +0 -0
  10. custom_nodes/ComfyUI-N-Nodes/__pycache__/__init__.cpython-312.pyc +0 -0
  11. custom_nodes/ComfyUI-N-Nodes/__pycache__/nnodes.cpython-311.pyc +0 -0
  12. custom_nodes/ComfyUI-N-Nodes/__pycache__/nnodes.cpython-312.pyc +0 -0
  13. custom_nodes/ComfyUI-N-Nodes/config.json +4 -0
  14. custom_nodes/ComfyUI-N-Nodes/img/image-1.png +0 -0
  15. custom_nodes/ComfyUI-N-Nodes/img/image-10.png +0 -0
  16. custom_nodes/ComfyUI-N-Nodes/img/image-13.png +0 -0
  17. custom_nodes/ComfyUI-N-Nodes/img/image-14.png +0 -0
  18. custom_nodes/ComfyUI-N-Nodes/img/image-3.png +0 -0
  19. custom_nodes/ComfyUI-N-Nodes/img/image-4.png +0 -0
  20. custom_nodes/ComfyUI-N-Nodes/img/image-5.png +3 -0
  21. custom_nodes/ComfyUI-N-Nodes/img/image-6.png +3 -0
  22. custom_nodes/ComfyUI-N-Nodes/img/image-7.png +3 -0
  23. custom_nodes/ComfyUI-N-Nodes/img/image-8.png +0 -0
  24. custom_nodes/ComfyUI-N-Nodes/img/image-9.png +0 -0
  25. custom_nodes/ComfyUI-N-Nodes/img/image.png +0 -0
  26. custom_nodes/ComfyUI-N-Nodes/img/image11.png +0 -0
  27. custom_nodes/ComfyUI-N-Nodes/install_extra.bat +15 -0
  28. custom_nodes/ComfyUI-N-Nodes/js/cte_advanced.js +213 -0
  29. custom_nodes/ComfyUI-N-Nodes/js/dynamicPrompt.js +44 -0
  30. custom_nodes/ComfyUI-N-Nodes/js/extended_widgets.js +329 -0
  31. custom_nodes/ComfyUI-N-Nodes/js/gptSampler.js +43 -0
  32. custom_nodes/ComfyUI-N-Nodes/js/include_css.js +22 -0
  33. custom_nodes/ComfyUI-N-Nodes/js/styles.css +20 -0
  34. custom_nodes/ComfyUI-N-Nodes/js/videoLoadAdvanced.js +142 -0
  35. custom_nodes/ComfyUI-N-Nodes/js/videoSave.js +87 -0
  36. custom_nodes/ComfyUI-N-Nodes/libs/__pycache__/joytag_models.cpython-311.pyc +0 -0
  37. custom_nodes/ComfyUI-N-Nodes/libs/joytag_models.py +1109 -0
  38. custom_nodes/ComfyUI-N-Nodes/libs/migrate.py +43 -0
  39. custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/.gitignore +2 -0
  40. custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/README.md +68 -0
  41. custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/_sample.py +26 -0
  42. custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-1.jpg +3 -0
  43. custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-2.jpg +3 -0
  44. custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-3.jpg +0 -0
  45. custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-4.jpg +0 -0
  46. custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-5.jpg +0 -0
  47. custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/gradio_demo.py +36 -0
  48. custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/moondream/.gitattributes +35 -0
  49. custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/moondream/README.md +6 -0
  50. custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/moondream/__init__.py +2 -0
.gitattributes CHANGED
@@ -1,2 +1,18 @@
  /web/assets/** linguist-generated
  /web/** linguist-vendored
+ custom_nodes/ComfyUI-N-Nodes/img/image-5.png filter=lfs diff=lfs merge=lfs -text
+ custom_nodes/ComfyUI-N-Nodes/img/image-6.png filter=lfs diff=lfs merge=lfs -text
+ custom_nodes/ComfyUI-N-Nodes/img/image-7.png filter=lfs diff=lfs merge=lfs -text
+ custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-1.jpg filter=lfs diff=lfs merge=lfs -text
+ custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-2.jpg filter=lfs diff=lfs merge=lfs -text
+ custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/moondream/assets/demo-1.jpg filter=lfs diff=lfs merge=lfs -text
+ custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/moondream/assets/demo-2.jpg filter=lfs diff=lfs merge=lfs -text
+ custom_nodes/ComfyUI-N-Nodes/libs/rifle/demo/I0_0.png filter=lfs diff=lfs merge=lfs -text
+ custom_nodes/ComfyUI-N-Nodes/libs/rifle/demo/I0_1.png filter=lfs diff=lfs merge=lfs -text
+ custom_nodes/ComfyUI-N-Nodes/libs/rifle/demo/I0_slomo_clipped.gif filter=lfs diff=lfs merge=lfs -text
+ custom_nodes/ComfyUI-N-Nodes/libs/rifle/demo/i0.png filter=lfs diff=lfs merge=lfs -text
+ custom_nodes/ComfyUI-N-Nodes/libs/rifle/demo/i1.png filter=lfs diff=lfs merge=lfs -text
+ custom_nodes/ComfyUI-N-Nodes/libs/rifle/demo/I2_0.png filter=lfs diff=lfs merge=lfs -text
+ custom_nodes/ComfyUI-N-Nodes/libs/rifle/demo/I2_1.png filter=lfs diff=lfs merge=lfs -text
+ custom_nodes/ComfyUI-N-Nodes/libs/rifle/demo/I2_slomo_clipped.gif filter=lfs diff=lfs merge=lfs -text
+ custom_nodes/ComfyUI-N-Nodes/libs/rifle/train_log/flownet.pkl filter=lfs diff=lfs merge=lfs -text
custom_nodes/ComfyUI-N-Nodes/.github/ISSUE_TEMPLATE/bug_report.md ADDED
@@ -0,0 +1,37 @@
1
+ ---
2
+ name: Bug report
3
+ about: Create a report to help us improve
4
+ title: "[BUG]"
5
+ labels: bug
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ **Describe the bug**
11
+ A clear and concise description of what the bug is.
12
+
13
+ **To Reproduce**
14
+ Steps to reproduce the behavior:
15
+ 1. Go to '...'
16
+ 2. Click on '....'
17
+ 3. Scroll down to '....'
18
+ 4. See error
19
+
20
+ **Expected behavior**
21
+ A clear and concise description of what you expected to happen.
22
+
23
+ **Full log**
24
+ This is MANDATORY. By log I mean all the text in the console from the time ComfyUI was started until the time of the reported bug.
25
+ >>Bug reports that do not have this log will be closed.<<
26
+
27
+ **Screenshots**
28
+ If applicable, add screenshots to help explain your problem.
29
+
30
+ **Desktop (please complete the following information):**
31
+ - OS: [e.g. iOS]
32
+ - Browser [e.g. chrome, safari]
33
+ - Version [e.g. 22]
34
+
35
+
36
+ **Additional context**
37
+ Add any other context about the problem here.
custom_nodes/ComfyUI-N-Nodes/.github/ISSUE_TEMPLATE/feature_request.md ADDED
@@ -0,0 +1,20 @@
1
+ ---
2
+ name: Feature request
3
+ about: Suggest an idea for this project
4
+ title: ''
5
+ labels: enhancement
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ **Is your feature request related to a problem? Please describe.**
11
+ A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12
+
13
+ **Describe the solution you'd like**
14
+ A clear and concise description of what you want to happen.
15
+
16
+ **Describe alternatives you've considered**
17
+ A clear and concise description of any alternative solutions or features you've considered.
18
+
19
+ **Additional context**
20
+ Add any other context or screenshots about the feature request here.
custom_nodes/ComfyUI-N-Nodes/.github/workflows/publish.yml ADDED
@@ -0,0 +1,21 @@
1
+ name: Publish to Comfy registry
2
+ on:
3
+ workflow_dispatch:
4
+ push:
5
+ branches:
6
+ - main
7
+ paths:
8
+ - "pyproject.toml"
9
+
10
+ jobs:
11
+ publish-node:
12
+ name: Publish Custom Node to registry
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - name: Check out code
16
+ uses: actions/checkout@v4
17
+ - name: Publish Custom Node
18
+ uses: Comfy-Org/publish-node-action@main
19
+ with:
20
+ ## Add your own personal access token to your Github Repository secrets and reference it here.
21
+ personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }}
custom_nodes/ComfyUI-N-Nodes/.gitignore ADDED
@@ -0,0 +1,322 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ libs/moondream_repo
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
161
+
162
+ # Byte-compiled / optimized / DLL files
163
+ __pycache__/
164
+ *.py[cod]
165
+ *$py.class
166
+
167
+ # C extensions
168
+ *.so
169
+
170
+ # Distribution / packaging
171
+ .Python
172
+ build/
173
+ develop-eggs/
174
+ dist/
175
+ downloads/
176
+ eggs/
177
+ .eggs/
178
+ lib/
179
+ lib64/
180
+ parts/
181
+ sdist/
182
+ var/
183
+ wheels/
184
+ share/python-wheels/
185
+ *.egg-info/
186
+ .installed.cfg
187
+ *.egg
188
+ MANIFEST
189
+
190
+ # PyInstaller
191
+ # Usually these files are written by a python script from a template
192
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
193
+ *.manifest
194
+ *.spec
195
+
196
+ # Installer logs
197
+ pip-log.txt
198
+ pip-delete-this-directory.txt
199
+
200
+ # Unit test / coverage reports
201
+ htmlcov/
202
+ .tox/
203
+ .nox/
204
+ .coverage
205
+ .coverage.*
206
+ .cache
207
+ nosetests.xml
208
+ coverage.xml
209
+ *.cover
210
+ *.py,cover
211
+ .hypothesis/
212
+ .pytest_cache/
213
+ cover/
214
+
215
+ # Translations
216
+ *.mo
217
+ *.pot
218
+
219
+ # Django stuff:
220
+ *.log
221
+ local_settings.py
222
+ db.sqlite3
223
+ db.sqlite3-journal
224
+
225
+ # Flask stuff:
226
+ instance/
227
+ .webassets-cache
228
+
229
+ # Scrapy stuff:
230
+ .scrapy
231
+
232
+ # Sphinx documentation
233
+ docs/_build/
234
+
235
+ # PyBuilder
236
+ .pybuilder/
237
+ target/
238
+
239
+ # Jupyter Notebook
240
+ .ipynb_checkpoints
241
+
242
+ # IPython
243
+ profile_default/
244
+ ipython_config.py
245
+
246
+ # pyenv
247
+ # For a library or package, you might want to ignore these files since the code is
248
+ # intended to run in multiple environments; otherwise, check them in:
249
+ # .python-version
250
+
251
+ # pipenv
252
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
253
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
254
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
255
+ # install all needed dependencies.
256
+ #Pipfile.lock
257
+
258
+ # poetry
259
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
260
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
261
+ # commonly ignored for libraries.
262
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
263
+ #poetry.lock
264
+
265
+ # pdm
266
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
267
+ #pdm.lock
268
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
269
+ # in version control.
270
+ # https://pdm.fming.dev/#use-with-ide
271
+ .pdm.toml
272
+
273
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
274
+ __pypackages__/
275
+
276
+ # Celery stuff
277
+ celerybeat-schedule
278
+ celerybeat.pid
279
+
280
+ # SageMath parsed files
281
+ *.sage.py
282
+
283
+ # Environments
284
+ .env
285
+ .venv
286
+ env/
287
+ venv/
288
+ ENV/
289
+ env.bak/
290
+ venv.bak/
291
+
292
+ # Spyder project settings
293
+ .spyderproject
294
+ .spyproject
295
+
296
+ # Rope project settings
297
+ .ropeproject
298
+
299
+ # mkdocs documentation
300
+ /site
301
+
302
+ # mypy
303
+ .mypy_cache/
304
+ .dmypy.json
305
+ dmypy.json
306
+
307
+ # Pyre type checker
308
+ .pyre/
309
+
310
+ # pytype static type analyzer
311
+ .pytype/
312
+
313
+ # Cython debug symbols
314
+ cython_debug/
315
+
316
+ # PyCharm
317
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
318
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
319
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
320
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
321
+ #.idea/
322
+ *.pyc
custom_nodes/ComfyUI-N-Nodes/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 pythongosssss
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
custom_nodes/ComfyUI-N-Nodes/README.md ADDED
@@ -0,0 +1,369 @@
1
+ [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/C0C0AJECJ)
2
+
3
+ # ComfyUI-N-Suite
4
+ A suite of custom nodes for ComfyUI that includes integer, string and float variable nodes, GPT nodes and video nodes.
5
+
6
+ > [!IMPORTANT]
7
+ > These nodes were tested primarily on Windows, in the default environment provided by ComfyUI and in the environment created by the [notebook](https://github.com/comfyanonymous/ComfyUI/blob/master/notebooks/comfyui_colab.ipynb) for Paperspace, specifically with the cyberes/gradient-base-py3.10:latest Docker image.
+ **No other environment has been tested.**
9
+
10
+
11
+ # Installation
12
+
13
+ 1. Clone the repository:
14
+ `git clone https://github.com/Nuked88/ComfyUI-N-Nodes.git`
15
+ to your ComfyUI `custom_nodes` directory
16
+
17
+ 2. ~~IMPORTANT: If you want the GPT nodes on GPU you'll need to run **install_dependency bat files**.
18
+ There are 2 versions: ***install_dependency_ggml_models.bat*** for the old ggmlv3 models and ***install_dependency_gguf_models.bat*** for all the new models (GGUF).
19
+ YOU CAN ONLY USE ONE OF THEM AT A TIME!
20
+ Since _llama-cpp-python_ needs to be compiled from source code to enable it to use the GPU, you will first need to have [CUDA](https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64) and visual studio 2019 or 2022 (in the case of my bat) installed to compile it. For details and the full guide you can go [HERE](https://github.com/abetlen/llama-cpp-python).~~
21
+
22
+ 3. If you intend to use GPTLoaderSimple with the Moondream model, you'll need to execute the 'install_extra.bat' script, which will install transformers version 4.36.2.
23
+ 4. Reboot ComfyUI
24
+
25
+ In case you need to revert these changes (due to incompatibility with other nodes), you can utilize the 'remove_extra.bat' script.
26
+
27
+ ComfyUI will automatically load all custom scripts and nodes at startup.
28
+
29
+ > [!NOTE]
30
+ > The llama-cpp-python installation will be done automatically by the script. If you have an NVIDIA GPU, no manual CUDA build is necessary anymore, thanks to the [jllllll](https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/) repo. I've also dropped support for GGMLv3 models, since all notable models should have switched to the latest version of GGUF by now.
31
+
32
+
33
+ > [!NOTE]
34
+ > Since 14/02/2024 the suite has undergone a massive rewrite, which also led to renaming all nodes in order to avoid conflicts with other extensions in the future (or at least I hope so). Consequently, the old workflows are no longer compatible and each node would need to be replaced manually.
+ > To avoid this, I have created a tool that performs the replacement automatically.
+ > On Windows, simply drag any *.json workflow onto the migrate.bat file located in custom_nodes/ComfyUI-N-Nodes, and another workflow with the suffix _migrated will be created in the same folder as the original workflow.
+ > On Linux, you can use the script like this: `python libs/migrate.py path/to/original/workflow/`.
+ > For safety, the original workflow will not be deleted.
+ > To install the last version of this repository from before these changes, run **git checkout 29b2e43baba81ee556b2930b0ca0a9c978c47083** inside the ComfyUI-N-Nodes folder.
40
+
41
+
42
+ - For uninstallation:
43
+ - Delete the `ComfyUI-N-Nodes` folder in `custom_nodes`
44
+ - Delete the `comfyui-n-nodes` folder in `ComfyUI\web\extensions`
45
+ - Delete the `n-styles.csv` and `n-styles.csv.backup` file in `ComfyUI\styles`
46
+ - Delete the `GPTcheckpoints` folder in `ComfyUI\models`
47
+
48
+
49
+
50
+
51
+
52
+ # Update
53
+ 1. Navigate to the cloned repo e.g. `custom_nodes/ComfyUI-N-Nodes`
54
+ 2. `git pull`
55
+
56
+ # Features
57
+
58
+ ## 📽️ Video Nodes 📽️
59
+
60
+ ### LoadVideo
61
+
62
+ ![alt text](./img/image-13.png)
63
+
64
+ The LoadVideoAdvanced node allows loading a video file and extracting frames from it.
65
+ The name has been changed from `LoadVideo` to `LoadVideoAdvanced` in order to avoid conflicts with the `LoadVideo` animatediff node.
66
+
67
+
68
+ #### Input Fields
69
+ - `video`: Select the video file to load.
70
+ - `framerate`: Choose whether to keep the original framerate or reduce to half or quarter speed.
71
+ - `resize_by`: Select how to resize frames - 'none', 'height', or 'width'.
72
+ - `size`: Target size if resizing by height or width.
73
+ - `images_limit`: Limit number of frames to extract.
74
+ - `batch_size`: Batch size for encoding frames.
75
+ - `starting_frame`: Select which frame to start from.
76
+ - `autoplay`: Select whether to autoplay the video.
77
+ - `use_ram`: Use RAM instead of disk for decompressing video frames.
78
+
79
+ #### Output
80
+
81
+ - `IMAGES`: Extracted frame images as PyTorch tensors.
82
+ - `LATENT`: Empty latent vectors.
83
+ - `METADATA`: Video metadata - FPS and number of frames.
84
+ - `WIDTH:` Frame width.
85
+ - `HEIGHT`: Frame height.
86
+ - `META_FPS`: Frame rate.
87
+ - `META_N_FRAMES`: Number of frames.
88
+
89
+
90
+ The node extracts frames from the input video at the specified framerate. It resizes frames if chosen and returns them as batches of PyTorch image tensors along with latent vectors, metadata, and frame dimensions.
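
For orientation, the sketch below shows the general shape of such an extraction loop using OpenCV (a declared dependency of this suite) and PyTorch. It is a minimal illustration, not the node's actual code; the function name `extract_frames` and its parameters are hypothetical.

```python
import cv2
import torch

def extract_frames(video_path, resize_to=None, images_limit=0, starting_frame=0):
    """Illustrative sketch: read frames, optionally resize, return a [N, H, W, C] float tensor in 0..1."""
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.set(cv2.CAP_PROP_POS_FRAMES, starting_frame)

    frames = []
    while True:
        ok, frame = cap.read()
        if not ok or (images_limit and len(frames) >= images_limit):
            break
        if resize_to is not None:
            frame = cv2.resize(frame, resize_to)
        # OpenCV returns BGR uint8; ComfyUI works with RGB float tensors.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frames.append(torch.from_numpy(frame).float() / 255.0)
    cap.release()

    images = torch.stack(frames)  # [N, H, W, C]
    metadata = {"fps": fps, "frame_count": n_frames}
    return images, metadata
```
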
91
+
92
+ ### SaveVideo
93
+ The SaveVideo node takes in extracted frames and saves them back as a video file.
94
+ ![alt text](./img/image-3.png)
95
+
96
+ #### Input Fields
97
+ - `images`: Frame images as tensors.
98
+ - `METADATA`: Metadata from LoadVideo node.
99
+ - `SaveVideo`: Toggle saving output video file.
100
+ - `SaveFrames`: Toggle saving frames to a folder.
101
+ - `CompressionLevel`: PNG compression level for saving frames.
102
+ #### Output
103
+ Saves output video file and/or extracted frames.
104
+
105
+ The node takes extracted frames and metadata and can save them as a new video file and/or individual frame images. Video compression and frame PNG compression can be configured.
106
+ NOTE: If you are using **LoadVideo** as the source of the frames, the audio of the original file will be kept, but only if **images_limit** and **starting_frame** are both equal to zero.
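
As a rough illustration of the saving side (again, not the node's actual implementation; the fourcc, output path and function name are assumptions), frames in the format above can be written back with OpenCV's `VideoWriter`:

```python
import cv2
import numpy as np

def save_video(frames, fps, out_path="output.mp4"):
    """Illustrative sketch: frames is a [N, H, W, C] float tensor in 0..1."""
    h, w = frames.shape[1], frames.shape[2]
    writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))
    for frame in frames:
        rgb = (frame.numpy() * 255).astype(np.uint8)
        writer.write(cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR))  # VideoWriter expects BGR
    writer.release()
```
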
107
+
108
+ ### LoadFramesFromFolder
109
+ ![alt text](./img/image.png)
110
+
111
+ The LoadFramesFromFolder node allows loading image frames from a folder and returning them as a batch.
112
+
113
+
114
+ #### Input Fields
115
+ - `folder`: Path to the folder containing the frame images. They must be in PNG format and named with a number (e.g. 1.png or even 0001.png). The images will be loaded sequentially.
116
+ - `fps`: Frames per second to assign to the loaded frames.
117
+
118
+ #### Output
119
+ - `IMAGES`: Batch of loaded frame images as PyTorch tensors.
120
+ - `METADATA`: Metadata containing the set FPS value.
121
+ - `MAX_WIDTH`: Maximum frame width.
122
+ - `MAX_HEIGHT`: Maximum frame height.
123
+ - `FRAME COUNT`: Number of frames in the folder.
124
+ - `PATH`: Path to the folder containing the frame images.
125
+ - `IMAGE LIST`: List of frame images in the folder (not a real list, just a string separated by \n).
126
+
127
+ The node loads all image files from the specified folder, converts them to PyTorch tensors, and returns them as a batched tensor along with simple metadata containing the set FPS value.
128
+
129
+ This allows easily loading a set of frames that were extracted and saved previously, for example, to reload and process them again. By setting the FPS value, the frames can be properly interpreted as a video sequence.
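
A minimal sketch of this loading behaviour, assuming numerically named PNG files as described above (the function name and return layout are illustrative, not the node's code):

```python
from pathlib import Path

import numpy as np
import torch
from PIL import Image

def load_frames_from_folder(folder, fps=30):
    """Illustrative sketch: load numerically named PNG frames and return a batched tensor plus metadata."""
    files = sorted(Path(folder).glob("*.png"), key=lambda p: int(p.stem))
    frames = []
    for f in files:
        img = np.array(Image.open(f).convert("RGB"), dtype=np.float32) / 255.0
        frames.append(torch.from_numpy(img))
    images = torch.stack(frames)  # [N, H, W, C]
    metadata = {"fps": fps, "frame_count": len(files)}
    return images, metadata, "\n".join(f.name for f in files)
```
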
130
+
131
+ ### SetMetadataForSaveVideo
132
+
133
+ ![alt text](./img/image-1.png)
134
+
135
+ The SetMetadataForSaveVideo node allows setting metadata for the SaveVideo node.
136
+
137
+ ### FrameInterpolator
138
+
139
+ ![alt text](./img/image-4.png)
140
+
141
+ The FrameInterpolator node allows interpolating between extracted video frames to increase the frame rate and smooth motion.
142
+
143
+
144
+ #### Input Fields
145
+
146
+ - `images`: Extracted frame images as tensors.
147
+ - `METADATA`: Metadata from video - FPS and number of frames.
148
+ - `multiplier`: Factor by which to increase frame rate.
149
+
150
+ #### Output
151
+
152
+ - `IMAGES`: Interpolated frames as image tensors.
153
+ - `METADATA`: Updated metadata with new frame rate.
154
+
155
+ The node takes extracted frames and metadata as input. It uses an interpolation model (RIFE) to generate additional in-between frames at a higher frame rate.
156
+
157
+ The original frame rate in the metadata is multiplied by the `multiplier` value to get the new interpolated frame rate.
158
+
159
+ The interpolated frames are returned as a batch of image tensors, along with updated metadata containing the new frame rate.
160
+
161
+ This allows increasing the frame rate of an existing video to achieve smoother motion and slower playback. The interpolation model creates new realistic frames to fill in the gaps rather than just duplicating existing frames.
162
+
163
+ The original code has been taken from [HERE](https://github.com/hzwer/Practical-RIFE/tree/main)
164
+
165
+ ## Variables
166
+ Since the primitive node has limitations in links (for example, at the time I'm writing you cannot link "start_at_step" and "steps" of another KSampler together), I decided to create these simple node-variables to bypass this limitation.
167
+ The node-variables are:
168
+ - Integer
169
+ - Float
170
+ - String
171
+
172
+
173
+ ## 🤖 GPTLoaderSimple and GPTSampler 🤖
174
+
175
+ These custom nodes are designed to enhance the capabilities of the ComfyUI framework by enabling text generation using GGUF GPT models. This README provides an overview of the two custom nodes and their usage within ComfyUI.
176
+
177
+ You can add the path where your GGUF models are stored to _extra_model_paths.yaml_, for example:
+
+ `other_ui:
+ base_path: I:\\text-generation-webui
+ GPTcheckpoints: models/`
+
+ Otherwise, a GPTcheckpoints folder will be created in the ComfyUI models folder, where you can place your .gguf models.
184
+
185
+ Two folders have also been created within the 'Llava' directory in the 'GPTcheckpoints' folder for the LLava model:
186
+
187
+ `clips`: This folder is designated for storing the clips for your LLava models (usually, files that start with **mm** in the repository).
188
+ `models`: This folder is designated for storing the LLava models.
189
+
190
+ This node currently supports 4 different kinds of models:
191
+ - All the GGUF supported by [llama.cpp](https://github.com/ggerganov/llama.cpp)
192
+ - Llava
193
+ - Moondream
194
+ - Joytag
195
+
196
+
197
+ #### GGUF LLM
198
+
199
+ The GGUF models can be downloaded from the [Huggingface Hub](https://huggingface.co/models?search=gguf)
200
+
201
+ [HERE](https://www.youtube.com/watch?v=gzTqXbF0S-w) a video of an example of how to use the GGUF models by [boricuapab](https://github.com/boricuapab)
202
+
203
+
204
+ #### Llava
205
+ Here is a small list of the models supported by this node:
206
+
207
+ [LlaVa 1.5 7B](https://huggingface.co/mys/ggml_llava-v1.5-7b/)
208
+ [LlaVa 1.5 13B](https://huggingface.co/mys/ggml_llava-v1.5-13b)
209
+ [LlaVa 1.6 Mistral 7B](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/)
210
+ [BakLLaVa](https://huggingface.co/mys/ggml_bakllava-1)
211
+ [Nous Hermes 2 Vision](https://huggingface.co/billborkowski/llava-NousResearch_Nous-Hermes-2-Vision-GGUF)
212
+
213
+ #### Example with Llava model:
214
+ ![alt text](./img/image-5.png)
215
+
216
+ #### Moondream
217
+ The model will be downloaded automatically the first time you run it.
+ It is also available [HERE](https://huggingface.co/vikhyatk/moondream1/tree/main).
+ The code was taken from [this repository](https://github.com/vikhyat/moondream).
220
+
221
+ #### Example with Moondream model:
222
+ ![alt text](./img/image-6.png)
223
+
224
+ #### Joytag
225
+ The model will be downloaded automatically the first time you run it.
+ It is also available [HERE](https://huggingface.co/fancyfeast/joytag/tree/main).
+ The code was taken from [this repository](https://github.com/fpgaminer/joytag).
228
+
229
+ #### Example with Joytag model:
230
+ ![alt text](./img/image-7.png)
231
+
232
+ ### GPTLoaderSimple
233
+
234
+ ![alt text](./img/image11.png)
235
+
236
+ The `GPTLoaderSimple` node is responsible for loading GPT model checkpoints and creating an instance of the Llama library for text generation. It provides an interface to configure GPU layers, the number of threads, and maximum context for text generation.
237
+
238
+
239
+
240
+ #### Input Fields
241
+
242
+ - `ckpt_name`: Select the GPT checkpoint name from the available options (joytag and moondream will be downloaded automatically the first time they are used).
243
+ - `gpu_layers`: Specify the number of GPU layers to use (default: 27).
244
+ - `n_threads`: Specify the number of threads for text generation (default: 8).
245
+ - `max_ctx`: Specify the maximum context length for text generation (default: 2048).
246
+
247
+ #### Output
248
+
249
+ The node returns an instance of the Llama library (MODEL) and the path to the loaded checkpoint (STRING).
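
For reference, the loader's input fields map closely onto llama-cpp-python's `Llama` constructor. A minimal sketch (the checkpoint path is a placeholder, not a file shipped with this repo):

```python
from llama_cpp import Llama

# Values mirror the node's defaults: gpu_layers=27, n_threads=8, max_ctx=2048.
llm = Llama(
    model_path="ComfyUI/models/GPTcheckpoints/your-model.gguf",  # placeholder path
    n_gpu_layers=27,
    n_threads=8,
    n_ctx=2048,
)
```
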
250
+
251
+ ### GPTSampler
252
+
253
+ ![alt text](./img/image-8.png)
254
+
255
+ The `GPTSampler` node facilitates text generation using GPT models based on the input prompt and various generation parameters. It allows you to control aspects like temperature, top-p sampling, penalties, and more.
256
+
257
+
258
+ #### Input Fields
259
+
260
+ - `prompt`: Enter the input prompt for text generation.
261
+ - `image`: Image input for Joytag, moondream and llava models.
262
+ - `model`: Choose the GPT model to use for text generation.
263
+ - `max_tokens`: Set the maximum number of tokens in the generated text (default: 128).
264
+ - `temperature`: Set the temperature parameter for randomness (default: 0.7).
265
+ - `top_p`: Set the top-p probability for nucleus sampling (default: 0.5).
266
+ - `logprobs`: Specify the number of log probabilities to output (default: 0).
267
+ - `echo`: Enable or disable printing the input prompt alongside the generated text.
268
+ - `stop_token`: Specify the token at which text generation stops.
269
+ - `frequency_penalty`, `presence_penalty`, `repeat_penalty`: Control word generation penalties.
270
+ - `top_k`: Set the top-k tokens to consider during generation (default: 40).
271
+ - `tfs_z`: Set the tail-free sampling parameter z (default: 1.0).
272
+ - `print_output`: Enable or disable printing the generated text to the console.
273
+ - `cached`: Choose whether to use cached generation (default: NO).
274
+ - `prefix`, `suffix`: Specify text to prepend and append to the prompt.
275
+ - `max_tags`: This only affects the maximum number of tags generated by Joytag.
276
+
277
+ #### Output
278
+
279
+ The node returns the generated text along with a UI-friendly representation.
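
As a sketch of how these parameters correspond to llama-cpp-python's completion call (not the node's exact code; the prompt and stop token are placeholders):

```python
# Assumes `llm` is the Llama instance created by the loader sketch above.
result = llm(
    "Describe the image in one sentence.",  # prompt (placeholder)
    max_tokens=128,
    temperature=0.7,
    top_p=0.5,
    top_k=40,
    repeat_penalty=1.1,
    frequency_penalty=0.0,
    presence_penalty=0.0,
    tfs_z=1.0,
    stop=["</s>"],  # placeholder stop token; depends on the model's prompt template
    echo=False,
)
generated_text = result["choices"][0]["text"]
print(generated_text)
```
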
280
+
281
+
282
+ ## Image Pad For Outpainting Advanced
283
+ ![alt text](./img/image-14.png)
284
+
285
+ The `ImagePadForOutpaintingAdvanced` node is an alternative to the `ImagePadForOutpainting` node that applies the technique seen in [this video](https://www.youtube.com/@robadams2451) under the outpainting mask.
286
+ The color correction part was taken from [this](https://github.com/sipherxyz/comfyui-art-venture) custom node by Sipherxyz.
287
+
288
+ #### Input Fields
289
+
290
+ - `image`: Image input.
291
+ - `left`: pixels to extend from the left.
+ - `top`: pixels to extend from the top.
+ - `right`: pixels to extend from the right.
+ - `bottom`: pixels to extend from the bottom.
+ - `feathering`: feathering strength.
+ - `noise`: blend strength between the noise and the copied border.
+ - `pixel_size`: how big the pixels in the pixelated effect will be.
+ - `pixel_to_copy`: how many pixels to copy (from each side).
299
+ - `temperature`: color correction setting that is only applied to the mask part.
300
+ - `hue`: color correction setting that is only applied to the mask part.
301
+ - `brightness`: color correction setting that is only applied to the mask part.
302
+ - `contrast`: color correction setting that is only applied to the mask part.
303
+ - `saturation`: color correction setting that is only applied to the mask part.
304
+ - `gamma`: color correction setting that is only applied to the mask part.
305
+
306
+ #### Output
307
+
308
+ The node returns the processed image and the mask.
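
To make the geometry concrete, here is a minimal sketch of the padding-and-mask step only (feathering, pixelation, border copying and color correction omitted); the function is illustrative, not the node's implementation:

```python
import torch
import torch.nn.functional as F

def pad_for_outpainting(image, left, top, right, bottom):
    """Illustrative: image is a [H, W, C] float tensor in 0..1."""
    h, w, _ = image.shape
    # Pad width by (left, right) and height by (top, bottom) with zeros.
    padded = F.pad(image.permute(2, 0, 1), (left, right, top, bottom)).permute(1, 2, 0)
    # The mask is 1 where new content must be generated and 0 over the original image.
    mask = torch.ones(h + top + bottom, w + left + right)
    mask[top:top + h, left:left + w] = 0.0
    return padded, mask
```
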
309
+
310
+ ## Dynamic Prompt
311
+
312
+ ![alt text](./img/image-9.png)
313
+
314
+ The `DynamicPrompt` node generates prompts by combining a fixed prompt with a random selection of tags from a variable prompt. This enables flexible and dynamic prompt generation for various use cases.
315
+
316
+ #### Input Fields
317
+
318
+ - `variable_prompt`: Enter the variable prompt for tag selection.
319
+ - `cached`: Choose whether to cache the generated prompt (default: NO).
320
+ - `number_of_random_tag`: Choose between "Fixed" and "Random" for the number of random tags to include.
321
+ - `fixed_number_of_random_tag`: If `number_of_random_tag` is set to "Fixed", specify the number of random tags to include (default: 1).
322
+ - `fixed_prompt` (Optional): Enter the fixed prompt for generating the final prompt.
323
+
324
+ #### Output
325
+
326
+ The node returns the generated prompt, which is a combination of the fixed prompt and selected random tags.
327
+
328
+ #### Example Usage
329
+
330
+ - Just fill the `variable_prompt` field with comma-separated tags; the `fixed_prompt` is optional. A conceptual sketch of the selection logic is shown below.
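
A conceptual sketch of the combination logic described above (illustrative only; the function name is hypothetical):

```python
import random

def dynamic_prompt(variable_prompt, fixed_prompt="", number_of_tags=1):
    """Pick random tags from the variable prompt and append them to the fixed prompt."""
    tags = [t.strip() for t in variable_prompt.split(",") if t.strip()]
    chosen = random.sample(tags, k=min(number_of_tags, len(tags)))
    return ", ".join(part for part in [fixed_prompt.strip(), ", ".join(chosen)] if part)

# Example:
# dynamic_prompt("red hair, blue eyes, night sky, rain",
#                fixed_prompt="portrait of a woman", number_of_tags=2)
```
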
331
+
332
+
333
+ ## CLIP Text Encode Advanced (Experimental)
334
+
335
+ ![alt text](./img/image-10.png)
336
+
337
+ The `CLIP Text Encode Advanced` node is an alternative to the standard `CLIP Text Encode` node. It offers support for Add/Replace/Delete styles, allowing for the inclusion of both positive and negative prompts within a single node.
338
+
339
+ The base style file is called `n-styles.csv` and is located in the `ComfyUI\styles` folder.
340
+ The styles file follows the same format as the current `styles.csv` file utilized in A1111 (at the time of writing).
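
For reference, an A1111-style CSV has `name`, `prompt` and `negative_prompt` columns; a minimal sketch of reading it (column names assumed to match A1111's format, path as described above):

```python
import csv

def load_styles(path="ComfyUI/styles/n-styles.csv"):
    """Illustrative: return {style_name: (positive_prompt, negative_prompt)}."""
    with open(path, newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        return {row["name"]: (row["prompt"], row["negative_prompt"]) for row in reader}
```
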
341
+
342
+ NOTE: this node is experimental and still has a lot of bugs.
343
+
344
+ #### Input Fields
345
+
346
+ - `clip`: clip input
347
+ - `style`: it will automatically fill the positive and negative prompts based on the chosen style.
348
+
349
+ #### Output
350
+ - `positive`: positive conditions
351
+ - `negative`: negative conditions
352
+
353
+
354
+
355
+
356
+
357
+
358
+ ## Troubleshooting
359
+
360
+ - ~~**SaveVideo - Preview not working**: is related to a conflict with animateDiff, i've already opened a [PR](https://github.com/ArtVentureX/comfyui-animatediff/pull/64) to solve this issue. Meanwhile you can download my patched version from [here](https://github.com/Nuked88/comfyui-animatediff)~~ pull has been merged so this problem should be fixed now!
361
+
362
+ ## Contributing
363
+
364
+ Feel free to contribute to this project by reporting issues or suggesting improvements. Open an issue or submit a pull request on the GitHub repository.
365
+
366
+ ## License
367
+
368
+ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
369
+
custom_nodes/ComfyUI-N-Nodes/__init__.py ADDED
@@ -0,0 +1,74 @@
1
+ import importlib.util
2
+ import os
3
+ import sys
4
+ from .nnodes import init, get_ext_dir,check_and_install,downloader,get_commit,color
5
+ import folder_paths
6
+ import traceback
7
+ from pathlib import Path
8
+ NODE_CLASS_MAPPINGS = {}
9
+ NODE_DISPLAY_NAME_MAPPINGS = {}
10
+
11
+
12
+ if init():
13
+ print("------------------------------------------")
14
+ print(f"{color.BLUE}### N-Suite Revision:{color.END} {color.GREEN}{get_commit()} {color.END}")
15
+ py = Path(get_ext_dir("py"))
16
+ files = list(py.glob("*.py"))
17
+ check_and_install('packaging')
18
+ check_and_install('py-cpuinfo',"cpuinfo")
19
+ check_and_install('gitpython','git')
20
+ check_and_install('moviepy')
21
+ check_and_install("opencv-python","cv2")
22
+ check_and_install('scikit-build',"skbuild")
23
+ #LLAMA DEPENTENCIES
24
+ check_and_install('typing')
25
+ check_and_install('diskcache')
26
+ check_and_install('llama_cpp')
27
+ check_and_install('timm',"timm","0.9.12",reboot=True)
28
+ #check_and_install('gitpython',"git")
29
+ #check_and_install('sentencepiece')
30
+ #check_and_install("accelerate")
31
+ #check_and_install('transformers','transformers',"4.36.2")
32
+
33
+
34
+
35
+
36
+ #git clone https://github.com/hzwer/Practical-RIFE.git
37
+ from git import Repo
38
+ if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)),"libs","rifle")):
39
+ Repo.clone_from("https://github.com/hzwer/Practical-RIFE.git", os.path.join(os.path.dirname(os.path.realpath(__file__)),"libs","rifle"))
40
+
41
+ if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)),"libs","moondream_repo")):
42
+ repo = Repo.clone_from("https://github.com/Nuked88/moondream.git", os.path.join(os.path.dirname(os.path.realpath(__file__)),"libs","moondream_repo"))
43
+
44
+ #commit_hash = "38af98596e59f2a6c25c6b52b2bd5a672dab4144"
45
+ #repo.git.checkout(commit_hash)
46
+
47
+ #if file moondream.py not exist
48
+ #if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)),"libs","moondream_repo","moondream","moondream.py")):
49
+ # #delete moondream_repo and download repo again
50
+ # shutil.rmtree(os.path.join(os.path.dirname(os.path.realpath(__file__)),"libs","moondream_repo"))
51
+ # repo = Repo.clone_from("https://github.com/Nuked88/moondream.git", os.path.join(os.path.dirname(os.path.realpath(__file__)),"libs","moondream_repo"))
52
+
53
+ #if train_log folder not exists
54
+ if not os.path.exists(os.path.join(os.path.dirname(os.path.realpath(__file__)),"libs","rifle","train_log")):
55
+ downloader("https://github.com/Nuked88/DreamingAI/raw/main/RIFE_trained_model_v4.7.zip")
56
+
57
+ # code based on pysssss repo
58
+ for file in files:
59
+ try:
60
+ name = os.path.splitext(file)[0]
61
+ spec = importlib.util.spec_from_file_location(name, os.path.join(py, file))
62
+ module = importlib.util.module_from_spec(spec)
63
+ sys.modules[name] = module
64
+ spec.loader.exec_module(module)
65
+ if hasattr(module, "NODE_CLASS_MAPPINGS") and getattr(module, "NODE_CLASS_MAPPINGS") is not None:
66
+ NODE_CLASS_MAPPINGS.update(module.NODE_CLASS_MAPPINGS)
67
+ if hasattr(module, "NODE_DISPLAY_NAME_MAPPINGS") and getattr(module, "NODE_DISPLAY_NAME_MAPPINGS") is not None:
68
+ NODE_DISPLAY_NAME_MAPPINGS.update(module.NODE_DISPLAY_NAME_MAPPINGS)
69
+ except Exception as e:
70
+ traceback.print_exc()
71
+
72
+
73
+ __all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
74
+ WEB_DIRECTORY = "./js"
custom_nodes/ComfyUI-N-Nodes/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (4.69 kB).
custom_nodes/ComfyUI-N-Nodes/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (4.32 kB).
custom_nodes/ComfyUI-N-Nodes/__pycache__/nnodes.cpython-311.pyc ADDED
Binary file (20.1 kB).
custom_nodes/ComfyUI-N-Nodes/__pycache__/nnodes.cpython-312.pyc ADDED
Binary file (17.8 kB).
custom_nodes/ComfyUI-N-Nodes/config.json ADDED
@@ -0,0 +1,4 @@
+ {
+ "name": "N-Suite",
+ "logging": false
+ }
custom_nodes/ComfyUI-N-Nodes/img/image-1.png ADDED
custom_nodes/ComfyUI-N-Nodes/img/image-10.png ADDED
custom_nodes/ComfyUI-N-Nodes/img/image-13.png ADDED
custom_nodes/ComfyUI-N-Nodes/img/image-14.png ADDED
custom_nodes/ComfyUI-N-Nodes/img/image-3.png ADDED
custom_nodes/ComfyUI-N-Nodes/img/image-4.png ADDED
custom_nodes/ComfyUI-N-Nodes/img/image-5.png ADDED

Git LFS Details

  • SHA256: 4c04e5794a5506c2376e26e95acff22c7d1647407e08ada2348765ac44312134
  • Pointer size: 131 Bytes
  • Size of remote file: 172 kB
custom_nodes/ComfyUI-N-Nodes/img/image-6.png ADDED

Git LFS Details

  • SHA256: 10c1ee46abd7ef885078401e7d8a2efb88e109fa06a4e9063f7f206b2fe8a414
  • Pointer size: 131 Bytes
  • Size of remote file: 142 kB
custom_nodes/ComfyUI-N-Nodes/img/image-7.png ADDED

Git LFS Details

  • SHA256: a7897544708d0e314813fdabf4de56cc1f838aa3be66969340c4dd2de734aa1b
  • Pointer size: 131 Bytes
  • Size of remote file: 126 kB
custom_nodes/ComfyUI-N-Nodes/img/image-8.png ADDED
custom_nodes/ComfyUI-N-Nodes/img/image-9.png ADDED
custom_nodes/ComfyUI-N-Nodes/img/image.png ADDED
custom_nodes/ComfyUI-N-Nodes/img/image11.png ADDED
custom_nodes/ComfyUI-N-Nodes/install_extra.bat ADDED
@@ -0,0 +1,15 @@
1
+ @echo off
2
+ set "python_exec=..\..\..\python_embeded\python.exe"
3
+
4
+ echo Installing dependency for moondream_repo...
5
+ if exist "%python_exec%" (
6
+ echo Installing with ComfyUI Portable
7
+ "%python_exec%" -s -m pip install transformers==4.36.2
8
+ echo Done. Please reboot ComfyUI.
9
+ ) else (
10
+ echo Installing with system Python
11
+ pip install transformers==4.36.2
12
+ echo Done. Please reboot ComfyUI.
13
+ )
14
+
15
+ pause
custom_nodes/ComfyUI-N-Nodes/js/cte_advanced.js ADDED
@@ -0,0 +1,213 @@
1
+ import { app } from "/scripts/app.js";
2
+ import { api } from "/scripts/api.js"
3
+ const MultilineSymbol = Symbol();
4
+ const MultilineResizeSymbol = Symbol();
5
+
6
+ function getStyles(name) {
7
+ //console.log("getStyles called " + name);
8
+
9
+ return api.fetchApi('/nsuite/styles')
10
+ .then(response => response.json())
11
+ .then(data => {
12
+ // Process the response data
13
+ const styles = data.styles;
14
+ //console.log('Styles:', styles);
15
+ let positive_prompt = "";
16
+ let negative_prompt = "";
17
+
18
+ // Look up positive_prompt and negative_prompt for the given name
19
+ for (let i = 0; i < styles[0].length; i++) {
20
+ const style = styles[0][i];
21
+ if (style.name === name) {
22
+ positive_prompt = style.prompt;
23
+ negative_prompt = style.negative_prompt;
24
+ //console.log('Style:', style.name);
25
+ break;
26
+ }
27
+ }
28
+
29
+ if (positive_prompt !== "") {
30
+ //console.log("Positive prompt:", positive_prompt);
31
+ //console.log("Negative prompt:", negative_prompt);
32
+ return { positive_prompt: positive_prompt, negative_prompt: negative_prompt };
33
+ } else {
34
+ return { positive_prompt: "", negative_prompt: "" };
35
+ }
36
+ })
37
+ .catch(error => {
38
+ console.error('Error:', error);
39
+ throw error; // Re-throw the error so the caller can handle it
40
+ });
41
+ }
42
+
43
+ function addStyles(name, positive_prompt, negative_prompt) {
44
+ return api.fetchApi('/nsuite/styles/add', {
45
+ method: 'POST',
46
+ headers: {
47
+ 'Content-Type': 'application/json',
48
+ },
49
+ body: JSON.stringify({
50
+ name: name,
51
+ positive_prompt: positive_prompt,
52
+ negative_prompt: negative_prompt
53
+ }),
54
+
55
+ })
56
+ }
57
+
58
+ function updateStyles(name, positive_prompt, negative_prompt) {
59
+ return api.fetchApi('/nsuite/styles/update', {
60
+ method: 'POST',
61
+ headers: {
62
+ 'Content-Type': 'application/json',
63
+ },
64
+ body: JSON.stringify({
65
+ name: name,
66
+ positive_prompt: positive_prompt,
67
+ negative_prompt: negative_prompt
68
+ }),
69
+ })
70
+ }
71
+
72
+ function removeStyles(name) {
73
+ //confirmation
74
+ let ok = confirm("Are you sure you want to remove this style?");
75
+ if (!ok) {
76
+ return;
77
+ }
78
+
79
+ return api.fetchApi('/nsuite/styles/remove', {
80
+ method: 'POST',
81
+ headers: {
82
+ 'Content-Type': 'application/json',
83
+ },
84
+ body: JSON.stringify({
85
+ name: name
86
+ }),
87
+ })
88
+ }
89
+
90
+ app.registerExtension({
91
+ name: "n.CLIPTextEncodeAdvancedNSuite",
92
+ async beforeRegisterNodeDef(nodeType, nodeData, app) {
93
+
94
+ const onAdded = nodeType.prototype.onAdded;
95
+ if (nodeData.name === "CLIPTextEncodeAdvancedNSuite [n-suite]") {
96
+ nodeType.prototype.onAdded = function () {
97
+ onAdded?.apply(this, arguments);
98
+ const styles = this.widgets.find((w) => w.name === "styles");
99
+ const p_prompt = this.widgets.find((w) => w.name === "positive_prompt");
100
+ const n_prompt = this.widgets.find((w) => w.name === "negative_prompt");
101
+ const cb = nodeData.callback;
102
+ let addedd_positive_prompt = "";
103
+ let addedd_negative_prompt = "";
104
+ styles.callback = function () {
105
+ let index = styles.options.values.indexOf(styles.value);
106
+
107
+
108
+ if (addedd_positive_prompt == "" && addedd_negative_prompt == "") {
109
+ getStyles(styles.options.values[index-1]).then(style_prompts => {
110
+ //wait 4 seconds
111
+
112
+ console.log(style_prompts);
113
+
114
+ addedd_positive_prompt = style_prompts.positive_prompt;
115
+ addedd_negative_prompt = style_prompts.negative_prompt;
116
+ //alert("Addedd positive prompt: " + addedd_positive_prompt + "\nAddedd negative prompt: " + addedd_negative_prompt);
117
+ })
118
+ }
119
+
120
+
121
+ let current_positive_prompt = p_prompt.value;
122
+ let current_negative_prompt = n_prompt.value;
123
+
124
+ getStyles(styles.value).then(style_prompts => {
125
+ //console.log(style_prompts)
126
+
127
+ if ((current_positive_prompt.trim() != addedd_positive_prompt.trim() || current_negative_prompt.trim() != addedd_negative_prompt.trim())) {
128
+
129
+ let ok = confirm("Style has been changed. Do you want to change style without saving?");
130
+
131
+
132
+ if (!ok) {
133
+ if (styles.value === styles.options.values[0]) {
134
+ value = styles.options.values[0];
135
+ }
136
+ styles.value = styles.options.values[index-1];
137
+
138
+
139
+ return;
140
+ }
141
+ }
142
+
143
+ // add the addedd prompt to the current prompt
144
+ p_prompt.value = style_prompts.positive_prompt;
145
+ n_prompt.value = style_prompts.negative_prompt;
146
+
147
+
148
+ addedd_positive_prompt = style_prompts.positive_prompt;
149
+ addedd_negative_prompt = style_prompts.negative_prompt;
150
+ if (cb) {
151
+ return cb.apply(this, arguments);
152
+ }
153
+ })
154
+ .catch(error => {
155
+ console.error('Error:', error);
156
+ });
157
+
158
+ };
159
+
160
+
161
+
162
+ let savestyle;
163
+ let replacestyle;
164
+ let deletestyle;
165
+
166
+
167
+ // Create the button widget for selecting the files
168
+ savestyle = this.addWidget("button", "New", "image", () => {
169
+ ////console.log("Save called");
170
+ //ask input name style
171
+ let inputName = prompt("Enter a name for the style:", styles.value);
172
+ if (inputName === null) {
173
+ return;
174
+ }
175
+
176
+
177
+ addStyles(inputName, p_prompt.value, n_prompt.value);
178
+ // Add the file to the dropdown list and update the widget value
179
+
180
+ if (!styles.options.values.includes(inputName)) {
181
+ styles.options.values.push(inputName);
182
+ }
183
+
184
+ },{
185
+ cursor: "grab",
186
+ },);
187
+ replacestyle = this.addWidget("button", "Replace", "image", () => {
188
+ //console.log("Replace called");
189
+ updateStyles(styles.value, p_prompt.value, n_prompt.value);
190
+ },{
191
+ cursor: "grab",
192
+ },);
193
+ deletestyle = this.addWidget("button", "Delete", "image", () => {
194
+ //console.log("Delete called");
195
+ removeStyles(styles.value);
196
+
197
+ // Remove the file from the dropdown list
198
+ styles.options.values = styles.options.values.filter((value) => value !== styles.value);
199
+ },{
200
+ cursor: "grab",
201
+ },);
202
+ savestyle.serialize = false;
203
+
204
+ }
205
+
206
+
207
+
208
+
209
+
210
+ };
211
+
212
+ },
213
+ });
custom_nodes/ComfyUI-N-Nodes/js/dynamicPrompt.js ADDED
@@ -0,0 +1,44 @@
1
+ import { app } from "/scripts/app.js";
2
+ import { ComfyWidgets } from "/scripts/widgets.js";
3
+
4
+ app.registerExtension({
5
+ name: "n.DynamicPrompt",
6
+ async beforeRegisterNodeDef(nodeType, nodeData, app) {
7
+
8
+ if (nodeData.name === "DynamicPrompt") {
9
+ console.warn("DynamicPrompt detected")
10
+
11
+ const onExecuted = nodeType.prototype.onExecuted;
12
+
13
+
14
+ nodeType.prototype.onExecuted = function (message) {
15
+ onExecuted?.apply(this, arguments);
16
+
17
+ const pos_cached = this.widgets.findIndex((w) => w.name === "cached");
18
+ console.warn("value:"+pos_cached)
19
+
20
+ if (this.widgets) {
21
+ const pos_text = this.widgets.findIndex((w) => w.name === "text");
22
+ if (pos_text !== -1) {
23
+ for (let i = pos_text; i < this.widgets.length; i++) {
24
+ this.widgets[i].onRemove?.();
25
+ }
26
+ this.widgets.length = pos_text;
27
+ }
28
+ }
29
+
30
+
31
+ if (this.widgets[pos_cached].value === "NO") {
32
+
33
+ const w = ComfyWidgets["STRING"](this, "text", ["STRING", { multiline: true }], app);
34
+ //random seed
35
+ var rnm = Math.floor(Math.random() * 10000)
36
+ w.widget.value = rnm;
37
+
38
+
39
+ }
40
+
41
+ };
42
+ }
43
+ },
44
+ });
custom_nodes/ComfyUI-N-Nodes/js/extended_widgets.js ADDED
@@ -0,0 +1,329 @@
1
+ //extended_widgets.js
2
+ import { api } from "/scripts/api.js"
3
+ import { ComfyWidgets } from "/scripts/widgets.js";
4
+
5
+ const MultilineSymbol = Symbol();
6
+ const MultilineResizeSymbol = Symbol();
7
+ async function uploadFile(file, updateNode, node, pasted = false) {
8
+ const videoWidget = node.widgets.find((w) => w.name === "video");
9
+
10
+
11
+ try {
12
+ // Wrap file in formdata so it includes filename
13
+ const body = new FormData();
14
+ body.append("image", file);
15
+ if (pasted) {
16
+ body.append("subfolder", "pasted");
17
+ }
18
+ else {
19
+ body.append("subfolder", "n-suite");
20
+ }
21
+
22
+ const resp = await api.fetchApi("/upload/image", {
23
+ method: "POST",
24
+ body,
25
+ });
26
+
27
+ if (resp.status === 200) {
28
+ const data = await resp.json();
29
+ // Add the file to the dropdown list and update the widget value
30
+ let path = data.name;
31
+
32
+
33
+ if (!videoWidget.options.values.includes(path)) {
34
+ videoWidget.options.values.push(path);
35
+ }
36
+
37
+ if (updateNode) {
38
+
39
+ videoWidget.value = path;
40
+ if (data.subfolder) path = data.subfolder + "/" + path;
41
+ showVideoInput(path,node);
42
+
43
+ }
44
+ } else {
45
+ alert(resp.status + " - " + resp.statusText);
46
+ }
47
+ } catch (error) {
48
+ alert(error);
49
+ }
50
+ }
51
+
52
+ function addVideo(node, name,src, app,autoplay_value) {
53
+ const MIN_SIZE = 50;
54
+
55
+ function computeSize(size) {
56
+ try{
57
+
58
+ if (node.widgets[0].last_y == null) return;
59
+
60
+ let y = node.widgets[0].last_y;
61
+ let freeSpace = size[1] - y;
62
+
63
+ // Compute the height of all non customvideo widgets
64
+ let widgetHeight = 0;
65
+ const multi = [];
66
+ for (let i = 0; i < node.widgets.length; i++) {
67
+ const w = node.widgets[i];
68
+ if (w.type === "customvideo") {
69
+ multi.push(w);
70
+ } else {
71
+ if (w.computeSize) {
72
+ widgetHeight += w.computeSize()[1] + 4;
73
+ } else {
74
+ widgetHeight += LiteGraph.NODE_WIDGET_HEIGHT + 4;
75
+ }
76
+ }
77
+ }
78
+
79
+ // See how large each text input can be
80
+ freeSpace -= widgetHeight;
81
+ freeSpace /= multi.length + (!!node.imgs?.length);
82
+
83
+ if (freeSpace < MIN_SIZE) {
84
+ // There isnt enough space for all the widgets, increase the size of the node
85
+ freeSpace = MIN_SIZE;
86
+ node.size[1] = y + widgetHeight + freeSpace * (multi.length + (!!node.imgs?.length));
87
+ node.graph.setDirtyCanvas(true);
88
+ }
89
+
90
+ // Position each of the widgets
91
+ for (const w of node.widgets) {
92
+ w.y = y;
93
+ if (w.type === "customvideo") {
94
+ y += freeSpace;
95
+ w.computedHeight = freeSpace - multi.length*4;
96
+ } else if (w.computeSize) {
97
+ y += w.computeSize()[1] + 4;
98
+ } else {
99
+ y += LiteGraph.NODE_WIDGET_HEIGHT + 4;
100
+ }
101
+ }
102
+
103
+ node.inputHeight = freeSpace;
104
+ }catch(e){
105
+
106
+ }
107
+ }
108
+ const widget = {
109
+ type: "customvideo",
110
+ name,
111
+ get value() {
112
+ return this.inputEl.value;
113
+ },
114
+ set value(x) {
115
+ this.inputEl.value = x;
116
+ },
117
+ draw: function (ctx, _, widgetWidth, y, widgetHeight) {
118
+ if (!this.parent.inputHeight) {
119
+ // If we are initially offscreen when created we wont have received a resize event
120
+ // Calculate it here instead
121
+ node.setSizeForImage?.();
122
+
123
+ }
124
+ const visible = app.canvas.ds.scale > 0.5 && this.type === "customvideo";
125
+ const margin = 10;
126
+ let top_offset = 5
127
+ //hack for top menu
128
+ if (localStorage.getItem("Comfy.Settings.Comfy.UseNewMenu") === '"Top"') {
129
+ top_offset = 40;
130
+ }
131
+
132
+ const elRect = ctx.canvas.getBoundingClientRect();
133
+ const transform = new DOMMatrix()
134
+ .scaleSelf(elRect.width / ctx.canvas.width, elRect.height / ctx.canvas.height)
135
+ .multiplySelf(ctx.getTransform())
136
+ .translateSelf(margin, margin + y);
137
+
138
+ const scale = new DOMMatrix().scaleSelf(transform.a, transform.d)
139
+ Object.assign(this.inputEl.style, {
140
+ transformOrigin: "0 0",
141
+ transform: scale,
142
+ left: `${transform.a + transform.e}px`,
143
+ top: `${transform.d +top_offset+ transform.f}px`,
144
+ width: `${widgetWidth - (margin * 2)}px`,
145
+ height: `${this.parent.inputHeight - (margin * 2)}px`,
146
+ position: "absolute",
147
+ background: (!node.color)?'':node.color,
148
+ color: (!node.color)?'':'white',
149
+ zIndex: app.graph._nodes.indexOf(node),
150
+ });
151
+ this.inputEl.hidden = !visible;
152
+ },
153
+ };
154
+
155
+
156
+ widget.inputEl = document.createElement("video");
157
+
158
+
159
+ // Set the video attributes
160
+ Object.assign(widget.inputEl, {
161
+ controls: true,
162
+ src: src,
163
+ poster: "",
164
+ width: 400,
165
+ height: 300,
166
+ loop: true,
167
+ muted: true,
168
+ autoplay: autoplay_value,
169
+ type : "video/mp4"
170
+
171
+ });
172
+
173
+
174
+
175
+
176
+ // Add video element to the body
177
+ document.body.appendChild(widget.inputEl);
178
+
179
+
180
+
181
+ widget.parent = node;
182
+
183
+
184
+ node.addCustomWidget(widget);
185
+
186
+ app.canvas.onDrawBackground = function () {
187
+ // Draw node isn't fired once the node is off the screen
188
+ // if it goes off screen quickly, the input may not be removed
189
+ // this shifts it off screen so it can be moved back if the node is visible.
190
+ for (let n in app.graph._nodes) {
191
+ n = app.graph._nodes[n];
192
+ for (let w in n.widgets) {
193
+ let wid = n.widgets[w];
194
+ if (Object.hasOwn(wid, "inputEl")) {
195
+ wid.inputEl.style.left = -8000 + "px";
196
+ wid.inputEl.style.position = "absolute";
197
+ }
198
+ }
199
+ }
200
+ };
201
+
202
+ node.onRemoved = function () {
203
+ // When removing this node we need to remove the input from the DOM
204
+ for (let y in this.widgets) {
205
+ if (this.widgets[y].inputEl) {
206
+ this.widgets[y].inputEl.remove();
207
+ }
208
+ }
209
+ };
210
+
211
+ widget.onRemove = () => {
212
+ widget.inputEl?.remove();
213
+
214
+ // Restore original size handler if we are the last
215
+ if (!--node[MultilineSymbol]) {
216
+ node.onResize = node[MultilineResizeSymbol];
217
+ delete node[MultilineSymbol];
218
+ delete node[MultilineResizeSymbol];
219
+ }
220
+ };
221
+
222
+ if (node[MultilineSymbol]) {
223
+ node[MultilineSymbol]++;
224
+ } else {
225
+ node[MultilineSymbol] = 1;
226
+ const onResize = (node[MultilineResizeSymbol] = node.onResize);
227
+
228
+ node.onResize = function (size) {
229
+
230
+ computeSize(size);
231
+ // Call the original resize handler
232
+ if (onResize) {
233
+ onResize.apply(this, arguments);
234
+ }
235
+ };
236
+ }
237
+
238
+ return { minWidth: 400, minHeight: 200, widget };
239
+ }
240
+
241
+
242
+ export function showVideoInput(name,node) {
243
+ const videoWidget = node.widgets.find((w) => w.name === "videoWidget");
244
+ const temp_web_url = node.widgets.find((w) => w.name === "local_url");
245
+
246
+
247
+ let folder_separator = name.lastIndexOf("/");
248
+ let subfolder = "n-suite";
249
+ if (folder_separator > -1) {
250
+ subfolder = name.substring(0, folder_separator);
251
+ name = name.substring(folder_separator + 1);
252
+ }
253
+
254
+ let url_video = api.apiURL(`/view?filename=${encodeURIComponent(name)}&type=input&subfolder=${subfolder}${app.getPreviewFormatParam()}`);
255
+ videoWidget.inputEl.src = url_video
256
+ temp_web_url.value = url_video
257
+ }
258
+
259
+ export function showVideoOutput(name,node) {
260
+ const videoWidget = node.widgets.find((w) => w.name === "videoOutWidget");
261
+
262
+
263
+
264
+ let folder_separator = name.lastIndexOf("/");
265
+ let subfolder = "n-suite/videos";
266
+ if (folder_separator > -1) {
267
+ subfolder = name.substring(0, folder_separator);
268
+ name = name.substring(folder_separator + 1);
269
+ }
270
+
271
+
272
+ let url_video = api.apiURL(`/view?filename=${encodeURIComponent(name)}&type=output&subfolder=${subfolder}${app.getPreviewFormatParam()}`);
273
+ videoWidget.inputEl.src = url_video
274
+
275
+ return url_video;
276
+ }
277
+
278
+
279
+
280
+ export const ExtendedComfyWidgets = {
281
+ ...ComfyWidgets, // Copy all the functions from ComfyWidgets
282
+
283
+ VIDEO(node, inputName, inputData, src, app,type="input",autoplay_value=true) {
284
+ try {
285
+ const videoWidget = node.widgets.find((w) => w.name === "video");
286
+ const autoplay = node.widgets.find((w) => w.name === "autoplay");
287
+ const defaultVal = "";
288
+ let res;
289
+ res = addVideo(node, inputName, src, app,autoplay_value);
290
+
291
+ if (type == "input"){
292
+
293
+ const cb = node.callback;
294
+ videoWidget.callback = function () {
295
+
296
+ showVideoInput(videoWidget.value, node);
297
+ if (cb) {
298
+ return cb.apply(this, arguments);
299
+ }
300
+ };
301
+ autoplay.callback = function () {
302
+ const videoWidgetz = node.widgets.find((w) => w.name === "videoWidget");
303
+
304
+ videoWidgetz.inputEl.autoplay = autoplay.value;
305
+ showVideoInput(videoWidget.value, node);
306
+ if (cb) {
307
+ return cb.apply(this, arguments);
308
+ }
309
+ }
310
+ }
311
+
312
+ if (node.type =="LoadVideoAdvanced"){
313
+
314
+
315
+ }
316
+
317
+ return res;
318
+ }
319
+ catch (error) {
320
+
321
+ console.error("Error in extended_widgets.js:", error);
322
+ throw error;
323
+
324
+ }
325
+
326
+ },
327
+
328
+
329
+ };
custom_nodes/ComfyUI-N-Nodes/js/gptSampler.js ADDED
@@ -0,0 +1,43 @@
1
+ import { app } from "/scripts/app.js";
2
+ import { ComfyWidgets } from "/scripts/widgets.js";
3
+
4
+ app.registerExtension({
5
+ name: "n.GPTSampler",
6
+ async beforeRegisterNodeDef(nodeType, nodeData, app) {
7
+
8
+ if (nodeData.name === "GPT Sampler [n-suite]") {
9
+ console.warn("GPTSampler detected")
10
+
11
+ const onExecuted = nodeType.prototype.onExecuted;
12
+
13
+ nodeType.prototype.onExecuted = function (message) {
14
+ onExecuted?.apply(this, arguments);
15
+
16
+ const pos_cached = this.widgets.findIndex((w) => w.name === "cached");
17
+ console.warn("value:"+pos_cached)
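+ // Remove any text widgets left over from a previous execution before showing the new output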
18
+
19
+ if (this.widgets) {
20
+ const pos_text = this.widgets.findIndex((w) => w.name === "text");
21
+ if (pos_text !== -1) {
22
+ for (let i = pos_text; i < this.widgets.length; i++) {
23
+ this.widgets[i].onRemove?.();
24
+ }
25
+ this.widgets.length = pos_text;
26
+ }
27
+ }
28
+
29
+
30
+ if (this.widgets[pos_cached].value === "NO") {
31
+
32
+ const w = ComfyWidgets["STRING"](this, "text", ["STRING", { multiline: true }], app);
33
+ //random seed
34
+ var rnm = Math.floor(Math.random() * 10000)
35
+ w.widget.value = rnm;
36
+
37
+
38
+ }
39
+
40
+ };
41
+ }
42
+ },
43
+ });
custom_nodes/ComfyUI-N-Nodes/js/include_css.js ADDED
@@ -0,0 +1,22 @@
1
+ import { $el } from "../../../scripts/ui.js";
2
+
3
+ function addStylesheet(url) {
4
+ if (url.endsWith(".js")) {
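+ // a .js URL is mapped to the matching .css file next to it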
5
+ url = url.substr(0, url.length - 2) + "css";
6
+ }
7
+ $el("link", {
8
+ parent: document.head,
9
+ rel: "stylesheet",
10
+ type: "text/css",
11
+ href: url.startsWith("http") ? url : getUrl(url),
12
+ });
13
+ }
14
+ function getUrl(path, baseUrl) {
15
+ if (baseUrl) {
16
+ return new URL(path, baseUrl).toString();
17
+ } else {
18
+ return new URL("../" + path, import.meta.url).toString();
19
+ }
20
+ }
21
+
22
+ addStylesheet(getUrl("styles.css", import.meta.url));
custom_nodes/ComfyUI-N-Nodes/js/styles.css ADDED
@@ -0,0 +1,20 @@
1
+ textarea[placeholder="positive_prompt"] {
2
+ border: 1px solid #64d509;
3
+
4
+
5
+ }
6
+ textarea[placeholder="positive_prompt"]:focus-visible {
7
+ border: 1px solid #72eb0f;
8
+
9
+ }
10
+
11
+
12
+ textarea[placeholder="negative_prompt"] {
13
+ border: 1px solid #a94442;
14
+ border-color: #a94442;
15
+ }
16
+
17
+ textarea[placeholder="negative_prompt"]:focus-visible {
18
+ border: 1px solid #de5755;
19
+ border-color: #de5755;
20
+ }
custom_nodes/ComfyUI-N-Nodes/js/videoLoadAdvanced.js ADDED
@@ -0,0 +1,142 @@
1
+ import { app } from "/scripts/app.js";
2
+ import { api } from "/scripts/api.js"
3
+ import { ExtendedComfyWidgets,showVideoInput } from "./extended_widgets.js";
4
+ const MultilineSymbol = Symbol();
5
+ const MultilineResizeSymbol = Symbol();
6
+
7
+
8
+ async function uploadFile(file, updateNode, node, pasted = false) {
9
+ const videoWidget = node.widgets.find((w) => w.name === "video");
10
+
11
+
12
+ try {
13
+ // Wrap file in formdata so it includes filename
14
+ const body = new FormData();
15
+ body.append("image", file);
16
+ if (pasted) {
17
+ body.append("subfolder", "pasted");
18
+ }
19
+ else {
20
+ body.append("subfolder", "n-suite");
21
+ }
22
+
23
+ const resp = await api.fetchApi("/upload/image", {
24
+ method: "POST",
25
+ body,
26
+ });
27
+
28
+ if (resp.status === 200) {
29
+ const data = await resp.json();
30
+ // Add the file to the dropdown list and update the widget value
31
+ let path = data.name;
32
+
33
+
34
+ if (!videoWidget.options.values.includes(path)) {
35
+ videoWidget.options.values.push(path);
36
+ }
37
+
38
+ if (updateNode) {
39
+
40
+ videoWidget.value = path;
41
+ if (data.subfolder) path = data.subfolder + "/" + path;
42
+ showVideoInput(path,node);
43
+
44
+ }
45
+ } else {
46
+ alert(resp.status + " - " + resp.statusText);
47
+ }
48
+ } catch (error) {
49
+ alert(error);
50
+ }
51
+ }
52
+
53
+
54
+
55
+
56
+ let uploadWidget = "";
57
+ app.registerExtension({
58
+ name: "Comfy.VideoLoadAdvanced",
59
+ async beforeRegisterNodeDef(nodeType, nodeData, app) {
60
+
61
+ const onAdded = nodeType.prototype.onAdded;
62
+ if (nodeData.name === "LoadVideo [n-suite]") {
63
+ nodeType.prototype.onAdded = function () {
64
+ onAdded?.apply(this, arguments);
65
+ const temp_web_url = this.widgets.find((w) => w.name === "local_url");
66
+ const autoplay_value = this.widgets.find((w) => w.name === "autoplay");
67
+
68
+
69
+ let uploadWidget;
70
+ const fileInput = document.createElement("input");
71
+ Object.assign(fileInput, {
72
+ type: "file",
73
+ accept: "video/mp4,image/gif,video/webm",
74
+ style: "display: none",
75
+ onchange: async () => {
76
+ if (fileInput.files.length) {
77
+ await uploadFile(fileInput.files[0], true,this);
78
+ }
79
+ },
80
+ });
81
+ document.body.append(fileInput);
82
+ // Create the button widget for selecting the files
83
+ uploadWidget = this.addWidget("button", "choose file to upload", "image", () => {
84
+ fileInput.click();
85
+ },{
86
+ cursor: "grab",
87
+ },);
88
+ uploadWidget.serialize = false;
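+ // the video preview widget below is added after a short delay, once the node's standard widgets exist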
89
+
90
+
91
+ setTimeout(() => {
92
+ ExtendedComfyWidgets["VIDEO"](this, "videoWidget", ["STRING"], temp_web_url.value, app,"input", autoplay_value.value);
93
+
94
+ }, 100);
95
+
96
+
97
+ }
98
+
99
+
100
+ nodeType.prototype.onDragOver = function (e) {
101
+ if (e.dataTransfer && e.dataTransfer.items) {
102
+ const image = [...e.dataTransfer.items].find((f) => f.kind === "file");
103
+ return !!image;
104
+ }
105
+
106
+ return false;
107
+ };
108
+
109
+ // On drop upload files
110
+ nodeType.prototype.onDragDrop = function (e) {
111
+ console.log("onDragDrop called");
112
+ let handled = false;
113
+ for (const file of e.dataTransfer.files) {
114
+ if (file.type.startsWith("video/mp4")) {
115
+
116
+ const filePath = file.path || (file.webkitRelativePath || '').split('/').slice(1).join('/');
117
+
118
+
119
+ uploadFile(file, !handled, this); // Don't await these; any order is fine, only update on the first one
120
+
121
+ handled = true;
122
+ }
123
+ }
124
+
125
+ return handled;
126
+ };
127
+
128
+ nodeType.prototype.pasteFile = function(file) {
129
+ if (file.type.startsWith("video/mp4")) {
130
+
131
+ //uploadFile(file, true, is_pasted);
132
+
133
+ return true;
134
+ }
135
+ return false;
136
+ }
137
+
138
+
139
+ };
140
+
141
+ },
142
+ });
custom_nodes/ComfyUI-N-Nodes/js/videoSave.js ADDED
@@ -0,0 +1,87 @@
1
+ import { app } from "/scripts/app.js";
2
+ import { api } from "/scripts/api.js"
3
+ import { ExtendedComfyWidgets,showVideoOutput } from "./extended_widgets.js";
4
+ const MultilineSymbol = Symbol();
5
+ const MultilineResizeSymbol = Symbol();
6
+
7
+
8
+ async function uploadFile(file, updateNode, node, pasted = false) {
9
+ const videoWidget = node.widgets.find((w) => w.name === "video");
10
+
11
+
12
+ try {
13
+ // Wrap file in formdata so it includes filename
14
+ const body = new FormData();
15
+ body.append("image", file);
16
+ if (pasted) {
17
+ body.append("subfolder", "pasted");
18
+ }
19
+ else {
20
+ body.append("subfolder", "n-suite");
21
+ }
22
+
23
+ const resp = await api.fetchApi("/upload/image", {
24
+ method: "POST",
25
+ body,
26
+ });
27
+
28
+ if (resp.status === 200) {
29
+ const data = await resp.json();
30
+ // Add the file to the dropdown list and update the widget value
31
+ let path = data.name;
32
+
33
+
34
+ if (!videoWidget.options.values.includes(path)) {
35
+ videoWidget.options.values.push(path);
36
+ }
37
+
38
+ if (updateNode) {
39
+ // showVideo(path,node);
40
+ videoWidget.value = path;
41
+ if (data.subfolder) path = data.subfolder + "/" + path;
42
+ showVideoOutput(path,node);
43
+
44
+ }
45
+ } else {
46
+ alert(resp.status + " - " + resp.statusText);
47
+ }
48
+ } catch (error) {
49
+ alert(error);
50
+ }
51
+ }
52
+
53
+
54
+
55
+
56
+ let uploadWidget = "";
57
+ app.registerExtension({
58
+ name: "Comfy.VideoSave",
59
+ async beforeRegisterNodeDef(nodeType, nodeData, app) {
60
+
61
+ const onExecuted = nodeType.prototype.onExecuted;
62
+
63
+
64
+ const onAdded = nodeType.prototype.onAdded;
65
+ if (nodeData.name === "SaveVideo [n-suite]") {
66
+ nodeType.prototype.onAdded = function () {
67
+
68
+ ExtendedComfyWidgets["VIDEO"](this, "videoOutWidget", ["STRING"], "", app,"output");
69
+
70
+ };
71
+ nodeType.prototype.onExecuted = function (message) {
72
+ onExecuted?.apply(this, arguments);
73
+ console.log(nodeData)
74
+
75
+ let full_path="";
76
+
77
+ for (const list of message.text) {
78
+ full_path = list;
79
+ }
80
+
81
+ let fullweb= showVideoOutput(full_path,this)
82
+
83
+ }
84
+ };
85
+
86
+ },
87
+ });
custom_nodes/ComfyUI-N-Nodes/libs/__pycache__/joytag_models.cpython-311.pyc ADDED
Binary file (65 kB).
 
custom_nodes/ComfyUI-N-Nodes/libs/joytag_models.py ADDED
@@ -0,0 +1,1109 @@
1
+ #from https://github.com/fpgaminer/joytag.git
2
+ import json
3
+ from pathlib import Path
4
+ from typing import Optional
5
+ import torch
6
+ import torch.backends.cuda
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
+ import torchvision
10
+
11
+ from transformers.activations import QuickGELUActivation
12
+ import math
13
+ from einops.layers.torch import Rearrange
14
+ import einops
15
+
16
+
17
+ class VisionModel(nn.Module):
18
+ image_size: int
19
+ n_tags: int
20
+
21
+ def __init__(self, image_size: int, n_tags: int):
22
+ super().__init__()
23
+
24
+ self.image_size = image_size
25
+ self.n_tags = n_tags
26
+
27
+ @staticmethod
28
+ def load_model(path: Path | str, device: str | None = None) -> 'VisionModel':
29
+ """
30
+ Load a model from a directory.
31
+ :param path: The directory containing the model.
32
+ :return: The loaded model (its image_size and n_tags are set from the config).
33
+ """
34
+ with open(Path(path) / 'config.json', 'r') as f:
35
+ config = json.load(f)
36
+
37
+ if (Path(path) / 'model.safetensors').exists():
38
+ from safetensors.torch import load_file
39
+ resume = load_file(Path(path) / 'model.safetensors', device='cpu')
40
+ else:
41
+ resume = torch.load(Path(path) / 'model.pt', map_location=torch.device('cpu'))['model']
42
+
43
+ model_classes = VisionModel.__subclasses__()
44
+ model_cls = next(cls for cls in model_classes if cls.__name__ == config['class'])
45
+
46
+ model = model_cls(**{k: v for k, v in config.items() if k != 'class'})
47
+ model.load(resume)
48
+ if device is not None:
49
+ model = model.to(device)
50
+
51
+ return model
52
+
53
+ @staticmethod
54
+ def from_config(config: dict) -> 'VisionModel':
55
+ model_classes = VisionModel.__subclasses__()
56
+ model_cls = next(cls for cls in model_classes if cls.__name__ == config['class'])
57
+ return model_cls(**{k: v for k, v in config.items() if k != 'class'})
58
+
59
+ def get_optimized_parameters(self, lr: float):
60
+ raise NotImplementedError
61
+
62
+ def save(self):
63
+ raise NotImplementedError
64
+
65
+ def load(self, state_dict):
66
+ raise NotImplementedError
67
+
68
+
69
+ def basic_calculate_loss(preds: dict[str, torch.Tensor], batch: dict, pos_weight: torch.Tensor | None, loss_type: str):
70
+ def asl_helper(preds, target):
71
+ p = F.softmax(preds, dim=1)
72
+ xs_pos = p.clamp(min=1e-6)
73
+ xs_neg = (1 - p).clamp(min=1e-6)
74
+
75
+ los_pos = torch.log(torch.gather(xs_pos, 1, target.unsqueeze(1))).sum()
76
+ los_neg = torch.log(xs_neg)
77
+ los_neg = los_neg.sum() - torch.gather(los_neg, 1, target.unsqueeze(1)).sum()
78
+ loss = los_pos + los_neg
79
+
80
+ return -loss
81
+
82
+ if loss_type == "ce":
83
+ loss = F.binary_cross_entropy_with_logits(preds['tags'], batch['tags'])
84
+ elif loss_type == "weighted":
85
+ loss = F.binary_cross_entropy_with_logits(preds['tags'], batch['tags'], pos_weight=pos_weight)
86
+ elif loss_type == "focal":
87
+ gamma = 2
88
+ p = torch.sigmoid(preds['tags'])
89
+ ce_loss = F.binary_cross_entropy_with_logits(preds['tags'], batch['tags'], reduction='none')
90
+ p_t = p * batch['tags'] + (1 - p) * (1 - batch['tags'])
91
+ loss = ce_loss * ((1 - p_t) ** gamma)
92
+ loss = loss.mean()
93
+ elif loss_type == "focal2":
94
+ gamma = 2
95
+ p = torch.sigmoid(preds['tags'])
96
+ ce_loss = F.binary_cross_entropy_with_logits(preds['tags'], batch['tags'], reduction='none')
97
+ p_t = p * batch['tags'] + (1 - p) * (1 - batch['tags'])
98
+ loss = ce_loss * ((1 - p_t) ** gamma) * 256
99
+ loss = loss.mean()
100
+ elif loss_type == "asl":
101
+ p = torch.sigmoid(preds['tags'])
102
+ xs_pos = p
103
+ xs_neg = 1 - p
104
+
105
+ los_pos = batch['tags'] * torch.log(xs_pos.clamp(min=1e-6))
106
+ los_neg = (1 - batch['tags']) * torch.log(xs_neg.clamp(min=1e-6))
107
+ loss = los_pos + los_neg
108
+ loss = -loss.sum()
109
+
110
+ # Rating
111
+ loss = loss + asl_helper(preds['rating'], batch['rating'])
112
+
113
+ # Score
114
+ loss = loss + asl_helper(preds['score'], batch['score'])
115
+ elif loss_type == "asl2":
116
+ p = torch.sigmoid(preds['tags'])
117
+ xs_pos = p
118
+ xs_neg = 1 - p
119
+
120
+ los_pos = batch['tags'] * torch.log(xs_pos.clamp(min=1e-6))
121
+ los_neg = (1 - batch['tags']) * torch.log(xs_neg.clamp(min=1e-6))
122
+ loss = -los_pos - los_neg
123
+ loss = loss.sum()
124
+ elif loss_type == "asl3":
125
+ p = torch.sigmoid(preds['tags'])
126
+ xs_pos = p
127
+ xs_neg = 1 - p
128
+
129
+ los_pos = batch['tags'] * torch.log(xs_pos.clamp(min=1e-6))
130
+ los_neg = (1 - batch['tags']) * torch.log(xs_neg.clamp(min=1e-6))
131
+ loss = -los_pos - los_neg
132
+ loss = loss.mean()
133
+ elif loss_type == "asl4":
134
+ p = torch.sigmoid(preds['tags'])
135
+ xs_pos = p
136
+ xs_neg = 1 - p
137
+
138
+ los_pos = batch['tags'] * torch.log(xs_pos.clamp(min=1e-6))
139
+ los_neg = (1 - batch['tags']) * torch.log(xs_neg.clamp(min=1e-6))
140
+ loss = -los_pos - los_neg
141
+ loss = loss.mean() * 128
142
+ elif loss_type == "asl5":
143
+ loss = F.binary_cross_entropy_with_logits(preds['tags'], batch['tags'], pos_weight=pos_weight) * 128
144
+ elif loss_type == "asl6":
145
+ loss = F.binary_cross_entropy_with_logits(preds['tags'], batch['tags'], pos_weight=pos_weight) * 256
146
+ elif loss_type == "asl7":
147
+ loss = F.binary_cross_entropy_with_logits(preds['tags'], batch['tags'], pos_weight=pos_weight) * 2
148
+ else:
149
+ raise ValueError(f"Invalid loss type: {loss_type}")
150
+
151
+ return loss
152
+
153
+
154
+ class CLIPMlp(nn.Module):
155
+ def __init__(self, hidden_size: int, intermediate_size: int, activation_cls):
156
+ super().__init__()
157
+ self.activation_fn = activation_cls()
158
+ self.fc1 = nn.Linear(hidden_size, intermediate_size)
159
+ self.fc2 = nn.Linear(intermediate_size, hidden_size)
160
+
161
+ def forward(self, hidden_states: torch.Tensor):
162
+ hidden_states = self.fc1(hidden_states)
163
+ hidden_states = self.activation_fn(hidden_states)
164
+ hidden_states = self.fc2(hidden_states)
165
+ return hidden_states
166
+
167
+
168
+ class FastCLIPAttention2(nn.Module):
169
+ """Fast attention module for CLIP-like models. This is NOT a drop-in replacement for CLIPAttention, since it adds additional flexibility. Relies on PyTorch's fused scaled_dot_product_attention."""
170
+ def __init__(self, hidden_size: int, out_dim: int, num_attention_heads: int, out_seq_len: Optional[int] = None, norm_qk: bool = False):
171
+ super().__init__()
172
+ self.out_seq_len = out_seq_len
173
+ self.embed_dim = hidden_size
174
+ self.out_dim = out_dim
175
+ self.norm_qk = norm_qk
176
+ self.num_heads = num_attention_heads
177
+ self.head_dim = hidden_size // num_attention_heads
178
+ assert self.head_dim * num_attention_heads == self.embed_dim, "embed_dim must be divisible by num_attention_heads"
179
+
180
+ self.q_proj = nn.Linear(self.embed_dim, self.embed_dim)
181
+ self.kv_proj = nn.Linear(self.embed_dim, self.embed_dim * 2)
182
+ self.out_proj = nn.Linear(self.embed_dim, self.out_dim)
183
+
184
+ if self.norm_qk:
185
+ self.query_norm = nn.LayerNorm(self.embed_dim)
186
+ self.key_norm = nn.LayerNorm(self.embed_dim)
187
+
188
+ #def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
189
+ # return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).contiguous()
190
+
191
+ def forward(self, query_states: torch.Tensor, kv_states: torch.Tensor) -> torch.Tensor:
192
+ bsz, src_len, embed_dim = kv_states.size()
193
+ if self.out_seq_len is not None:
194
+ tgt_len = self.out_seq_len
195
+ else:
196
+ tgt_len = src_len
197
+
198
+ kv_states = self.kv_proj(kv_states) # (bsz, src_len, embed_dim * 2)
199
+ q_states = self.q_proj(query_states[:, :tgt_len]) # (bsz, tgt_len, embed_dim)
200
+
201
+ # NOTE: It is not clear if LayerNorm should be applied to the embed_dim, or to the head_dim
202
+ if self.norm_qk:
203
+ q_states = self.query_norm(q_states).type(q_states.dtype)
204
+ k_states = self.key_norm(kv_states[:, :, :embed_dim]).type(kv_states.dtype)
205
+ v_states = kv_states[:, :, embed_dim:]
206
+ else:
207
+ k_states = kv_states[:, :, :embed_dim]
208
+ v_states = kv_states[:, :, embed_dim:]
209
+
210
+ q_states = q_states.view(bsz, tgt_len, self.num_heads, self.head_dim).transpose(1, 2) # (bsz, num_heads, tgt_len, head_dim)
211
+ k_states = k_states.view(bsz, src_len, self.num_heads, self.head_dim).transpose(1, 2) # (bsz, num_heads, src_len, head_dim)
212
+ v_states = v_states.view(bsz, src_len, self.num_heads, self.head_dim).transpose(1, 2) # (bsz, num_heads, src_len, head_dim)
213
+
214
+ # Performs scale of query_states, attention, and softmax
215
+ with torch.backends.cuda.sdp_kernel(enable_math=False):
216
+ x = F.scaled_dot_product_attention(q_states, k_states, v_states) # (bsz, num_heads, tgt_len, head_dim)
217
+ x = x.transpose(1, 2).contiguous().view(bsz, tgt_len, embed_dim) # (bsz, tgt_len, embed_dim)
218
+
219
+ # Projection
220
+ x = self.out_proj(x) # (bsz, tgt_len, out_dim)
221
+
222
+ return x
223
+
224
+
225
+ class SkipInit(nn.Module):
226
+ def __init__(self, hidden_size: int, channel_wise: bool, init_scale: float):
227
+ super().__init__()
228
+ self.hidden_size = hidden_size
229
+ self.channel_wise = channel_wise
230
+ self.init_scale = init_scale
231
+
232
+ if self.channel_wise:
233
+ self.scale = nn.Parameter(torch.ones(hidden_size) * init_scale)
234
+ else:
235
+ self.scale = nn.Parameter(torch.tensor(init_scale))
236
+
237
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
238
+ return x * self.scale
239
+
240
+
241
+ class FastCLIPEncoderLayer(nn.Module):
242
+ def __init__(
243
+ self,
244
+ hidden_size: int,
245
+ num_attention_heads: int,
246
+ out_seq_len: Optional[int],
247
+ activation_cls = QuickGELUActivation,
248
+ use_palm_alt: bool = False,
249
+ norm_qk: bool = False,
250
+ skip_init: Optional[float] = None,
251
+ stochastic_depth: Optional[float] = None,
252
+ ):
253
+ super().__init__()
254
+
255
+ self.use_palm_alt = use_palm_alt
256
+ self.stochastic_depth = stochastic_depth
257
+
258
+ self.self_attn = FastCLIPAttention2(
259
+ hidden_size=hidden_size,
260
+ out_dim=hidden_size,
261
+ num_attention_heads=num_attention_heads,
262
+ out_seq_len=out_seq_len,
263
+ norm_qk=norm_qk,
264
+ )
265
+ self.mlp = CLIPMlp(hidden_size, 4 * hidden_size, activation_cls)
266
+ self.layer_norm1 = nn.LayerNorm(hidden_size)
267
+ if not use_palm_alt:
268
+ self.layer_norm2 = nn.LayerNorm(hidden_size)
269
+
270
+ if skip_init is not None:
271
+ self.attn_skip_init = SkipInit(hidden_size, channel_wise=True, init_scale=skip_init)
272
+ self.mlp_skip_init = SkipInit(hidden_size, channel_wise=True, init_scale=skip_init)
273
+ else:
274
+ self.attn_skip_init = nn.Identity()
275
+ self.mlp_skip_init = nn.Identity()
276
+
277
+ def forward(self, hidden_states: torch.Tensor):
278
+ residual = hidden_states
279
+ hidden_states = self.layer_norm1(hidden_states)
280
+
281
+ if not self.use_palm_alt:
282
+ hidden_states = self.self_attn(query_states=hidden_states, kv_states=hidden_states)
283
+ hidden_states = self.attn_skip_init(hidden_states)
284
+ hidden_states = hidden_states + residual[:, :hidden_states.size(1)]
285
+
286
+ residual = hidden_states
287
+ hidden_states = self.layer_norm2(hidden_states)
288
+ hidden_states = self.mlp(hidden_states)
289
+ hidden_states = self.mlp_skip_init(hidden_states)
290
+ hidden_states = hidden_states + residual
291
+ else:
292
+ # An alternative implementation inspired by the PALM paper
293
+ # By performing the attention and MLP in parallel it's possible to fuse the linear projections of the attention and MLP layers
294
+ # We don't do that here yet, but that supposedly improves efficiency without hurting performance
295
+ attn = self.self_attn(query_states=hidden_states, kv_states=hidden_states)
296
+ attn = self.attn_skip_init(attn)
297
+ mlp = self.mlp(hidden_states[:, :attn.size(1)])
298
+ mlp = self.mlp_skip_init(mlp)
299
+
300
+ if self.stochastic_depth is not None:
301
+ attn = torchvision.ops.stochastic_depth(attn, self.stochastic_depth, mode='row', training=self.training)
302
+ mlp = torchvision.ops.stochastic_depth(mlp, self.stochastic_depth, mode='row', training=self.training)
303
+
304
+ hidden_states = residual[:, :attn.size(1)] + attn + mlp
305
+
306
+ return hidden_states
307
+
308
+
309
+ def sinusoidal_position_embedding(width: int, height: int, depth: int, dtype, device, temperature = 10000):
310
+ """
311
+ Sinusoidal position embedding. Returns a flat tensor of shape (h * w, d).
312
+ """
313
+ assert depth % 4 == 0, "Embedding dimension must be divisible by 4."
314
+
315
+ y, x = torch.meshgrid(torch.arange(height, device=device), torch.arange(width, device=device), indexing="ij")
316
+ omega = torch.arange(depth // 4, device=device) / (depth // 4 - 1)
317
+ omega = 1. / (temperature ** omega)
318
+
319
+ y = y.flatten()[:, None] * omega[None, :]
320
+ x = x.flatten()[:, None] * omega[None, :]
321
+ embedding = torch.cat([x.sin(), x.cos(), y.sin(), y.cos()], dim=1)
322
+
323
+ return embedding.type(dtype)
324
+
325
+
326
+ class CLIPEmbeddingLayer(nn.Module):
327
+ def __init__(self, hidden_size: int, num_channels: int, image_size: int, patch_size: int, patch_dropout: float = 0.0, good_dropout: bool = False, dpn: bool = False, sine_positional_embeddings: bool = False):
328
+ super().__init__()
329
+
330
+ assert image_size % patch_size == 0, "Image dimensions must be divisible by the patch size."
331
+
332
+ seq_len = (image_size // patch_size) ** 2
333
+ self.patch_dropout = patch_dropout
334
+ self.hidden_size = hidden_size
335
+ self.good_dropout = good_dropout
336
+ self.dpn = dpn
337
+ self.sine_positional_embeddings = sine_positional_embeddings
338
+ self.patch_size = patch_size
339
+
340
+ self.patch_embeddings = nn.Conv2d(
341
+ in_channels=num_channels,
342
+ out_channels=hidden_size,
343
+ kernel_size=patch_size,
344
+ stride=patch_size,
345
+ bias=False,
346
+ )
347
+ if not self.sine_positional_embeddings:
348
+ self.positional_embeddings = nn.Embedding(seq_len, hidden_size)
349
+ self.register_buffer("position_ids", torch.arange(seq_len))
350
+
351
+ if self.dpn:
352
+ self.to_patch_embeddings = nn.Sequential(
353
+ Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=patch_size, p2=patch_size),
354
+ nn.LayerNorm(3 * patch_size * patch_size),
355
+ nn.Linear(3 * patch_size * patch_size, hidden_size),
356
+ nn.LayerNorm(hidden_size),
357
+ )
358
+ else:
359
+ self.to_patch_embeddings = nn.Conv2d(
360
+ in_channels=num_channels,
361
+ out_channels=hidden_size,
362
+ kernel_size=patch_size,
363
+ stride=patch_size,
364
+ bias=False,
365
+ )
366
+
367
+ def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
368
+ B, C, H, W = pixel_values.shape
369
+ assert H % self.patch_size == 0, f"Input image height ({H}) needs to be divisible by the patch size ({self.patch_size})."
370
+ assert W % self.patch_size == 0, f"Input image width ({W}) needs to be divisible by the patch size ({self.patch_size})."
371
+
372
+ if self.dpn:
373
+ patches = self.to_patch_embeddings(pixel_values)
374
+ else:
375
+ patches = self.to_patch_embeddings(pixel_values)
376
+ patches = patches.flatten(2).transpose(1, 2)
377
+
378
+ seq_len = patches.shape[1]
379
+ patch_dropout = int(math.ceil((1.0 - self.patch_dropout) * seq_len))
380
+
381
+ if self.sine_positional_embeddings:
382
+ position_embeddings = sinusoidal_position_embedding(W // self.patch_size, H // self.patch_size, self.hidden_size, pixel_values.dtype, pixel_values.device)
383
+ else:
384
+ position_embeddings = self.positional_embeddings(self.position_ids)
385
+
386
+ if patch_dropout == seq_len or not self.training:
387
+ embeddings = patches + position_embeddings
388
+ elif self.good_dropout:
389
+ # Pick random patches to drop out
390
+ # The "good_dropout" variant uses random permutations for each batch item, but is slightly slower and involves more code
391
+
392
+ # The below method is a nice trick to generate a batch of random permutations.
393
+ # Torch (as of 1.13) doesn't have a built-in function to do this, and a for loop of torch.randperm is slow.
394
+ # Based on some benchmarks I measured the generation of the mask and the fetching to be only 50% slower than the non-"good_dropout" variant.
395
+ # And the time taken here is only a fraction of the time spent performing the embedding convolution.
396
+ # Generate a matrix of random numbers between 0 and 1 of shape (B, seq_len)
397
+ patch_mask = torch.rand(B, seq_len, device=patches.device)
398
+ # For each batch tensor, use argsort to convert the random numbers into a permutation of the patch indices
399
+ patch_mask = torch.argsort(patch_mask, dim=1)
400
+ # Truncate
401
+ patch_mask = patch_mask[:, :patch_dropout]
402
+
403
+ embeddings = patches.gather(1, patch_mask.unsqueeze(-1).expand(-1, -1, self.hidden_size)) + position_embeddings[patch_mask]
404
+ else:
405
+ # The non-"good_dropout" variant uses a single random permutation for all batch items, but is faster and uses less code
406
+ indices = torch.randperm(seq_len, device=pixel_values.device)[:patch_dropout]
407
+ embeddings = patches[:, indices, :] + position_embeddings[indices.expand(1, -1)]
408
+
409
+ return embeddings
410
+
411
+
412
+ class MHAPoolingHead(nn.Module):
413
+ def __init__(self, hidden_size: int, num_attention_heads: int, activation_cls, out_dim: int, alt_style: bool, norm_qk: bool):
414
+ super().__init__()
415
+
416
+ self.out_dim = out_dim if not alt_style else hidden_size
417
+
418
+ self.probe = nn.Parameter(torch.randn(hidden_size))
419
+
420
+ self.mlp = CLIPMlp(hidden_size, 4 * hidden_size, activation_cls)
421
+ self.layer_norm = nn.LayerNorm(hidden_size)
422
+ self.pooling_head = nn.Linear(hidden_size, 1)
423
+
424
+ self.self_attn = FastCLIPAttention2(
425
+ hidden_size=hidden_size,
426
+ out_dim=self.out_dim,
427
+ num_attention_heads=num_attention_heads,
428
+ out_seq_len=1,
429
+ norm_qk=norm_qk,
430
+ )
431
+ self.mlp = CLIPMlp(self.out_dim, 4 * self.out_dim, activation_cls)
432
+ self.layer_norm1 = nn.LayerNorm(hidden_size)
433
+ self.layer_norm2 = nn.LayerNorm(self.out_dim)
434
+
435
+ if alt_style:
436
+ self.final_proj = nn.Linear(hidden_size, out_dim)
437
+ else:
438
+ self.final_proj = nn.Identity()
439
+
440
+ def forward(self, hidden_states: torch.Tensor):
441
+ hidden_states = self.layer_norm1(hidden_states)
442
+ query_states = self.probe.unsqueeze(0).unsqueeze(0).expand(hidden_states.size(0), 1, -1)
443
+
444
+ hidden_states = self.self_attn(query_states=query_states, kv_states=hidden_states)
445
+ # We don't use a residual connection here because the out_dim is different from the hidden_size
446
+
447
+ residual = hidden_states
448
+ hidden_states = self.layer_norm2(hidden_states)
449
+ hidden_states = self.mlp(hidden_states)
450
+ hidden_states = hidden_states + residual
451
+ hidden_states = self.final_proj(hidden_states)
452
+
453
+ return hidden_states.squeeze(1)
454
+
455
+
456
+ class GAPHead(nn.Module):
457
+ def __init__(self, hidden_size: int, out_dim: int):
458
+ super().__init__()
459
+
460
+ self.norm = nn.LayerNorm(hidden_size)
461
+ self.proj = nn.Linear(hidden_size, out_dim)
462
+
463
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
464
+ x = x.mean(dim=1)
465
+ x = self.norm(x)
466
+ x = self.proj(x)
467
+ return x
468
+
469
+
470
+ class CLIPLikeModel(VisionModel):
471
+ def __init__(
472
+ self,
473
+ n_tags: int,
474
+ embedding_dim: int,
475
+ num_attention_heads: int,
476
+ activation_cls,
477
+ num_channels: int,
478
+ image_size: int,
479
+ patch_size: int,
480
+ patch_dropout: float,
481
+ use_palm_alt: bool,
482
+ num_layers: int,
483
+ use_mha_alt: bool,
484
+ loss_type: str,
485
+ good_dropout: bool=False,
486
+ dpn: bool=False,
487
+ sine_positional_embeddings: bool=False,
488
+ norm_qk: bool = False,
489
+ no_wd_bias: bool = False,
490
+ use_gap_head: bool = False,
491
+ skip_init: Optional[float] = None,
492
+ stochastic_depth: Optional[float] = None,
493
+ ):
494
+ super().__init__(image_size, n_tags)
495
+
496
+ out_dim = n_tags
497
+ self.n_tags = n_tags
498
+ self.loss_type = loss_type
499
+ self.no_wd_bias = no_wd_bias
500
+
501
+ stochastic_depth_space = torch.linspace(0, stochastic_depth, num_layers) if stochastic_depth is not None else None
502
+
503
+ self.embedding_layer = CLIPEmbeddingLayer(embedding_dim, num_channels, image_size, patch_size, patch_dropout, good_dropout, dpn, sine_positional_embeddings)
504
+ self.pre_layer_norm = nn.LayerNorm(embedding_dim)
505
+ self.encoder_layers = nn.ModuleList([FastCLIPEncoderLayer(
506
+ hidden_size=embedding_dim,
507
+ num_attention_heads=num_attention_heads,
508
+ out_seq_len=None,
509
+ activation_cls=activation_cls,
510
+ use_palm_alt=use_palm_alt,
511
+ norm_qk=norm_qk,
512
+ skip_init=skip_init,
513
+ stochastic_depth=stochastic_depth_space[i].item() if stochastic_depth_space is not None else None,
514
+ ) for i in range(num_layers)])
515
+
516
+ if use_gap_head:
517
+ self.pooling_head = GAPHead(embedding_dim, out_dim)
518
+ else:
519
+ self.pooling_head = MHAPoolingHead(embedding_dim, num_attention_heads, activation_cls, out_dim, use_mha_alt, norm_qk=norm_qk)
520
+
521
+ def forward(self, batch):
522
+ hidden_states = self.embedding_layer(batch['image'])
523
+ hidden_states = self.pre_layer_norm(hidden_states)
524
+
525
+ for layer in self.encoder_layers:
526
+ hidden_states = layer(hidden_states)
527
+
528
+ preds = self.pooling_head(hidden_states)
529
+
530
+ result = {
531
+ 'tags': preds,
532
+ }
533
+
534
+ return result
535
+
536
+ def calculate_loss(self, preds, batch, pos_weight):
537
+ return basic_calculate_loss(preds, batch, pos_weight, self.loss_type)
538
+
539
+ def get_optimized_parameters(self, lr: float):
540
+ if self.no_wd_bias:
541
+ return self.get_optimized_parameters_no_wd_bias()
542
+ else:
543
+ return self.parameters()
544
+
545
+ def get_optimized_parameters_no_wd_bias(self):
546
+ decay = []
547
+ no_decay = []
548
+
549
+ for name, param in self.named_parameters():
550
+ if not param.requires_grad:
551
+ continue
552
+
553
+ if len(param.shape) == 1 or name.endswith(".bias"):
554
+ no_decay.append(param)
555
+ print(f'No decay: {name}')
556
+ else:
557
+ decay.append(param)
558
+
559
+ return [
560
+ {'params': decay},
561
+ {'params': no_decay, 'weight_decay': 0.},
562
+ ]
563
+
564
+ def save(self):
565
+ return self.state_dict()
566
+
567
+ def load(self, state_dict):
568
+ self.load_state_dict(state_dict)
569
+
570
+
571
+ class MaskedAutoEncoderViT(nn.Module):
572
+ def __init__(
573
+ self,
574
+ n_tags: int,
575
+
576
+ embedding_dim: int,
577
+ num_attention_heads: int,
578
+ activation_cls,
579
+ num_channels: int,
580
+ image_size: int,
581
+ patch_size: int,
582
+ num_layers: int,
583
+ loss_type: str,
584
+ sine_positional_embeddings: bool=False,
585
+
586
+ decoder_embedding_dim: int = 512,
587
+ decoder_num_attention_heads: int = 8,
588
+ decoder_num_layers: int = 6,
589
+ decoder_force_projection: bool = False,
590
+
591
+ masking_ratio: float = 0.75,
592
+ mae_loss_weight: float = 1.0,
593
+ mae_normalize_targets: bool = False,
594
+ mae_post_norm: bool = False,
595
+ ):
596
+ super().__init__()
597
+
598
+ self.n_tags = n_tags
599
+ self.seq_len = (image_size // patch_size) ** 2
600
+ self.embedding_dim = embedding_dim
601
+ self.decoder_embedding_dim = decoder_embedding_dim
602
+ self.sine_positional_embeddings = sine_positional_embeddings
603
+ self.image_size = image_size
604
+ self.patch_size = patch_size
605
+ self.masking_ratio = masking_ratio
606
+ self.loss_type = loss_type
607
+ self.mae_loss_weight = mae_loss_weight
608
+ self.mae_normalize_targets = mae_normalize_targets
609
+
610
+ if not self.sine_positional_embeddings:
611
+ self.positional_embeddings = nn.Embedding(self.seq_len, embedding_dim)
612
+ self.decoder_positional_embeddings = nn.Embedding(self.seq_len, decoder_embedding_dim)
613
+ self.register_buffer("position_ids", torch.arange(self.seq_len))
614
+
615
+ self.to_patches = Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=patch_size, p2=patch_size)
616
+ self.patch_embedder = nn.Linear(num_channels * patch_size * patch_size, embedding_dim)
617
+
618
+ # Encoder
619
+ self.pre_layer_norm = nn.LayerNorm(embedding_dim)
620
+ self.encoder_layers = nn.ModuleList([FastCLIPEncoderLayer(
621
+ hidden_size=embedding_dim,
622
+ num_attention_heads=num_attention_heads,
623
+ out_seq_len=None,
624
+ activation_cls=activation_cls,
625
+ use_palm_alt=True,
626
+ norm_qk=False,
627
+ skip_init=None,
628
+ ) for _ in range(num_layers)])
629
+
630
+ # Head for classification
631
+ self.pooling_head = GAPHead(embedding_dim, n_tags)
632
+
633
+ # Decoder
634
+ if embedding_dim != decoder_embedding_dim or decoder_force_projection:
635
+ self.encoder_to_decoder_proj = nn.Linear(embedding_dim, decoder_embedding_dim)
636
+ else:
637
+ self.encoder_to_decoder_proj = nn.Identity()
638
+ self.decoder_pre_layer_norm = nn.LayerNorm(decoder_embedding_dim)
639
+ self.decoder_layers = nn.ModuleList([FastCLIPEncoderLayer(
640
+ hidden_size=decoder_embedding_dim,
641
+ num_attention_heads=decoder_num_attention_heads,
642
+ out_seq_len=None,
643
+ activation_cls=activation_cls,
644
+ use_palm_alt=True,
645
+ norm_qk=False,
646
+ skip_init=None,
647
+ ) for _ in range(decoder_num_layers)])
648
+
649
+ if mae_post_norm:
650
+ self.decoder_to_pixel_values = nn.Sequential(
651
+ nn.LayerNorm(decoder_embedding_dim),
652
+ nn.Linear(decoder_embedding_dim, num_channels * patch_size * patch_size)
653
+ )
654
+ else:
655
+ self.decoder_to_pixel_values = nn.Linear(decoder_embedding_dim, num_channels * patch_size * patch_size)
656
+ self.mask_token = nn.Parameter(torch.zeros(decoder_embedding_dim))
657
+ torch.nn.init.normal_(self.mask_token, std=0.02)
658
+
659
+ def forward(self, batch):
660
+ pixel_values = batch['image']
661
+ device = pixel_values.device
662
+ B, C, H, W = pixel_values.shape
663
+ assert H % self.patch_size == 0, f"Input image height ({H}) needs to be divisible by the patch size ({self.patch_size})."
664
+ assert W % self.patch_size == 0, f"Input image width ({W}) needs to be divisible by the patch size ({self.patch_size})."
665
+
666
+ # Convert image to patches (B, seq_len, C * patch_size * patch_size)
667
+ patches = self.to_patches(pixel_values)
668
+ seq_len = patches.shape[1]
669
+ num_masked = int(self.masking_ratio * seq_len)
670
+
671
+ # For each batch tensor, use argsort to convert the random numbers into a permutation of the patch indices
672
+ # From this we can get the masked and unmasked indices
673
+ patch_mask = torch.rand(B, seq_len, device=device)
674
+ patch_mask = torch.argsort(patch_mask, dim=1)
675
+ masked_indices, unmasked_indices = patch_mask[:, :num_masked], patch_mask[:, num_masked:]
676
+ batch_range = torch.arange(B, device=device)[:, None]
677
+
678
+ # Masked and unmasked patches
679
+ unmasked_patches = patches[batch_range, unmasked_indices]
680
+ masked_patches = patches[batch_range, masked_indices]
681
+
682
+ # Embed unmasked patches for the encoder (B, seq_len, embedding_dim)
683
+ tokens = self.patch_embedder(unmasked_patches)
684
+
685
+ if self.sine_positional_embeddings:
686
+ position_embeddings = sinusoidal_position_embedding(W // self.patch_size, H // self.patch_size, self.embedding_dim, pixel_values.dtype, device)
687
+ decoder_position_embeddings = sinusoidal_position_embedding(W // self.patch_size, H // self.patch_size, self.decoder_embedding_dim, pixel_values.dtype, device)
688
+ else:
689
+ position_embeddings = self.positional_embeddings(self.position_ids)
690
+ decoder_position_embeddings = self.decoder_positional_embeddings(self.position_ids)
691
+
692
+ # Add position embeddings
693
+ tokens = tokens + position_embeddings[unmasked_indices]
694
+
695
+ # Run the encoder
696
+ encoded_tokens = self.pre_layer_norm(tokens)
697
+
698
+ for layer in self.encoder_layers:
699
+ encoded_tokens = layer(encoded_tokens)
700
+
701
+ # Label predictions
702
+ if self.training:
703
+ preds = self.pooling_head(encoded_tokens)
704
+ else:
705
+ # During inference, classify using the entire image
706
+ # But we'll do the usual for the MAE part, just so we can see how MAE is performing during validation
707
+ tokens = self.patch_embedder(patches)
708
+ tokens = tokens + position_embeddings
709
+ tokens = self.pre_layer_norm(tokens)
710
+ for layer in self.encoder_layers:
711
+ tokens = layer(tokens)
712
+ preds = self.pooling_head(tokens)
713
+
714
+ # Projection for the decoder and position embeddings
715
+ decoder_tokens = self.encoder_to_decoder_proj(encoded_tokens)
716
+ decoder_tokens = decoder_tokens + decoder_position_embeddings[unmasked_indices]
717
+
718
+ # Fill in the masked patches
719
+ mask_tokens = einops.repeat(self.mask_token, 'd -> b n d', b = B, n = num_masked)
720
+ mask_tokens = mask_tokens + decoder_position_embeddings[masked_indices]
721
+ decoder_tokens = torch.cat([decoder_tokens, mask_tokens], dim=1)
722
+
723
+ # Run the decoder
724
+ decoded_tokens = self.decoder_pre_layer_norm(decoder_tokens)
725
+
726
+ for layer in self.decoder_layers:
727
+ decoded_tokens = layer(decoded_tokens)
728
+
729
+ # Only predict the masked patches
730
+ # All the masked patches are at the end of the sequence
731
+ decoded_tokens = decoded_tokens[:, -num_masked:]
732
+ pred_pixel_values = self.decoder_to_pixel_values(decoded_tokens)
733
+
734
+ # Calculate the mae loss
735
+ if self.mae_normalize_targets:
736
+ # Normalize each patch by its mean and variance. The ViCHA paper says this provides better results
737
+ means = masked_patches.mean(dim=-1, keepdim=True)
738
+ vars = masked_patches.var(dim=-1, keepdim=True)
739
+ target = (masked_patches - means) / (vars + 1e-6)**0.5
740
+ mae_loss = F.mse_loss(pred_pixel_values, target)
741
+ else:
742
+ mae_loss = F.mse_loss(pred_pixel_values, masked_patches)
743
+ mae_loss = mae_loss * self.mae_loss_weight
744
+
745
+ return {
746
+ 'tags': preds,
747
+ 'mae_loss': mae_loss,
748
+ }
749
+
750
+ def calculate_loss(self, preds, batch, pos_weight):
751
+ return basic_calculate_loss(preds, batch, pos_weight, self.loss_type) + preds['mae_loss']
752
+
753
+ def get_optimized_parameters(self, lr: float):
754
+ return self.parameters()
755
+
756
+ def save(self):
757
+ return self.state_dict()
758
+
759
+ def load(self, state_dict):
760
+ self.load_state_dict(state_dict)
761
+
762
+
763
+ class StochDepth(nn.Module):
764
+ def __init__(self, drop_rate: float, scale_by_keep: bool = False):
765
+ super().__init__()
766
+ self.drop_rate = drop_rate
767
+ self.scale_by_keep = scale_by_keep
768
+
769
+ def forward(self, x):
770
+ if not self.training:
771
+ return x
772
+
773
+ batch_size = x.shape[0]
774
+ r = torch.rand((batch_size, 1, 1), device=x.device)
775
+ keep_prob = 1 - self.drop_rate
776
+ binary_tensor = torch.floor(keep_prob + r)
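+ # floor(keep_prob + U[0,1)) is 1 with probability keep_prob and 0 otherwise, so whole samples are dropped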
777
+ if self.scale_by_keep:
778
+ x = x / keep_prob
779
+
780
+ return x * binary_tensor
781
+
782
+
783
+ class SkipInitChannelwise(nn.Module):
784
+ def __init__(self, channels, init_val=1e-6):
785
+ super().__init__()
786
+ self.channels = channels
787
+ self.init_val = init_val
788
+ self.skip = nn.Parameter(torch.ones(channels) * init_val)
789
+
790
+ def forward(self, x):
791
+ return x * self.skip
792
+
793
+
794
+ class PosEmbedding(nn.Module):
795
+ def __init__(self, d_model: int, max_len: int, use_sine: bool, patch_size: int):
796
+ super().__init__()
797
+ self.d_model = d_model
798
+ self.max_len = max_len
799
+ self.use_sine = use_sine
800
+ self.patch_size = patch_size
801
+
802
+ if not self.use_sine:
803
+ self.embedding = nn.Embedding(max_len, d_model)
804
+ nn.init.trunc_normal_(self.embedding.weight, std=0.02)
805
+ self.register_buffer("position_ids", torch.arange(max_len))
806
+
807
+ def forward(self, x, width: int, height: int):
808
+ if self.use_sine:
809
+ position_embeddings = sinusoidal_position_embedding(width // self.patch_size, height // self.patch_size, self.d_model, x.dtype, x.device)
810
+ else:
811
+ position_embeddings = self.embedding(self.position_ids)
812
+
813
+ return x + position_embeddings
814
+
815
+
816
+ class MLPBlock(nn.Module):
817
+ def __init__(self, d_model: int, d_ff: int, stochdepth_rate: float):
818
+ super().__init__()
819
+ self.linear1 = nn.Linear(d_model, d_ff)
820
+ self.linear2 = nn.Linear(d_ff, d_model)
821
+ self.activation = nn.GELU()
822
+ if stochdepth_rate > 0:
823
+ self.stochdepth = StochDepth(stochdepth_rate, scale_by_keep=True)
824
+ else:
825
+ self.stochdepth = None
826
+
827
+ def forward(self, x):
828
+ x = self.linear1(x)
829
+ x = self.activation(x)
830
+ if self.stochdepth is not None:
831
+ x = self.stochdepth(x)
832
+ x = self.linear2(x)
833
+ return x
834
+
835
+
836
+ class ViTBlock(nn.Module):
837
+ def __init__(self, num_heads: int, d_model: int, d_ff: int, layerscale_init: float, stochdepth_rate: float):
838
+ super().__init__()
839
+ self.num_heads = num_heads
840
+ self.d_model = d_model
841
+
842
+ assert d_model % num_heads == 0, "d_model must be divisible by num_heads"
843
+
844
+ # MHA
845
+ self.norm1 = nn.LayerNorm(d_model)
846
+ self.qkv_proj = nn.Linear(d_model, d_model * 3)
847
+ self.out_proj = nn.Linear(d_model, d_model)
848
+ self.skip_init1 = SkipInitChannelwise(channels=d_model, init_val=layerscale_init)
849
+ self.stochdepth1 = StochDepth(stochdepth_rate, scale_by_keep=True) if stochdepth_rate > 0 else None
850
+
851
+ # MLP
852
+ self.norm2 = nn.LayerNorm(d_model)
853
+ self.mlp = MLPBlock(d_model, d_ff, stochdepth_rate)
854
+ self.skip_init2 = SkipInitChannelwise(channels=d_model, init_val=layerscale_init)
855
+ self.stochdepth2 = StochDepth(stochdepth_rate, scale_by_keep=True) if stochdepth_rate > 0 else None
856
+
857
+ def forward(self, x):
858
+ bsz, src_len, embed_dim = x.shape
859
+
860
+ out = x
861
+ out = self.norm1(out)
862
+
863
+ # MHA
864
+ qkv_states = self.qkv_proj(out).split(self.d_model, dim=-1)
865
+ q_states = qkv_states[0].view(bsz, src_len, self.num_heads, embed_dim // self.num_heads).transpose(1, 2) # (bsz, num_heads, src_len, embed_dim // num_heads)
866
+ k_states = qkv_states[1].view(bsz, src_len, self.num_heads, embed_dim // self.num_heads).transpose(1, 2) # (bsz, num_heads, src_len, embed_dim // num_heads)
867
+ v_states = qkv_states[2].view(bsz, src_len, self.num_heads, embed_dim // self.num_heads).transpose(1, 2) # (bsz, num_heads, src_len, embed_dim // num_heads)
868
+
869
+ with torch.backends.cuda.sdp_kernel(enable_math=False):
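+ # enable_math=False restricts SDPA to the fused (flash / memory-efficient) kernels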
870
+ out = F.scaled_dot_product_attention(q_states, k_states, v_states) # (bsz, num_heads, tgt_len, head_dim)
871
+ out = out.transpose(1, 2).contiguous().view(bsz, src_len, embed_dim) # (bsz, tgt_len, embed_dim)
872
+
873
+ out = self.out_proj(out)
874
+
875
+ out = self.skip_init1(out)
876
+ if self.stochdepth1 is not None:
877
+ out = self.stochdepth1(out)
878
+ x = out + x
879
+
880
+ out = self.norm2(x)
881
+ out = self.mlp(out)
882
+ out = self.skip_init2(out)
883
+ if self.stochdepth2 is not None:
884
+ out = self.stochdepth2(out)
885
+
886
+ out = out + x
887
+
888
+ return out
889
+
890
+
891
+ def CaiT_LayerScale_init(network_depth):
892
+ if network_depth <= 18:
893
+ return 1e-1
894
+ elif network_depth <= 24:
895
+ return 1e-5
896
+ else:
897
+ return 1e-6
898
+
899
+
900
+ class CNNLayerNorm(nn.Module):
901
+ def __init__(self, d_model: int):
902
+ super().__init__()
903
+ self.norm = nn.LayerNorm(d_model)
904
+
905
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
906
+ x = x.transpose(1, 3)
907
+ x = self.norm(x)
908
+ x = x.transpose(1, 3)
909
+ return x
910
+
911
+
912
+ class CNNStem(nn.Module):
913
+ def __init__(self, config: str):
914
+ super().__init__()
915
+ self.config = config
916
+
917
+ layers = []
918
+ channels = 3
919
+
920
+ for line in config.split(";"):
921
+ ty, line = line.split(":") if ":" in line else (line, "")
922
+ options = line.split(",")
923
+ options = [o.split("=") for o in options] if line else []
924
+ options = {k: v for k, v in options}
925
+
926
+ if ty == 'conv':
927
+ layers.append(nn.Conv2d(
928
+ in_channels=channels,
929
+ out_channels=int(options['c']),
930
+ kernel_size=int(options['k'] if 'k' in options else 3),
931
+ stride=int(options['s'] if 's' in options else 2),
932
+ bias=True,
933
+ padding=int(options['p'] if 'p' in options else 1),
934
+ ))
935
+ channels = int(options['c'])
936
+ elif ty == 'bn':
937
+ layers.append(nn.BatchNorm2d(channels))
938
+ elif ty == 'ln':
939
+ layers.append(CNNLayerNorm(channels))
940
+ elif ty == 'relu':
941
+ layers.append(nn.ReLU())
942
+ elif ty == 'gelu':
943
+ layers.append(nn.GELU())
944
+
945
+ self.conv = nn.Sequential(*layers)
946
+
947
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
948
+ return self.conv(x)
949
+
950
+
951
+ class ViT(VisionModel):
952
+ def __init__(self,
953
+        n_tags: int,
+        image_size: int,
+        num_blocks: int,
+        patch_size: int,
+        d_model: int,
+        mlp_dim: int,
+        num_heads: int,
+        stochdepth_rate: float,
+        use_sine: bool,
+        loss_type: str,
+        layerscale_init: Optional[float] = None,
+        head_mean_after: bool = False,
+        cnn_stem: str | None = None,
+        patch_dropout: float = 0.0,
+    ):
+        super().__init__(image_size, n_tags)
+
+        #assert image_size % patch_size == 0, "image_size must be divisible by patch_size"
+        assert d_model % num_heads == 0, "d_model must be divisible by num_heads"
+
+        out_dim = n_tags
+        self.n_tags = n_tags
+        self.loss_type = loss_type
+        self.patch_size = patch_size
+        self.head_mean_after = head_mean_after
+        self.patch_dropout = patch_dropout
+
+        layerscale_init = CaiT_LayerScale_init(num_blocks) if layerscale_init is None else layerscale_init
+        self.patch_embeddings = nn.Conv2d(
+            in_channels=3,
+            out_channels=d_model,
+            kernel_size=patch_size,
+            stride=patch_size,
+            bias=True,
+        ) if cnn_stem is None else CNNStem(cnn_stem)
+        self.pos_embedding = PosEmbedding(d_model, (image_size // patch_size) ** 2, use_sine=use_sine, patch_size=patch_size)
+
+        self.blocks = nn.ModuleList([
+            ViTBlock(num_heads, d_model, mlp_dim, layerscale_init, stochdepth_rate)
+            for _ in range(num_blocks)
+        ])
+
+        self.norm = nn.LayerNorm(d_model)
+        self.head = nn.Linear(d_model, out_dim)
+
+    def forward(self, batch, return_embeddings=False, return_loss: bool = False, pos_weight = None):
+        B, C, H, W = batch['image'].shape
+        assert H % self.patch_size == 0, f"Input image height ({H}) needs to be divisible by the patch size ({self.patch_size})."
+        assert W % self.patch_size == 0, f"Input image width ({W}) needs to be divisible by the patch size ({self.patch_size})."
+
+        x = self.patch_embeddings(batch['image'])  # (bsz, d_model, patch_num, patch_num)
+        x = x.flatten(2).transpose(1, 2)  # (bsz, patch_num ** 2, d_model)
+        x = self.pos_embedding(x, W, H)  # (bsz, patch_num ** 2, d_model)
+
+        # Patch dropout
+        seq_len = x.shape[1]
+        patch_dropout = int(math.ceil((1.0 - self.patch_dropout) * seq_len))
+
+        if patch_dropout != seq_len:
+            # Generate a matrix of random numbers between 0 and 1 of shape (B, seq_len)
+            patch_mask = torch.rand(B, seq_len, device=x.device)
+            # For each batch tensor, use argsort to convert the random numbers into a permutation of the patch indices
+            patch_mask = torch.argsort(patch_mask, dim=1)
+            # Truncate
+            patch_mask = patch_mask[:, :patch_dropout]
+
+            x = x.gather(1, patch_mask.unsqueeze(-1).expand(-1, -1, x.shape[-1]))
+
+            #indices = torch.randperm(seq_len, device=x.device)[:patch_dropout]
+            #x = x[:, indices, :]
+
+        # Transformer
+        for block in self.blocks:
+            x = block(x)
+
+        # Head
+        result = {}
+
+        x = self.norm(x)
+        if self.head_mean_after:
+            x = self.head(x)
+            x = x.mean(dim=1)
+        else:
+            x = x.mean(dim=1)
+            if return_embeddings:
+                result['embeddings'] = x
+            x = self.head(x)
+
+        result['tags'] = x
+
+        if return_loss:
+            result['loss'] = self.calculate_loss(result, batch, pos_weight)
+
+        return result
+
+    def calculate_loss(self, preds, batch, pos_weight):
+        return basic_calculate_loss(preds, batch, pos_weight, self.loss_type)
+
+    def get_optimized_parameters(self, lr: float):
+        return self.parameters()
+
+    def save(self):
+        return self.state_dict()
+
+    def load(self, state_dict):
+        if 'head.weight' in state_dict and 'head.bias' in state_dict and state_dict['head.weight'].shape[0] == (self.n_tags + 9):
+            # Support old models which included 3 rating and 6 score dimensions
+            state_dict['head.weight'] = state_dict['head.weight'][:self.n_tags]
+            state_dict['head.bias'] = state_dict['head.bias'][:self.n_tags]
+
+        self.load_state_dict(state_dict)
+
+
+from PIL import Image
+import torchvision.transforms.functional as TVF
+from torchvision import transforms
+
+def prepare_image(image: Image.Image, target_size: int) -> torch.Tensor:
+    # Pad image to square
+    image_shape = image.size
+    max_dim = max(image_shape)
+    pad_left = (max_dim - image_shape[0]) // 2
+    pad_top = (max_dim - image_shape[1]) // 2
+
+    padded_image = Image.new('RGB', (max_dim, max_dim), (255, 255, 255))
+    padded_image.paste(image, (pad_left, pad_top))
+
+    # Resize image
+    if max_dim != target_size:
+        padded_image = padded_image.resize((target_size, target_size), Image.BICUBIC)
+
+    # Convert to tensor
+    image_tensor = TVF.pil_to_tensor(padded_image) / 255.0
+
+    # Normalize
+    image_tensor = TVF.normalize(image_tensor, mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
+
+    return image_tensor
+
+
+@torch.no_grad()
+def predict(image: Image.Image, model, top_tags, THRESHOLD=0.4):
+    image = transforms.ToPILImage()(image.permute(2, 0, 1))
+    image_tensor = prepare_image(image, model.image_size)
+    batch = {
+        'image': image_tensor.unsqueeze(0).to('cuda'),
+    }
+
+    with torch.amp.autocast_mode.autocast('cuda', enabled=True):
+        preds = model(batch)
+        tag_preds = preds['tags'].sigmoid().cpu()
+
+    scores = {top_tags[i]: tag_preds[0][i] for i in range(len(top_tags))}
+    predicted_tags = [tag for tag, score in scores.items() if score > THRESHOLD]
+    tag_string = ', '.join(predicted_tags)
+
+    return tag_string, scores
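
The `predict` helper above expects an image as an (H, W, C) float tensor in the 0–1 range (the layout ComfyUI uses for IMAGE outputs), a model that has already been moved to `'cuda'`, and the ordered tag list that matches the model head. A minimal usage sketch, assuming `model` and `top_tags` have already been loaded elsewhere — the loading step is not part of this file, and the file name below is only a placeholder:

```python
# Hedged sketch: `model` (the ViT defined above, in eval mode on 'cuda') and
# `top_tags` (tag names in the same order as the model's output head) are
# assumed to be provided by the caller; "example.jpg" is a placeholder path.
from PIL import Image
import torchvision.transforms.functional as TVF

pil_image = Image.open("example.jpg").convert("RGB")
image_hwc = TVF.pil_to_tensor(pil_image).permute(1, 2, 0) / 255.0  # (H, W, C), floats in [0, 1]

tag_string, scores = predict(image_hwc, model, top_tags, THRESHOLD=0.4)
print(tag_string)  # comma-separated tags whose sigmoid score exceeds the threshold
```

Note that `predict` pads the image to a square, resizes it to `model.image_size`, and hard-codes the `'cuda'` device, so a GPU is required as written.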
custom_nodes/ComfyUI-N-Nodes/libs/migrate.py ADDED
@@ -0,0 +1,43 @@
+import sys
+import os
+
+def migrate_workflow(input_file_path):
+    try:
+        file_name, file_extension = os.path.splitext(input_file_path)
+
+        output_file_path = f"{file_name}_migrated.json"
+
+        pre_list = ('LoadVideo', 'SaveVideo','FrameInterpolator', 'LoadFramesFromFolder','SetMetadataForSaveVideo','GPT Loader Simple','GPTSampler','String Variable','Integer Variable','Float Variable','DynamicPrompt')
+        post_list= ('LoadVideo [n-suite]', 'SaveVideo [n-suite]','FrameInterpolator [n-suite]', 'LoadFramesFromFolder [n-suite]','SetMetadataForSaveVideo [n-suite]','GPT Loader Simple [n-suite]','GPT Sampler [n-suite]','String Variable [n-suite]','Integer Variable [n-suite]','Float Variable [n-suite]','DynamicPrompt [n-suite]')
+        replacements = list(zip(pre_list, post_list))
+
+        with open(input_file_path, 'r') as input_file:
+            content = input_file.read()
+
+        # s&r
+        for old, new in replacements:
+            content = content.replace(f'"Node name for S&R": "{old}"', f'"Node name for S&R": "{new}"')
+        #type
+        for old, new in replacements:
+            content = content.replace(f'"type": "{old}"', f'"type": "{new}"')
+
+        with open(output_file_path, 'w') as output_file:
+            output_file.write(content)
+
+        print("Replacement completed successfully.")
+    except Exception as e:
+        print(f"An error occurred: {str(e)}")
+
+if __name__ == "__main__":
+    print(len(sys.argv))
+    if len(sys.argv) != 2:
+        print("Error: Provide the path of the text file to migrate.")
+        sys.exit(1)
+
+    file_path = sys.argv[1]
+
+    if not os.path.isfile(file_path):
+        print(f"Error: The file {file_path} does not exist.")
+        sys.exit(1)
+
+    migrate_workflow(file_path)
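
`migrate.py` rewrites the old node names (`LoadVideo`, `SaveVideo`, `GPTSampler`, …) in an exported workflow JSON to their `[n-suite]` equivalents and writes the result to a `*_migrated.json` file next to the input. A small sketch of calling it programmatically rather than from the command line — the path is a placeholder, and adding `libs/` to `sys.path` is an assumption about where the script is run from:

```python
# Hedged sketch: run the migration helper from Python.
# "workflow.json" is a placeholder path to an exported ComfyUI workflow;
# the sys.path entry assumes the ComfyUI root as the working directory.
import sys
sys.path.append("custom_nodes/ComfyUI-N-Nodes/libs")

from migrate import migrate_workflow

migrate_workflow("workflow.json")  # writes workflow_migrated.json alongside the input
```

Equivalently, the `__main__` block lets you run `python migrate.py <workflow.json>` directly from a shell.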
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/.gitignore ADDED
@@ -0,0 +1,2 @@
+.venv
+__pycache__
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/README.md ADDED
@@ -0,0 +1,68 @@
+## commit 38af98596e59f2a6c25c6b52b2bd5a672dab4144
+
+# 🌔 moondream
+
+a tiny vision language model that kicks ass and runs anywhere
+
+## moondream1
+
+1.6B parameter model built using SigLIP, Phi-1.5 and the LLaVA training dataset.
+Weights are licensed under CC-BY-SA due to using the LLaVA dataset. Try it out
+on [Huggingface Spaces](https://huggingface.co/spaces/vikhyatk/moondream1)!
+
+**Benchmarks**
+
+| Model | Parameters | VQAv2 | GQA | VizWiz | TextVQA |
+| --- | --- | --- | --- | --- | --- |
+| LLaVA-1.5 | 13.3B | 80.0 | 63.3 | 53.6 | 61.3 |
+| LLaVA-1.5 | 7.3B | 78.5 | 62.0 | 50.0 | 58.2 |
+| [MC-LLaVA-3B](https://huggingface.co/visheratin/MC-LLaVA-3b) | 3B | 64.2 | 49.6 | 24.9 | 38.6 |
+| [LLaVA-Phi](https://arxiv.org/pdf/2401.02330.pdf) | 3B | 71.4 | - | 35.9 | 48.6 |
+| **moondream1** | 1.6B | 74.3 | 56.3 | 30.3 | 39.8 |
+
+**Examples**
+
+| Image | Examples |
+| --- | --- |
+| ![](assets/demo-1.jpg) | **What is the title of this book?**<br>The Little Book of Deep Learning<br><br>**What can you tell me about this book?**<br>The book appears to be a white booklet titled "The Little Book of Deep Learning." It is held in a person's hand, and it seems to be a personal possession. The book's content focuses on the basics of deep learning, which is a field of artificial intelligence that uses neural networks to process and analyze data. It is likely that the book provides an introduction to the concepts and techniques involved in deep learning, making it accessible for beginners and helping them understand the fundamentals of this advanced machine learning approach.<br><br>**Where is the person standing?**<br>The person is standing on a balcony or a deck, which is located outside the house. |
+| ![](assets/demo-2.jpg) | **What type of food is the girl holding?**<br>The girl is holding a large hamburger or burger, which is a type of sandwich made from ground meat, typically consisting of a beef patty, and is usually served between two slices of bread.<br><br>**What color is the woman's hair?**<br>The woman's hair is white.<br><br>**What is the girl doing in the image?**<br>The girl in the image is eating a hamburger. |
+| ![](assets/demo-3.jpg) | **What color is the train?**<br>The train is red.<br><br>**What is the bus doing in the image?**<br>The bus is driving down a street, passing through an intersection, and traveling on a train track.<br><br>**What kind of public transportation is in the image?**<br>The image features a red trolley or commuter train on a city street, which is a form of public transportation. |
+| ![](assets/demo-4.jpg) | **What is the girl looking at?**<br>The girl is looking at her reflection in the mirror while adjusting her uniform.<br><br>**What color is the woman's hair?**<br>The woman's hair is brown.<br><br>**What is the woman doing in the mirror?**<br>The woman is adjusting her hair in the mirror. |
+| ![](assets/demo-5.jpg) | **What might be the reason for the dog's aggressive behavior?**<br>The dog's aggressive behavior, with its teeth bared and growling, could be due to several reasons. It is possible that the dog is feeling threatened, scared, or defensive in its current environment, such as a room with a person it doesn't know well or a situation that provokes it. Alternatively, the dog might be reacting to a perceived threat or discomfort from the person holding it. It is essential to assess the situation and the dog's body language to determine the exact cause of its aggressive behavior and respond accordingly to ensure the safety and well-being of both the dog and the person involved.<br><br>**What kind of dog is in the picture?**<br>The picture features a small dog, possibly a Chihuahua, with red eyes and a mean, hungry-looking expression.<br><br>**What color is the dog?**<br>The dog is black and white. |
+
+**Usage**
+
+Clone this repository and install the dependencies:
+
+```bash
+pip install -r requirements.txt
+```
+
+Use the `sample.py` script to run the model on CPU:
+
+```bash
+python sample.py --image [IMAGE_PATH] --prompt [PROMPT]
+```
+
+When the `--prompt` argument is not provided, the script will allow you to ask
+questions interactively.
+
+**Gradio demo**
+
+Use the `gradio_demo.py` script to run the gradio app:
+
+```bash
+python gradio_demo.py
+```
+
+
+**Limitations**
+
+* The model may generate inaccurate statements.
+* It may struggle to adhere to intricate or nuanced instructions.
+* It is primarily designed to understand English. Informal English, slang, and
+  non-English languages may not work well.
+* The model may not be free from societal biases. Users should be aware of this
+  and exercise caution and critical thinking when using the model.
+* The model may generate offensive, inappropriate, or hurtful content if it is
+  prompted to do so.
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/_sample.py ADDED
@@ -0,0 +1,26 @@
+import argparse
+from PIL import Image
+from moondream import VisionEncoder, TextModel
+from huggingface_hub import snapshot_download
+
+def main(image_path, prompt):
+    model_path = snapshot_download("vikhyatk/moondream1")
+    vision_encoder = VisionEncoder(model_path)
+    text_model = TextModel(model_path)
+    image = Image.open(image_path)
+    image_embeds = vision_encoder(image)
+
+    if prompt is None:
+        while True:
+            question = input("> ")
+            print(text_model.answer_question(image_embeds, question))
+    else:
+        print(">", prompt)
+        print(text_model.answer_question(image_embeds, prompt))
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--image", type=str, required=True)
+    parser.add_argument("--prompt", type=str, required=False)
+    args = parser.parse_args()
+    main(args.image, args.prompt)
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-1.jpg ADDED
Git LFS Details
  • SHA256: 8bab1f04ec7187fcc8cc6a66c8242a6d85d652fe78dddbf32e443901b03b09b3
  • Pointer size: 131 Bytes
  • Size of remote file: 285 kB
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-2.jpg ADDED
Git LFS Details
  • SHA256: 52bdf639be45fe501c1bb31dd13df7d4726de32bb676aa4fc84771a4b8242a21
  • Pointer size: 131 Bytes
  • Size of remote file: 139 kB
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-3.jpg ADDED
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-4.jpg ADDED
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/assets/demo-5.jpg ADDED
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/gradio_demo.py ADDED
@@ -0,0 +1,36 @@
+import re
+import gradio as gr
+from moondream import VisionEncoder, TextModel
+from huggingface_hub import snapshot_download
+from threading import Thread
+from transformers import TextIteratorStreamer
+
+model_path = snapshot_download("vikhyatk/moondream1")
+vision_encoder = VisionEncoder(model_path)
+text_model = TextModel(model_path)
+
+def moondream(img, prompt):
+    image_embeds = vision_encoder(img)
+    streamer = TextIteratorStreamer(text_model.tokenizer, skip_special_tokens=True)
+    thread = Thread(target=text_model.answer_question, kwargs={
+        "image_embeds": image_embeds, "question": prompt, "streamer": streamer})
+    thread.start()
+
+    buffer = ""
+    for new_text in streamer:
+        clean_text = re.sub("<$|END$", "", new_text)
+        buffer += clean_text
+        yield buffer.strip("<END")
+
+with gr.Blocks() as demo:
+    gr.Markdown("# 🌔 moondream \n ### A tiny vision language model. [GitHub](https://github.com/vikhyat/moondream)")
+    with gr.Row():
+        prompt = gr.Textbox(label='Input Prompt', placeholder='Type here...', scale=4)
+        submit = gr.Button('Submit')
+    with gr.Row():
+        img = gr.Image(type='pil', label='Upload an Image')
+        output = gr.TextArea(label="Response", info='Please wait for a few seconds..')
+    submit.click(moondream, [img, prompt], output)
+    prompt.submit(moondream, [img, prompt], output)
+
+demo.queue().launch(debug=True)
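
`gradio_demo.py` streams partial answers by running `text_model.answer_question` on a background thread and reading chunks from a `TextIteratorStreamer` as they arrive. The same Thread-plus-streamer pattern with a plain Hugging Face causal LM looks like the sketch below; the model name and prompt are placeholders for illustration only and are not part of this repository:

```python
# Hedged sketch of the Thread + TextIteratorStreamer streaming pattern used above,
# shown with a generic Hugging Face causal LM ("gpt2" is only a placeholder).
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("The moon is", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)

# generate() blocks until completion, so it runs on a worker thread while the
# main thread consumes partial text from the streamer as it becomes available.
thread = Thread(target=model.generate, kwargs={**inputs, "streamer": streamer, "max_new_tokens": 40})
thread.start()

for piece in streamer:
    print(piece, end="", flush=True)
thread.join()
```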
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/moondream/.gitattributes ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/moondream/README.md ADDED
@@ -0,0 +1,6 @@
+---
+language:
+- en
+---
+
+Check out the [moondream repository on GitHub](https://github.com/vikhyat/moondream) for inference code and other details.
custom_nodes/ComfyUI-N-Nodes/libs/moondream_repo/moondream/__init__.py ADDED
@@ -0,0 +1,2 @@
+from .vision_encoder import VisionEncoder
+from .text_model import TextModel