AliMc2021 commited on
Commit
d9b8e9c
·
1 Parent(s): 8b12cb3

add application files

Browse files
Files changed (12) hide show
  1. .gitattributes +36 -35
  2. .gitignore +158 -0
  3. LICENSE +21 -0
  4. README.md +83 -13
  5. app.py +206 -0
  6. database.py +132 -0
  7. download.py +40 -0
  8. functions.py +270 -0
  9. generate.py +80 -0
  10. lib.txt +15 -0
  11. model.py +74 -0
  12. train.py +136 -0
.gitattributes CHANGED
@@ -1,35 +1,36 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ models/** filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ lab/
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+ database.db
31
+
32
+ # PyInstaller
33
+ # Usually these files are written by a python script from a template
34
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
35
+ *.manifest
36
+ *.spec
37
+
38
+ # Installer logs
39
+ pip-log.txt
40
+ pip-delete-this-directory.txt
41
+
42
+ # Unit test / coverage reports
43
+ htmlcov/
44
+ .tox/
45
+ .nox/
46
+ .coverage
47
+ .coverage.*
48
+ .cache
49
+ nosetests.xml
50
+ coverage.xml
51
+ *.cover
52
+ *.py,cover
53
+ .hypothesis/
54
+ .pytest_cache/
55
+ cover/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ .pybuilder/
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # IPython
85
+ profile_default/
86
+ ipython_config.py
87
+
88
+ # pyenv
89
+ # For a library or package, you might want to ignore these files since the code is
90
+ # intended to run in multiple environments; otherwise, check them in:
91
+ # .python-version
92
+
93
+ # pipenv
94
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
+ # install all needed dependencies.
98
+ #Pipfile.lock
99
+
100
+ # poetry
101
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
103
+ # commonly ignored for libraries.
104
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105
+ #poetry.lock
106
+
107
+ # pdm
108
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109
+ #pdm.lock
110
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111
+ # in version control.
112
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
113
+ .pdm.toml
114
+ .pdm-python
115
+ .pdm-build/
116
+
117
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
118
+ __pypackages__/
119
+
120
+ # Celery stuff
121
+ celerybeat-schedule
122
+ celerybeat.pid
123
+
124
+ # SageMath parsed files
125
+ *.sage.py
126
+
127
+ # Environments
128
+ .env
129
+ .venv
130
+ env/
131
+ venv/
132
+ ENV/
133
+ env.bak/
134
+ venv.bak/
135
+
136
+ # Spyder project settings
137
+ .spyderproject
138
+ .spyproject
139
+
140
+ # Rope project settings
141
+ .ropeproject
142
+
143
+ # mkdocs documentation
144
+ /site
145
+
146
+ # mypy
147
+ .mypy_cache/
148
+ .dmypy.json
149
+ dmypy.json
150
+
151
+ # Pyre type checker
152
+ .pyre/
153
+
154
+ # pytype static type analyzer
155
+ .pytype/
156
+
157
+ # Cython debug symbols
158
+ cython_debug/
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Alireza
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,13 +1,83 @@
1
- ---
2
- title: Gpt Tools
3
- emoji: 👁
4
- colorFrom: gray
5
- colorTo: indigo
6
- sdk: gradio
7
- sdk_version: 5.12.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GPT Tools
2
+
3
+ ![GUI](https://github.com/user-attachments/assets/6a845c99-6ecc-422f-b662-8069cb5c2324)
4
+
5
+ ---
6
+ این پروژه یک رابط کاربری گرافیکی زیبا و کاربردی برای تولید متن، کد، داستان‌های تعاملی و ارزیابی مدل‌های مختلف مانند GPT-2 و CodeGen ارائه می‌دهد. با استفاده از این ابزار می‌توانید به راحتی مدل‌های زبان طبیعی را مدیریت و از خروجی‌های آن بهره‌برداری کنید
7
+
8
+ ---
9
+
10
+ ## **🚨 Requirements**
11
+
12
+ این پروژه برای اجرا نیاز به **پایتون نسخه 3.8.6** دارد. لطفاً اطمینان حاصل کنید که نسخه صحیح پایتون روی سیستم شما نصب است.
13
+ برای بررسی نسخه پایتون، دستور زیر را در خط فرمان اجرا کنید:
14
+ ```bash
15
+ python --version
16
+ ```
17
+
18
+ ---
19
+
20
+ ## **💫 Main features**
21
+
22
+ - تولید متن: تولید متن‌های خلاقانه با استفاده از مدل‌های مختلف GPT-2.
23
+ - تولید کد: تولید کدهای برنامه‌نویسی با مدل CodeGen از طریق ورودی‌های توصیفی.
24
+ - داستان‌های تعاملی: ایجاد داستان‌های سفارشی و خلاقانه با همکاری مدل.
25
+ - مدیریت مدل‌ها: دانلود و ذخیره مدل‌ها در مسیرهای سفارشی.
26
+ - آموزش مدل‌ها: آموزش مجدد مدل‌ها با داده‌های دلخواه و ذخیره‌سازی تغییرات.
27
+ ---
28
+ ## **📁 Project Structure**
29
+ ```bash
30
+ .
31
+ ├── app.py # رابط کاربری گرافیکی (Gradio)
32
+ ├── model.py # مدیریت و بارگذاری مدل‌ها
33
+ ├── generate.py # منطق تولید متن و کد
34
+ ├── train.py # آموزش مجدد مدل‌ها
35
+ ├── database.py # مدیریت پایگاه داده برای ذخیره ورودی‌ها
36
+ ├── models/ # مسیر پیش‌فرض برای ذخیره مدل‌ها
37
+ └── lib.txt # لیست کتابخانه‌های موردنیاز
38
+ ```
39
+ ---
40
+ ## **🚀 Installation and setup**
41
+
42
+ ### **نصب پایتون 3.8.6**
43
+ اگر پایتون نسخه 3.8.6 روی سیستم شما نصب نیست، آن را از صفحه دانلود رسمی پایتون نصب کنید.
44
+ در سیستم‌های لینوکسی می‌توانید از دستورات زیر استفاده کنید
45
+ ```bash
46
+ sudo apt update
47
+ sudo apt install python3.8
48
+ ```
49
+ ---
50
+ ### **کلون کردن**
51
+ ابتدا مخزن پروژه رو کلون کنید
52
+ ```bash
53
+ git clone https://github.com/ali0discord/gpt-text-generator.git
54
+ cd gpt-text-generator
55
+ ```
56
+ ---
57
+ ### **نصب کتابخانه ها**
58
+ با این دستور کتابخانه ها رو نصب کنید
59
+ ```bash
60
+ pip install -r lib.txt
61
+ ```
62
+ ---
63
+ ### **دانلود مدل ها**
64
+ با اجرای این فایل مدل های مورد نیاز به صورت خودکار دانلود و در دایرکتوری مخصوص ذخیره میشوند
65
+ ```bash
66
+ python download.py
67
+ ```
68
+ ---
69
+ ### **اجرای کدها**
70
+ با دستور زیر کد ها رو اجرا کنید
71
+ ```bash
72
+ python app.py
73
+ ```
74
+ پس از اجرای کامل کدها وارد این آدرس شوید
75
+ ```bash
76
+ 127.0.0.1:7860
77
+ ```
78
+ ---
79
+ ## **گزارش باگ ها**
80
+ از طریق بخش Issues گیت هاب با ما در ارتباط باشید
81
+
82
+ ---
83
+ ### **جان گرفته از علیرضا**
app.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from database import create_db
3
+ from functions import *
4
+ from functions import _generate_code
5
+
6
+ # Supported models
7
+ models_options_general = ['GPT2', 'GPT2-medium', 'GPT2-large', 'GPT2-persian', 'GPT-Neo-125M']
8
+ models_options_codegen = ['codegen']
9
+ models_options_chatbot = ['dialoGPT', 'dialoGPT-medium', 'dialoGPT-large']
10
+
11
+ # Create database
12
+ create_db()
13
+
14
+ # Interface setup
15
+ with gr.Blocks() as interface:
16
+ gr.Markdown(
17
+ "# **GPT Tools**\n\n"
18
+ "Generate something using GPT models. Select the model and adjust the parameters for optimal results."
19
+ )
20
+ with gr.Tabs():
21
+ with gr.Tab("Text Generator"):
22
+ with gr.Row():
23
+ with gr.Column(scale=1, min_width=350):
24
+ input_text = gr.Textbox(label="Input Text", placeholder="Enter your text here...", lines=4, max_lines=6)
25
+ selected_model = gr.Radio(choices=models_options_general, value="GPT2", label="Select Model", type="value")
26
+ with gr.Row():
27
+ max_tokens = gr.Slider(10, 100, value=50, step=1, label="Max New Tokens", interactive=True)
28
+ with gr.Column(scale=1, min_width=350):
29
+ output_text = gr.Textbox(label="Generated Text", interactive=False, lines=8, max_lines=12)
30
+ generate_button = gr.Button("Generate Text", variant="primary")
31
+
32
+ generate_button.click(
33
+ generate,
34
+ inputs=[input_text, selected_model, max_tokens],
35
+ outputs=output_text,
36
+ )
37
+
38
+
39
+ with gr.Tab("Multiverse Story Generator"):
40
+ with gr.Row():
41
+ with gr.Column(scale=1, min_width=350):
42
+ input_text = gr.Textbox(label="Enter your story idea", placeholder="e.g. A scientist discovers a parallel universe...", lines=4, max_lines=6)
43
+ selected_model = gr.Radio(choices=models_options_general, value="GPT2", label="Select Model for Story Generation", type="value")
44
+ max_length = gr.Slider(50, 300, value=150, step=1, label="Max Length", interactive=True)
45
+
46
+ with gr.Column(scale=1, min_width=350):
47
+ output_text = gr.Textbox(label="Generated Worlds", interactive=False, lines=12, max_lines=20)
48
+ generate_button = gr.Button("Generate Parallel Worlds", variant="primary")
49
+
50
+ generate_button.click(
51
+ generate_multiverse,
52
+ inputs=[input_text, selected_model, max_length],
53
+ outputs=output_text,
54
+ )
55
+
56
+ with gr.Tab("Interactive Story Writing"):
57
+ with gr.Row():
58
+ with gr.Column(scale=1, min_width=350):
59
+ story_input = gr.Textbox(label="Add to Story", placeholder="Enter your part of the story...", lines=4, max_lines=6)
60
+ story_model = gr.Radio(choices=models_options_general, value="GPT2", label="Select Model", type="value")
61
+ story_max_length = gr.Slider(50, 300, value=50, step=1, label="Max Length", interactive=True)
62
+ with gr.Column(scale=1, min_width=350):
63
+ story_text = gr.Textbox(label="Story So Far", interactive=False, lines=12, max_lines=20)
64
+ story_button = gr.Button("Generate Next Part", variant="primary")
65
+ reset_button = gr.Button("Reset Story", variant="secondary")
66
+
67
+ story_button.click(
68
+ interactive_story,
69
+ inputs=[story_input, story_model, story_max_length],
70
+ outputs=story_text,
71
+ )
72
+ reset_button.click(
73
+ reset_story,
74
+ inputs=[],
75
+ outputs=story_text,
76
+ )
77
+
78
+ with gr.Tab("Training"):
79
+ gr.Markdown("# **Train Model**\n\n")
80
+ with gr.Column(scale=1, min_width=250):
81
+ train_model_selector = gr.Radio(choices=models_options_general, value="GPT2", label="Select Model for Training", type="value")
82
+ train_method = gr.Radio(
83
+ choices=["Custom Text", "Database", "Dataset File", "Hugging Face Dataset"],
84
+ value="Custom Text",
85
+ label="Training Method",
86
+ type="value"
87
+ )
88
+ dataset_name = gr.Textbox(label="Hugging Face Dataset Name", placeholder="Enter dataset name (e.g., ag_news)")
89
+ split_name = gr.Textbox(label="Dataset Split", placeholder="e.g., train, test, validation")
90
+ epochs = gr.Slider(1, 100, value=10, step=1, label="Epochs", interactive=True)
91
+ batch_size = gr.Slider(1, 100, value=8, step=1, label="Batch Size", interactive=True)
92
+ password = gr.Textbox(label="Enter Training Password", placeholder="Enter password", type="password")
93
+ custom_text = gr.Textbox(label="Custom Text (optional)", placeholder="Enter custom text for training...")
94
+ dataset_file = gr.File(label="Upload Dataset", type="filepath", file_types=[".parquet", ".csv", ".json", ".txt"])
95
+ train_button = gr.Button("Train Model", variant="primary")
96
+ train_status = gr.Textbox(label="Training Status", interactive=False)
97
+
98
+ train_button.click(
99
+ verify_and_train_combined,
100
+ inputs=[train_model_selector, train_method, epochs, batch_size, password, custom_text, dataset_file, dataset_name, split_name],
101
+ outputs=train_status,
102
+ )
103
+ train_button.click(
104
+ verify_and_train_combined,
105
+ inputs=[train_model_selector, train_method, epochs, batch_size, password, custom_text, dataset_file, dataset_name, split_name],
106
+ outputs=train_status,
107
+ )
108
+
109
+ with gr.Tab("Code Generator"):
110
+ gr.Markdown("### Generate Code from Descriptions")
111
+ with gr.Row():
112
+ with gr.Column(scale=1, min_width=350):
113
+ code_prompt = gr.Textbox(label="Code Prompt", placeholder="Describe your coding task, e.g., 'Write a Python function to calculate Fibonacci numbers.'")
114
+ code_max_tokens = gr.Slider(10, 500, value=150, step=10, label="Max Tokens")
115
+ with gr.Column(scale=1, min_width=350):
116
+ generated_code = gr.Textbox(label="Generated Code", interactive=False, lines=10, max_lines=20)
117
+ generate_code_button = gr.Button("Generate Code")
118
+
119
+ generate_code_button.click(
120
+ _generate_code,
121
+ inputs=[code_prompt, code_max_tokens],
122
+ outputs=generated_code,
123
+ )
124
+
125
+ # Add AI-Powered Story World Builder Tab
126
+ with gr.Tab("Story World Builder"):
127
+ with gr.Row():
128
+ with gr.Column(scale=1, min_width=350):
129
+ world_name = gr.Textbox(label="World Name", placeholder="Enter your world name...")
130
+ locations = gr.Textbox(label="Locations", placeholder="Enter locations separated by commas...")
131
+ characters = gr.Textbox(label="Characters", placeholder="Enter characters separated by commas...")
132
+ create_button = gr.Button("Create World", variant='primary')
133
+ generate_story_button = gr.Button("Generate Story")
134
+ with gr.Column(scale=1, min_width=350):
135
+ world_status = gr.Textbox(label="World Status", interactive=False)
136
+ generated_story = gr.Textbox(label="Generated Story", interactive=False, lines=12, max_lines=20)
137
+
138
+
139
+ create_button.click(
140
+ define_world,
141
+ inputs=[world_name, locations, characters],
142
+ outputs=world_status,
143
+ )
144
+
145
+ gr.Markdown("### Generate a Story in Your World")
146
+ with gr.Row():
147
+ with gr.Column(scale=1, min_width=350):
148
+ story_world = gr.Textbox(label="Enter World Name", placeholder="World name...")
149
+ event = gr.Textbox(label="Event", placeholder="Describe an event in the world...")
150
+ selected_model = gr.Radio(choices=models_options_general, value="GPT2", label="Select Model", type="value")
151
+ max_length = gr.Slider(50, 300, value=150, step=1, label="Max Length")
152
+
153
+ with gr.Tab("Chatbot"):
154
+ gr.Markdown("### **Chat With AI Models**")
155
+ with gr.Row():
156
+ with gr.Column(scale=1, min_width=250):
157
+ username = gr.Textbox(label="Username", placeholder="Enter your username", lines=1)
158
+ chat_id = gr.Textbox(label="Chat ID (optional)", placeholder="Enter chat ID or leave blank for a new chat", lines=1)
159
+ selected_model = gr.Radio(models_options_chatbot, label="Select Model", value="dialoGPT")
160
+ send_button = gr.Button("Send", variant="primary")
161
+ reset_button = gr.Button("Reset Chat", variant="secondary")
162
+ with gr.Column(scale=1, min_width=250):
163
+ input_text = gr.Textbox(label="Your Message", placeholder="Type your message here...", lines=2)
164
+ emotion_output = gr.Textbox(label="Detected Emotion", interactive=False)
165
+ chat_output = gr.Textbox(label="Chat History", lines=10, interactive=False)
166
+
167
+ send_button.click(
168
+ chatbot_response_with_emotion,
169
+ inputs=[username, input_text, selected_model, chat_id],
170
+ outputs=[chat_output, chat_id, emotion_output]
171
+ )
172
+
173
+ reset_button.click(
174
+ reset_chat,
175
+ inputs=[username],
176
+ outputs=[chat_output]
177
+ )
178
+ gr.Markdown("---")
179
+ gr.Markdown("### **Fetch Chat IDs**")
180
+ with gr.Row():
181
+ with gr.Column(scale=1, min_width=250):
182
+ username = gr.Textbox(label="Username", placeholder="Enter your username", lines=1)
183
+ fetch_btn = gr.Button("Fetch", variant="primary")
184
+ with gr.Column(scale=1, min_width=250):
185
+ fetch_output = gr.Textbox(label="Chat IDs", lines=3, interactive=False)
186
+ fetch_btn.click(
187
+ chat_ids,
188
+ inputs=[username],
189
+ outputs=[fetch_output],
190
+ )
191
+
192
+ generate_story_button.click(
193
+ generate_story,
194
+ inputs=[selected_model, story_world, max_length, event],
195
+ outputs=generated_story,
196
+ )
197
+
198
+ gr.Markdown("Made by **AliMc2021** with ❤️")
199
+
200
+ # Launch the interface
201
+ interface.queue().launch(
202
+ server_port=7860,
203
+ show_error=True,
204
+ inline=False,
205
+ #share=True,
206
+ )
database.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sqlite3
2
+
3
+ # مسیر پایگاه داده
4
+ DATABASE_PATH = 'database.db'
5
+
6
+ # ایجاد یا بازنشانی جداول پایگاه داده
7
+ def create_db():
8
+ conn = sqlite3.connect(DATABASE_PATH)
9
+ c = conn.cursor()
10
+ c.execute("""
11
+ CREATE TABLE IF NOT EXISTS inputs (
12
+ id INTEGER PRIMARY KEY,
13
+ input_text TEXT,
14
+ selected_model TEXT
15
+ )
16
+ """)
17
+ c.execute("""
18
+ CREATE TABLE IF NOT EXISTS chats (
19
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
20
+ chat_id TEXT NOT NULL,
21
+ username TEXT NOT NULL,
22
+ user_message TEXT NOT NULL,
23
+ ai_response TEXT NOT NULL
24
+ )
25
+ """)
26
+ conn.commit()
27
+ conn.close()
28
+
29
+ # درج چت در جدول chats
30
+ def insert_chat(chat_id, username, user_message, ai_response):
31
+ try:
32
+ conn = sqlite3.connect(DATABASE_PATH)
33
+ cursor = conn.cursor()
34
+ cursor.execute("""
35
+ INSERT INTO chats (chat_id, username, user_message, ai_response)
36
+ VALUES (?, ?, ?, ?)
37
+ """, (str(chat_id), str(username), str(user_message), str(ai_response)))
38
+ conn.commit()
39
+ except sqlite3.Error as e:
40
+ print(f"Error inserting chat: {e}")
41
+ finally:
42
+ conn.close()
43
+
44
+ # درج داده در جدول inputs
45
+ def insert_into_db(input_text, selected_model):
46
+ try:
47
+ conn = sqlite3.connect(DATABASE_PATH)
48
+ c = conn.cursor()
49
+ c.execute("""
50
+ INSERT INTO inputs (input_text, selected_model)
51
+ VALUES (?, ?)
52
+ """, (str(input_text), str(selected_model)))
53
+ conn.commit()
54
+ except sqlite3.Error as e:
55
+ print(f"Error inserting into inputs: {e}")
56
+ finally:
57
+ conn.close()
58
+
59
+ # پاک کردن داده‌های جدول inputs
60
+ def clear_database():
61
+ try:
62
+ conn = sqlite3.connect(DATABASE_PATH)
63
+ c = conn.cursor()
64
+ c.execute("DELETE FROM inputs")
65
+ conn.commit()
66
+ except sqlite3.Error as e:
67
+ print(f"Error clearing database: {e}")
68
+ finally:
69
+ conn.close()
70
+
71
+ # بازیابی تمام ورودی‌ها از جدول inputs
72
+ def fetch_all_inputs():
73
+ try:
74
+ conn = sqlite3.connect(DATABASE_PATH)
75
+ c = conn.cursor()
76
+ c.execute("SELECT input_text, selected_model FROM inputs")
77
+ results = c.fetchall()
78
+ return results
79
+ except sqlite3.Error as e:
80
+ print(f"Error fetching inputs from database: {e}")
81
+ return []
82
+ finally:
83
+ conn.close()
84
+
85
+ # بازیابی پیام‌ها و پاسخ‌های مرتبط با یک chat_id
86
+ def fetch_chats_by_id(chat_id):
87
+ try:
88
+ conn = sqlite3.connect(DATABASE_PATH)
89
+ cursor = conn.cursor()
90
+ cursor.execute("""
91
+ SELECT user_message, ai_response FROM chats
92
+ WHERE chat_id = ?
93
+ """, (str(chat_id),))
94
+ rows = cursor.fetchall()
95
+ return rows
96
+ except sqlite3.Error as e:
97
+ print(f"Error fetching chats by ID: {e}")
98
+ return []
99
+ finally:
100
+ conn.close()
101
+
102
+ # بازیابی chat_id ها برای یک کاربر خاص
103
+ def fetch_ids_by_user(username):
104
+ try:
105
+ conn = sqlite3.connect(DATABASE_PATH)
106
+ cursor = conn.cursor()
107
+ cursor.execute("""
108
+ SELECT chat_id FROM chats
109
+ WHERE username = ?
110
+ """, (str(username),))
111
+ rows = cursor.fetchall()
112
+ return rows
113
+ except sqlite3.Error as e:
114
+ print(f"Error fetching chat IDs by username: {e}")
115
+ return []
116
+ finally:
117
+ conn.close()
118
+
119
+ # حذف چت‌های مرتبط با یک کاربر خاص
120
+ def clear_chats_by_username(username):
121
+ try:
122
+ conn = sqlite3.connect(DATABASE_PATH)
123
+ cursor = conn.cursor()
124
+ cursor.execute("""
125
+ DELETE FROM chats
126
+ WHERE username = ?
127
+ """, (str(username),))
128
+ conn.commit()
129
+ except sqlite3.Error as e:
130
+ print(f"Error clearing chats by username: {e}")
131
+ finally:
132
+ conn.close()
download.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+
4
+ # لیست مدل‌ها با مسیر ذخیره مشخص‌شده
5
+ MODEL_LIST = {
6
+ "gpt2": {"path": "openai-community/gpt2", "save_dir": "./models/gpt2"},
7
+ "gpt2-medium": {"path": "openai-community/gpt2-medium", "save_dir": "./models/gpt2-medium"},
8
+ "gpt2-persian": {"path": "flax-community/gpt2-medium-persian", "save_dir": "./models/gpt2-medium-persian"},
9
+ "gpt2-large": {"path": "openai-community/gpt2-large", "save_dir": "./models/gpt2-large"},
10
+ "codegen": {"path": "Salesforce/codegen-350M-mono", "save_dir": "./models/codegen"},
11
+ "dialogpt": {"path": "microsoft/DialoGPT-small", "save_dir": "./models/dialogpt"},
12
+ "dialogpt-medium": {"path": "microsoft/DialoGPT-medium", "save_dir": "./models/dialogpt-medium"},
13
+ "dialogpt-large": {"path": "microsoft/DialoGPT-large", "save_dir": "./models/dialogpt-large"}
14
+ }
15
+
16
+ def download_and_save_models():
17
+ """
18
+ دانلود و ذخیره تمام مدل‌ها در مسیرهای مشخص‌شده.
19
+ """
20
+ for model_name, model_info in MODEL_LIST.items():
21
+ model_path = model_info["path"] # مسیر مدل در Hugging Face
22
+ save_dir = model_info["save_dir"] # مسیر ذخیره مدل
23
+
24
+ print(f"Downloading and saving model: {model_name} to folder: {save_dir}")
25
+
26
+ if not os.path.exists(save_dir): # بررسی اینکه آیا فولدر ذخیره وجود دارد یا نه
27
+ os.makedirs(save_dir, exist_ok=True)
28
+
29
+ # دانلود و ذخیره مدل
30
+ model = AutoModelForCausalLM.from_pretrained(model_path)
31
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
32
+ model.save_pretrained(save_dir)
33
+ tokenizer.save_pretrained(save_dir)
34
+
35
+ print(f"Model {model_name} saved to {save_dir}")
36
+ else:
37
+ print(f"Model {model_name} already exists in {save_dir}")
38
+
39
+ if __name__ == "__main__":
40
+ download_and_save_models()
functions.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from model import load_model_lazy, unload_model
3
+ from generate import generate_code, generate_text
4
+ from database import *
5
+ import train
6
+ import uuid
7
+
8
+ train_pass = '6818'
9
+
10
+ # AI-Powered Story World Builder Functions
11
+ world_data = {}
12
+
13
+ def _generate_code(code_prompt, max_tokens, selected_model='codegen'):
14
+ """
15
+ Generate code based on the code prompt and selected model.
16
+ """
17
+ # Load the model lazily
18
+ model_data = load_model_lazy(selected_model)
19
+
20
+ # Generate code
21
+ generated_code = generate_code(model_data, code_prompt, max_tokens)
22
+
23
+ # Unload the model after use
24
+ unload_model(selected_model)
25
+
26
+ return generated_code
27
+
28
+ def generate(input_text, selected_model, max_new_token):
29
+ """
30
+ Generate text based on the selected model and input text.
31
+ """
32
+ # Load the model lazily
33
+ model_data = load_model_lazy(selected_model)
34
+
35
+ # Generate text
36
+ generated_text = generate_text(model_data, input_text, max_new_token)
37
+ insert_into_db(input_text, selected_model)
38
+
39
+ # Unload the model after use
40
+ unload_model(selected_model)
41
+
42
+ return generated_text
43
+
44
+ def define_world(world_name, locations, characters):
45
+ """
46
+ Define a new story world with locations and characters.
47
+ """
48
+ world_data["world_name"] = world_name
49
+ world_data["locations"] = locations.split(", ")
50
+ world_data["characters"] = characters.split(", ")
51
+ return f"World '{world_name}' created with locations: {locations} and characters: {characters}"
52
+
53
+ def generate_story(model, world_name, event, max_length):
54
+ """
55
+ Generate a story based on the defined world and an event.
56
+ """
57
+ if not world_name or not world_data.get("world_name"):
58
+ return "Error: Please define a world first."
59
+
60
+ if world_name != world_data["world_name"]:
61
+ return f"Error: World '{world_name}' not found. Define it first."
62
+
63
+ prompt = f"In the world of {world_name}, {event}. Locations: {', '.join(world_data['locations'])}. Characters: {', '.join(world_data['characters'])}."
64
+
65
+ generated_story = generate(prompt, model, max_length)
66
+ return generated_story
67
+
68
+
69
+ # Story Mode
70
+ story = []
71
+
72
+ # Main Function For Story Generating
73
+ def interactive_story(input_text, selected_model, max_length):
74
+ global story
75
+ if input_text.strip():
76
+ story.append(input_text) # Add user input to story
77
+ current_text = " ".join(story) # Build cumulative story
78
+
79
+ generated_text = generate(current_text, selected_model, max_length)
80
+ story.append(generated_text) # Add generated text to story
81
+
82
+ return current_text + "\n\n" + generated_text
83
+
84
+
85
+ def reset_story():
86
+ global story
87
+ story = [] # Reset story
88
+ return ""
89
+
90
# Scenario twists applied to the parallel worlds; cycled when more worlds
# are requested than twists are defined.
_WORLD_TWISTS = (
    "This world leads to a parallel universe!",
    "In this world, time splits into different periods!",
    "This world faces a strange physical anomaly that changes everything!",
)

def generate_multiverse(input_text, selected_model, max_new_tokens, num_worlds=3):
    """
    Generate multiple parallel worlds from a single input text.

    Fix: the original hard-coded twists for i == 0/1/2 only, so any world
    beyond the third received a bare "World N: " prompt without the input
    text. Twists now cycle, keeping the first three worlds identical to
    the original while supporting arbitrary num_worlds.
    """
    worlds = []
    for i in range(num_worlds):
        twist = _WORLD_TWISTS[i % len(_WORLD_TWISTS)]
        world_intro = f"World {i + 1}: {input_text} {twist}"
        # Generate the story for this world.
        generated_text = generate(world_intro, selected_model, max_new_tokens)
        worlds.append(generated_text)

    return "\n\n".join(worlds)
112
+
113
+
114
+
115
# Verify the password, dispatch to the selected training method, and clear
# the database when it was the data source.
def verify_and_train_combined(selected_model, train_method, epochs, batch_size, password, custom_text, dataset_file, dataset_name, split_name):
    """Password-gate training and route to the chosen training backend.

    Returns a human-readable status string for the UI.
    """
    if password != train_pass:
        return "Error: Incorrect password. Training not started."

    if train_method == "Custom Text" and custom_text.strip():
        train.train_model_with_text(selected_model, custom_text, epochs, batch_size)
        return f"Training completed for model: {selected_model} using custom text."

    if train_method == "Database":
        train.train_model_with_database(selected_model, epochs, batch_size)
        clear_database()
        return f"Training completed for model: {selected_model} using database. Database cleared."

    if train_method == "Dataset File" and dataset_file is not None:
        try:
            train.train_model_with_dataset(selected_model, epochs, batch_size, dataset_file.name)
            return f"Training completed for model: {selected_model} using uploaded dataset."
        except Exception as e:
            return f"Error during training with dataset: {str(e)}"

    if train_method == "Hugging Face Dataset" and dataset_name.strip():
        try:
            train.train_model_with_hf_dataset(selected_model, epochs, batch_size, dataset_name, split=split_name.strip())
            return f"Training completed for model: {selected_model} using Hugging Face dataset {dataset_name}."
        except Exception as e:
            return f"Error during training with Hugging Face dataset: {str(e)}"

    return "Error: Invalid input for training. Please check your selections."
146
+
147
def limit_chat_history(chat_history, max_turns=3):
    """Keep only the most recent *max_turns* question/answer pairs.

    Each turn occupies two lines (one user line, one AI line), so at most
    ``max_turns * 2`` trailing lines are retained.
    """
    lines = chat_history.split("\n")
    keep = max_turns * 2
    if len(lines) > keep:
        lines = lines[-keep:]
    return "\n".join(lines)
155
+
156
def chatbot_response(username, input_text, selected_model, chat_id=None):
    """Generate one chat turn for *username* and persist it to the database.

    Returns ``(updated_history, chat_id)``.

    Fix: the original error path returned three values while the success
    path returned two, so callers received inconsistent tuple shapes; the
    error path now also returns two.
    """
    if not username.strip():
        # Same tuple arity as the success path.
        return "Error: Please enter a username.", ""

    # Create a fresh conversation id when none was supplied.
    if not chat_id or chat_id.strip() == "":
        chat_id = str(uuid.uuid4())

    # Load the model lazily.
    model_data = load_model_lazy(selected_model)

    # Rebuild the transcript from previously stored turns.
    previous_chats = fetch_chats_by_id(chat_id)
    chat_history = "\n".join([f"User: {msg}\nAI: {resp}" for msg, resp in previous_chats])

    # Trim the context to the most recent turns to bound the prompt size.
    if chat_history:
        chat_history = limit_chat_history(chat_history, max_turns=3)
        prompt = f"{chat_history}\nUser: {input_text}\nAI:"
    else:
        prompt = f"User: {input_text}\nAI:"

    # Generate the reply (at most 150 new tokens).
    max_new_token = 150
    full_response = generate_text(model_data, prompt, max_new_token)

    # Keep only the text after the final "AI:" marker.
    ai_response = full_response.split("AI:")[-1].strip()

    unload_model(selected_model)

    # Persist the exchange.
    insert_chat(chat_id, username, input_text, ai_response)

    # Return the updated transcript and the conversation id.
    updated_history = chat_history + f"\nUser: {input_text}\nAI: {ai_response}"
    return updated_history, chat_id
193
+
194
def chat_ids(username):
    """Return every conversation id stored for *username*."""
    return fetch_ids_by_user(username)
196
+
197
def reset_chat(username):
    """Delete all chats stored for *username*; returns (status, cleared box)."""
    clear_chats_by_username(username)  # remove the user's chats from the DB
    return f"Chat history cleared for user: {username}", ""
200
+
201
# Sentiment-analysis helpers
def analyze_emotion(user_input):
    """Classify the emotion of *user_input* with the bert-emotion model.

    Returns ``(emotion, confidence)`` where *emotion* is a label string.

    Fix: the non-pipeline fallback returned a raw class index (int) while
    the pipeline branch returned a label string; the fallback now maps the
    index through emotion_label so both branches return the same type.
    """
    # Load the emotion model lazily.
    model_data = load_model_lazy("bert-emotion")

    if "pipeline" in model_data:
        # Pipeline path: the classifier already yields a label string.
        emotion_pipeline = model_data["pipeline"]
        result = emotion_pipeline(user_input)
        emotion = result[0]['label']
        confidence = result[0]['score']
    else:
        # Manual path for models loaded without pipeline support.
        emotion_tokenizer = model_data['tokenizer']
        emotion_model = model_data['model']
        inputs = emotion_tokenizer(user_input, return_tensors="pt", truncation=True, padding=True)
        outputs = emotion_model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
        # assumes the model's class order matches emotion_label's list — TODO confirm
        emotion = emotion_label(probs.argmax().item())
        confidence = probs.max().item()

    unload_model("bert-emotion")
    return emotion, confidence
224
+
225
def emotion_label(index):
    """Map a bert-emotion class index (0-5) to its human-readable label."""
    labels = ("anger", "joy", "sadness", "fear", "love", "surprise")
    return labels[index]
228
+
229
def chatbot_response_with_emotion(username, input_text, selected_model, chat_id=None):
    """One chat turn with the user's detected emotion injected into the prompt.

    Returns ``(updated_history, chat_id, user_emotion)``.
    """
    if not username.strip():
        return "Error: Please enter a username.", "", str(uuid.uuid4())

    if not chat_id or chat_id.strip() == "":
        chat_id = str(uuid.uuid4())

    # Load the chat model.
    model_data = load_model_lazy(selected_model)

    # Detect the emotion of the incoming message.
    user_emotion, _confidence = analyze_emotion(input_text)

    # Rebuild the transcript from previously stored turns.
    previous_chats = fetch_chats_by_id(chat_id)
    chat_history = "\n".join(f"User: {msg}\nAI: {resp}" for msg, resp in previous_chats)

    # Trim the history and prepend the emotion tag to steer the reply.
    if chat_history:
        chat_history = limit_chat_history(chat_history, max_turns=3)
        prompt = f"[Emotion: {user_emotion}]\n{chat_history}\nUser: {input_text}\nAI:"
    else:
        prompt = f"[Emotion: {user_emotion}]\nUser: {input_text}\nAI:"

    # Generate at most 150 new tokens and keep the text after "AI:".
    full_response = generate_text(model_data, prompt, 150)
    ai_response = full_response.split("AI:")[-1].strip()

    # Release both the chat and the emotion models.
    unload_model(selected_model)
    unload_model("bert-emotion")

    # Persist the exchange.
    insert_chat(chat_id, username, input_text, ai_response)

    # Return the updated transcript, the conversation id, and the emotion.
    updated_history = chat_history + f"\nUser: {input_text}\nAI: {ai_response}"
    return updated_history, chat_id, user_emotion
generate.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch

# Fixed RNG seed so repeated generations with the same prompt are deterministic.
seed = 0
4
+
5
def generate_text(model_data, input_text, max_new_token):
    """
    Generate text using the given model data.

    Args:
        model_data: dict holding either a "pipeline" entry or a
            "model"/"tokenizer" pair (as produced by load_model_lazy).
        input_text: prompt string.
        max_new_token: number of NEW tokens to generate.

    Fix: the pipeline branch passed ``max_length`` (which also counts the
    prompt tokens) while the legacy branch used ``max_new_tokens``; both
    branches now interpret the parameter the same way.
    """
    if "pipeline" in model_data:
        model_pipeline = model_data["pipeline"]
        generated_text = model_pipeline(
            input_text,
            max_new_tokens=max_new_token,
            do_sample=False,  # greedy decoding
            truncation=True   # enable truncation of over-long prompts
        )[0]["generated_text"]
        return generated_text

    # Legacy path for models loaded without a pipeline.
    model = model_data["model"]
    tokenizer = model_data["tokenizer"]

    # Ensure a pad token exists (GPT-style tokenizers ship without one).
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Seed CPU and CUDA RNGs for reproducible output.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    encodings = tokenizer(
        input_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512
    )
    input_ids = encodings.input_ids
    attention_mask = encodings.attention_mask

    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=max_new_token,
        do_sample=False,  # greedy decoding
        pad_token_id=tokenizer.eos_token_id,
        repetition_penalty=1.2,
        no_repeat_ngram_size=3,
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
51
+
52
def generate_code(model_data, prompt, max_new_tokens):
    """
    Generate code based on the provided prompt using a code-specific model.

    Decoding is greedy and seeded, so identical prompts produce identical
    output.
    """
    model = model_data["model"]
    tokenizer = model_data["tokenizer"]

    # Seed RNGs for deterministic output.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Tokenize the prompt and build a matching all-ones attention mask.
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    attention_mask = torch.ones(input_ids.shape, device=input_ids.device)

    # Generate the code.
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=max_new_tokens,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,  # pad with the EOS token
        repetition_penalty=1.2,               # discourage verbatim repeats
        no_repeat_ngram_size=3,               # block repeated 3-grams
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
lib.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ gradio
4
+ datasets
5
+ numpy
6
+ Pillow
7
+ scikit-learn
8
+ wandb
9
+ pyarrow
10
+ pandas
11
+ chardet
12
+ accelerate
13
+ safetensors
14
+ diffusers
15
+ jax
model.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import torch
import gc
from transformers import AutoModelForCausalLM, AutoTokenizer, GPT2LMHeadModel, GPT2Tokenizer, pipeline, AutoModelForSequenceClassification

# Dictionary of models and paths.
# Each entry maps a UI model name to its on-disk path, the model/tokenizer
# classes used to load it, and whether it is served through a pipeline.
model_dict = {
    "GPT2": {"path": "./models/gpt2", "library": GPT2LMHeadModel, "tokenizer": GPT2Tokenizer, "use_pipeline": False},
    "GPT2-medium": {"path": "./models/gpt2-medium", "library": GPT2LMHeadModel, "tokenizer": GPT2Tokenizer, "use_pipeline": False},
    "GPT2-large": {"path": "./models/gpt2-large", "library": AutoModelForCausalLM, "tokenizer": AutoTokenizer, "use_pipeline": False},
    "GPT2-persian": {"path": "./models/gpt2-medium-persian", "library": GPT2LMHeadModel, "tokenizer": AutoTokenizer, "use_pipeline": False},
    "codegen": {"path": "./models/codegen", "library": AutoModelForCausalLM, "tokenizer": AutoTokenizer, "use_pipeline": False},
    "dialoGPT": {"path": "./models/dialogpt", "library": AutoModelForCausalLM, "tokenizer": AutoTokenizer, "use_pipeline": False},
    "dialoGPT-medium": {"path": "./models/dialogpt-medium", "library": AutoModelForCausalLM, "tokenizer": AutoTokenizer, "use_pipeline": False},
    "dialoGPT-large": {"path": "./models/dialogpt-large", "library": AutoModelForCausalLM, "tokenizer": AutoTokenizer, "use_pipeline": False},
    "GPT-Neo-125M": {"path": "./models/GPT-neo-125M", "library": AutoModelForCausalLM, "tokenizer": AutoTokenizer, "use_pipeline": True},  # newly added model
    "bert-emotion": {"path": "./models/bert-emotion", "library": AutoModelForSequenceClassification, "tokenizer": AutoTokenizer, "use_pipeline": True},
}

# Cache of already-loaded models, keyed by model name.
loaded_models = {}
20
+
21
def load_model_lazy(model_name):
    """Load *model_name* on demand and cache it in ``loaded_models``.

    Returns either ``{"pipeline": ...}`` or ``{"model": ..., "tokenizer": ...}``
    depending on the model's ``use_pipeline`` flag.

    Raises:
        ValueError: if *model_name* is not a string or is unknown.

    Fix: the original wrote to the ``loaded_models`` cache but never
    consulted it, so every call reloaded the model from disk. The guard
    also checks the entry's contents so a stale/emptied entry triggers a
    reload instead of being returned.
    """
    if not isinstance(model_name, str):
        raise ValueError(f"Model name must be a string, not {type(model_name)}")
    if model_name not in model_dict:
        raise ValueError(f"Model {model_name} not found!")

    # Reuse a cached entry when it still holds a usable pipeline or model.
    cached = loaded_models.get(model_name)
    if cached and ("pipeline" in cached or "model" in cached):
        return cached

    model_info = model_dict[model_name]
    print(f"Loading model: {model_name}")

    # Pipeline-backed models.
    if model_info.get("use_pipeline", False):
        print(f"Using pipeline for model: {model_name}")
        if model_name == "bert-emotion":
            # Classification head -> text-classification task.
            model_pipeline = pipeline(
                "text-classification",
                model=model_info["path"],
                truncation=True
            )
        else:
            # Generative models use text-generation.
            # NOTE(review): pad_token_id=50256 is the GPT-2 EOS id — confirm
            # it is valid for every pipeline model registered here.
            model_pipeline = pipeline(
                "text-generation",
                model=model_info["path"],
                truncation=True,
                pad_token_id=50256
            )
        loaded_models[model_name] = {"pipeline": model_pipeline}
        return loaded_models[model_name]

    # Legacy path: instantiate the model and tokenizer classes directly.
    model = model_info["library"].from_pretrained(model_info["path"])
    tokenizer = model_info["tokenizer"].from_pretrained(model_info["path"])

    # Ensure a pad token exists for batching/generation.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    loaded_models[model_name] = {"model": model, "tokenizer": tokenizer}
    return loaded_models[model_name]
61
+
62
def unload_model(model_name):
    """Release *model_name* from the cache and reclaim memory.

    Safe to call for models that were never loaded.

    Fix: the original deleted only the inner "pipeline"/"model"/"tokenizer"
    keys, leaving a stale empty dict entry behind in ``loaded_models``;
    the whole entry is now removed.
    """
    global loaded_models
    if model_name in loaded_models:
        del loaded_models[model_name]
        torch.cuda.empty_cache()  # release cached GPU memory, no-op on CPU
        gc.collect()
        print(f"Model {model_name} unloaded and memory cleared.")
    else:
        print(f"Model {model_name} was not loaded.")
train.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch.utils.data import Dataset, DataLoader
3
+ from transformers import AdamW
4
+ from model import load_model_lazy, unload_model
5
+ from database import fetch_all_inputs, clear_database # مدیریت دیتابیس
6
+ from datasets import load_dataset
7
+
8
class TextDataset(Dataset):
    """Tokenize texts lazily, yielding (input_ids, attention_mask) pairs."""

    def __init__(self, texts, tokenizer, max_length=512):
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Tokenize on demand; every sample is padded/truncated to max_length.
        encoded = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt"
        )
        # squeeze(0) drops the batch dimension added by return_tensors="pt".
        return encoded.input_ids.squeeze(0), encoded.attention_mask.squeeze(0)
28
+
29
def train_model_with_text(selected_model, custom_text, epochs, batch_size):
    """
    Fine-tune *selected_model* on a single custom text.

    Fix: load_model_lazy returns a dict; the original tuple-unpacking
    assigned the dict's *keys* (the strings "model"/"tokenizer") instead
    of the model and tokenizer objects, crashing on first use.
    """
    model_data = load_model_lazy(selected_model)
    model = model_data["model"]
    tokenizer = model_data["tokenizer"]

    dataset = TextDataset([custom_text], tokenizer)
    dataloader = DataLoader(dataset, batch_size=min(batch_size, len(dataset)), shuffle=True)

    _train_model(model, tokenizer, dataloader, epochs, selected_model, "custom_text")
    unload_model(selected_model)
39
+
40
def train_model_with_database(selected_model, epochs, batch_size):
    """
    Fine-tune *selected_model* on texts stored in the database, then clear it.

    Fix: unpack the dict returned by load_model_lazy (the original
    tuple-unpacking produced the key strings, not the objects). Also
    release the model on the empty-data early return, which previously
    left it loaded.
    """
    model_data = load_model_lazy(selected_model)
    model = model_data["model"]
    tokenizer = model_data["tokenizer"]

    # Keep only the rows recorded for the selected model.
    inputs_data = fetch_all_inputs()
    texts = [input_text for input_text, model_name in inputs_data if model_name == selected_model]

    if not texts:
        print("Error: No data found in the database for the selected model.")
        unload_model(selected_model)
        return

    dataset = TextDataset(texts, tokenizer)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    _train_model(model, tokenizer, dataloader, epochs, selected_model, "database")
    clear_database()
    unload_model(selected_model)
58
+
59
def train_model_with_dataset(selected_model, epochs, batch_size, dataset_path):
    """
    Fine-tune *selected_model* on an uploaded plain-text dataset
    (one training example per line).

    Fix: unpack the dict returned by load_model_lazy (the original
    tuple-unpacking produced the key strings, not the objects). Lines are
    stripped and blank lines dropped so the tokenizer never sees
    newline-only samples.
    """
    model_data = load_model_lazy(selected_model)
    model = model_data["model"]
    tokenizer = model_data["tokenizer"]

    # Read the dataset file.
    with open(dataset_path, "r", encoding="utf-8") as f:
        texts = [line.strip() for line in f if line.strip()]

    if not texts:
        print("Error: Dataset is empty.")
        unload_model(selected_model)
        return

    dataset = TextDataset(texts, tokenizer)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    _train_model(model, tokenizer, dataloader, epochs, selected_model, "dataset")
    unload_model(selected_model)
78
+
79
+ def _train_model(model, tokenizer, dataloader, epochs, model_name, method):
80
+ """
81
+ منطق مشترک آموزش مدل.
82
+ """
83
+ optimizer = AdamW(model.parameters(), lr=5e-5)
84
+
85
+ # انتقال مدل به GPU در صورت وجود
86
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
87
+ model.to(device)
88
+
89
+ model.train()
90
+ for epoch in range(epochs):
91
+ total_loss = 0
92
+ for step, (input_ids, attention_mask) in enumerate(dataloader):
93
+ optimizer.zero_grad()
94
+ input_ids = input_ids.to(device)
95
+ attention_mask = attention_mask.to(device)
96
+
97
+ # محاسبه خروجی و خطا
98
+ outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids)
99
+ loss = outputs.loss
100
+ loss.backward()
101
+ optimizer.step()
102
+ total_loss += loss.item()
103
+
104
+ print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(dataloader)}")
105
+
106
+ # ذخیره مدل
107
+ save_path = f"trained_{model_name}_{method}"
108
+ model.save_pretrained(save_path)
109
+ tokenizer.save_pretrained(save_path)
110
+ print(f"Model {model_name} trained with {method} and saved to {save_path}.")
111
+
112
def train_model_with_hf_dataset(selected_model, epochs, batch_size, dataset_name, split="train"):
    """
    Fine-tune *selected_model* on a Hugging Face Hub dataset.

    Args:
        selected_model (str): name of the model to train.
        epochs (int): number of epochs.
        batch_size (int): batch size.
        dataset_name (str): dataset identifier on the Hub.
        split (str): dataset split to load (train, test, validation).

    Fixes: unpack the dict returned by load_model_lazy; pass the split as
    a keyword (positionally it is interpreted as the dataset *config
    name*); and extract a text column instead of feeding raw example
    dicts to the tokenizer.
    """
    model_data = load_model_lazy(selected_model)
    model = model_data["model"]
    tokenizer = model_data["tokenizer"]

    # Load the requested split from the Hub.
    hf_dataset = load_dataset(dataset_name, split=split)

    if hf_dataset is None or len(hf_dataset) == 0:
        print(f"Error: Dataset {dataset_name} ({split} split) is empty or invalid.")
        unload_model(selected_model)
        return

    # Prefer the conventional "text" column; otherwise fall back to the
    # first column. NOTE(review): confirm against the datasets actually used.
    if "text" in hf_dataset.column_names:
        texts = hf_dataset["text"]
    else:
        texts = hf_dataset[hf_dataset.column_names[0]]

    dataset = TextDataset(texts, tokenizer)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    _train_model(model, tokenizer, dataloader, epochs, selected_model, f"huggingface_{dataset_name}")
    unload_model(selected_model)