abdullahmubeen10 committed on
Commit
6cd8a90
·
verified ·
1 Parent(s): 98acb76

Upload 16 files

Browse files
.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [theme]
2
+ base="light"
3
+ primaryColor="#29B4E8"
Demo.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import sparknlp
3
+ import os
4
+ import pandas as pd
5
+
6
+ from sparknlp.base import *
7
+ from sparknlp.annotator import *
8
+ from pyspark.ml import Pipeline
9
+ from sparknlp.pretrained import PretrainedPipeline
10
+ from streamlit_tags import st_tags
11
+
12
# Page configuration: wide layout, sidebar state left to Streamlit's default logic.
st.set_page_config(layout="wide", initial_sidebar_state="auto")

# Stylesheet injected once at the top of the page; it defines the classes
# (.main-title, .section) used by the HTML snippets rendered further down.
_PAGE_CSS = """
    <style>
        .main-title {
            font-size: 36px;
            color: #4A90E2;
            font-weight: bold;
            text-align: center;
        }
        .section {
            background-color: #f9f9f9;
            padding: 10px;
            border-radius: 10px;
            margin-top: 10px;
        }
        .section p, .section ul {
            color: #666666;
        }
    </style>
"""
# unsafe_allow_html is required, otherwise the <style> tag is escaped as text.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
38
+
39
@st.cache_resource
def init_spark():
    """Start Spark NLP and return the object produced by ``sparknlp.start()``.

    Decorated with ``st.cache_resource`` so the start-up call is not repeated
    on every Streamlit rerun.
    """
    session = sparknlp.start()
    return session
42
+
43
@st.cache_resource
def create_pipeline(model):
    """Build the (unfitted) ConvNeXT image-classification pipeline.

    The pipeline has two stages: an ``ImageAssembler`` that reads the
    ``image`` column and writes ``image_assembler``, followed by a pretrained
    ``ConvNextForImageClassification`` that writes its prediction to ``class``.

    Args:
        model: Name of the pretrained Spark NLP model to load, e.g.
            ``"image_classifier_convnext_tiny_224_local"`` (the value chosen
            in the sidebar).

    Returns:
        The assembled ``Pipeline``; the caller is responsible for fitting it.
    """
    image_assembler = ImageAssembler() \
        .setInputCol("image") \
        .setOutputCol("image_assembler")

    # Load the model the caller asked for instead of a hard-coded name, so the
    # sidebar selection (and the cache key derived from `model`) is honoured.
    image_classifier = ConvNextForImageClassification \
        .pretrained(model, "en") \
        .setInputCols(["image_assembler"]) \
        .setOutputCol("class")

    # The original passed an undefined name here (`image_assembler` while the
    # variable was `imageAssembler`), raising NameError on the first call.
    return Pipeline(stages=[image_assembler, image_classifier])
56
+
57
def fit_data(pipeline, data):
    """Classify one image and return the predicted label.

    Args:
        pipeline: Unfitted pipeline, as produced by ``create_pipeline``.
        data: Image input handed to ``LightPipeline.fullAnnotateImage``
            (the script passes a file path).

    Returns:
        The ``result`` of the first ``class`` annotation for the first input.

    Note:
        Relies on the module-level ``spark`` session being assigned before
        this function is called.
    """
    # Fit on a one-row placeholder frame; presumably the pretrained stages
    # need no real training data -- TODO confirm.
    placeholder_df = spark.createDataFrame([['']]).toDF('text')
    fitted_model = pipeline.fit(placeholder_df)

    annotations = LightPipeline(fitted_model).fullAnnotateImage(data)
    first_prediction = annotations[0]['class'][0]
    return first_prediction.result
63
+
64
def save_uploadedfile(uploadedfile, directory=None):
    """Persist an uploaded file to disk and return the path it was written to.

    Args:
        uploadedfile: File-like object with a ``name`` attribute and either a
            ``getbuffer()`` or a ``read()`` method (e.g. the object returned
            by ``st.file_uploader``).
        directory: Target directory. Defaults to the module-level
            ``IMAGE_FILE_PATH`` when omitted.

    Returns:
        The path of the file that was written.

    Raises:
        ValueError: If the upload's name contains no usable file name.
    """
    target_dir = IMAGE_FILE_PATH if directory is None else directory

    # The name is supplied by the client, so strip any directory components
    # (both separator styles) to keep the write inside `target_dir`.
    safe_name = os.path.basename(str(uploadedfile.name).replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploadedfile.name!r}")

    filepath = os.path.join(target_dir, safe_name)
    with open(filepath, "wb") as f:
        if hasattr(uploadedfile, 'getbuffer'):
            f.write(uploadedfile.getbuffer())
        else:
            f.write(uploadedfile.read())
    return filepath
71
+
72
# Sidebar: pretrained-model selector. `model` is consumed later when the
# pipeline is built.
model_list = ['image_classifier_convnext_tiny_224_local']
model = st.sidebar.selectbox(
    label="Choose the pretrained model",
    options=model_list,
    help="For more info about the models visit: https://sparknlp.org/models",
)

# Page heading, styled through the .main-title CSS class.
page_title_html = '<div class="main-title">ConvNext For Image Classification</div>'
st.markdown(page_title_html, unsafe_allow_html=True)

# Sidebar: badge linking to the reference notebook.
link = """
<a href="https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/annotation/image/ConvNextForImageClassification.ipynb">
    <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
</a>
"""
sidebar = st.sidebar
sidebar.markdown('Reference notebook:')
sidebar.markdown(link, unsafe_allow_html=True)
92
+
93
# Load the bundled example images.
IMAGE_FILE_PATH = "inputs"
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")
# Case-insensitive suffix match, so e.g. ".PNG" / ".JPEG" files are listed too.
image_files = sorted(
    file for file in os.listdir(IMAGE_FILE_PATH)
    if file.lower().endswith(IMAGE_EXTENSIONS)
)

img_options = st.selectbox("Select an image", image_files)
uploadedfile = st.file_uploader("Try it for yourself!")

# An upload takes precedence over the example picked in the select box.
selected_image = None
if uploadedfile:
    # Prefer the path reported by save_uploadedfile when it returns one;
    # otherwise fall back to the conventional location under IMAGE_FILE_PATH.
    saved_path = save_uploadedfile(uploadedfile)
    selected_image = saved_path or f"{IMAGE_FILE_PATH}/{uploadedfile.name}"
elif img_options:
    selected_image = f"{IMAGE_FILE_PATH}/{img_options}"

if selected_image is None:
    # No bundled examples and nothing uploaded: stop this run cleanly instead
    # of failing below with a NameError on `selected_image`.
    st.warning("No image available. Please upload an image to classify.")
    st.stop()

st.subheader('Classified Image')

image_size = st.slider('Image Size', 400, 1000, value=400, step=100)

# `selected_image` already includes IMAGE_FILE_PATH; the original first tried
# a doubly-prefixed path and relied on a bare `except:` to recover.
st.image(selected_image, width=image_size)

st.subheader('Classification')

# NOTE: the name `spark` must stay module-level -- fit_data() reads it as a global.
spark = init_spark()
# Lower-case name so the imported `Pipeline` class is not shadowed.
pipeline = create_pipeline(model)
output = fit_data(pipeline, selected_image)

st.markdown(f'This image has been classified as : **{output}**')
Dockerfile ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Download base image ubuntu 18.04
2
+ FROM ubuntu:18.04
3
+
4
+ # Set environment variables
5
+ ENV NB_USER jovyan
6
+ ENV NB_UID 1000
7
+ ENV HOME /home/${NB_USER}
8
+
9
+ # Install required packages
10
+ RUN apt-get update && apt-get install -y \
11
+ tar \
12
+ wget \
13
+ bash \
14
+ rsync \
15
+ gcc \
16
+ libfreetype6-dev \
17
+ libhdf5-serial-dev \
18
+ libpng-dev \
19
+ libzmq3-dev \
20
+ python3 \
21
+ python3-dev \
22
+ python3-pip \
23
+ unzip \
24
+ pkg-config \
25
+ software-properties-common \
26
+ graphviz \
27
+ openjdk-8-jdk \
28
+ ant \
29
+ ca-certificates-java \
30
+ && apt-get clean \
31
+ && update-ca-certificates -f;
32
+
33
+ # Install Python 3.8 and pip
34
+ RUN add-apt-repository ppa:deadsnakes/ppa \
35
+ && apt-get update \
36
+ && apt-get install -y python3.8 python3-pip \
37
+ && apt-get clean;
38
+
39
+ # Set up JAVA_HOME
40
+ ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
41
+ RUN mkdir -p ${HOME} \
42
+ && echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> ${HOME}/.bashrc \
43
+ && chown -R ${NB_UID}:${NB_UID} ${HOME}
44
+
45
+ # Create a new user named "jovyan" with user ID 1000
46
+ RUN useradd -m -u ${NB_UID} ${NB_USER}
47
+
48
+ # Switch to the "jovyan" user
49
+ USER ${NB_USER}
50
+
51
+ # Set home and path variables for the user
52
+ ENV HOME=/home/${NB_USER} \
53
+ PATH=/home/${NB_USER}/.local/bin:$PATH
54
+
55
+ # Set the working directory to the user's home directory
56
+ WORKDIR ${HOME}
57
+
58
+ # Upgrade pip and install Python dependencies
59
+ RUN python3.8 -m pip install --upgrade pip
60
+ COPY requirements.txt /tmp/requirements.txt
61
+ RUN python3.8 -m pip install -r /tmp/requirements.txt
62
+
63
+ # Copy the application code into the container at /home/jovyan
64
+ COPY --chown=${NB_USER}:${NB_USER} . ${HOME}
65
+
66
+ # Expose port for Streamlit
67
+ EXPOSE 7860
68
+
69
+ # Define the entry point for the container
70
+ ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]
inputs/image-1.png ADDED
inputs/image-10.png ADDED
inputs/image-11.png ADDED
inputs/image-2.png ADDED
inputs/image-3.png ADDED
inputs/image-4.png ADDED
inputs/image-5.png ADDED
inputs/image-6.png ADDED
inputs/image-7.png ADDED
inputs/image-8.png ADDED
inputs/image-9.png ADDED
pages/Workflow & Model Overview.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ # Custom CSS for better styling
4
+ st.markdown("""
5
+ <style>
6
+ .main-title {
7
+ font-size: 36px;
8
+ color: #4A90E2;
9
+ font-weight: bold;
10
+ text-align: center;
11
+ }
12
+ .sub-title {
13
+ font-size: 24px;
14
+ color: #4A90E2;
15
+ margin-top: 20px;
16
+ }
17
+ .section {
18
+ background-color: #f9f9f9;
19
+ padding: 15px;
20
+ border-radius: 10px;
21
+ margin-top: 20px;
22
+ }
23
+ .section h2 {
24
+ font-size: 22px;
25
+ color: #4A90E2;
26
+ }
27
+ .section p, .section ul {
28
+ color: #666666;
29
+ }
30
+ .link {
31
+ color: #4A90E2;
32
+ text-decoration: none;
33
+ }
34
+ .benchmark-table {
35
+ width: 100%;
36
+ border-collapse: collapse;
37
+ margin-top: 20px;
38
+ }
39
+ .benchmark-table th, .benchmark-table td {
40
+ border: 1px solid #ddd;
41
+ padding: 8px;
42
+ text-align: left;
43
+ }
44
+ .benchmark-table th {
45
+ background-color: #4A90E2;
46
+ color: white;
47
+ }
48
+ .benchmark-table td {
49
+ background-color: #f2f2f2;
50
+ }
51
+ </style>
52
+ """, unsafe_allow_html=True)
53
+
54
+ # Main Title
55
+ st.markdown('<div class="main-title">ConvNeXT Image Classification</div>', unsafe_allow_html=True)
56
+
57
+ # Description
58
+ st.markdown("""
59
+ <div class="section">
60
+ <p><strong>ConvNeXT</strong> is a state-of-the-art image classification model developed by Facebook. The model <strong>ConvNextForImageClassification</strong> can load ConvNeXT models that compete favorably with Transformers in terms of accuracy and scalability, achieving 87.8% ImageNet top-1 accuracy and outperforming Swin Transformers on COCO detection and ADE20K segmentation, while maintaining the simplicity and efficiency of standard ConvNets.</p>
61
+ <p>This annotator is compatible with all the models trained/fine-tuned by using ConvNextForImageClassification for PyTorch or TFConvNextForImageClassification for TensorFlow models in Hugging Face.</p>
62
+ <p>The model used in this demo is <code>image_classifier_convnext_tiny_224_local</code>, adapted from Hugging Face and curated for scalability and production-readiness using Spark NLP.</p>
63
+ </div>
64
+ """, unsafe_allow_html=True)
65
+
66
+ # Image Classification Overview
67
+ st.markdown('<div class="sub-title">What is Image Classification?</div>', unsafe_allow_html=True)
68
+ st.markdown("""
69
+ <div class="section">
70
+ <p><strong>Image Classification</strong> is a computer vision task where an algorithm is trained to recognize and classify objects within images. This process involves assigning a label or category to an image based on its visual content.</p>
71
+ <h2>How It Works</h2>
72
+ <p>Image classification typically involves the following steps:</p>
73
+ <ul>
74
+ <li><strong>Data Collection</strong>: Gather a dataset of labeled images.</li>
75
+ <li><strong>Preprocessing</strong>: Normalize and resize images to prepare them for the model.</li>
76
+ <li><strong>Model Training</strong>: Use a machine learning model, such as ConvNeXT, to learn patterns and features from the images.</li>
77
+ <li><strong>Inference</strong>: Apply the trained model to new images to predict their labels.</li>
78
+ </ul>
79
+ <h2>Why Use Image Classification?</h2>
80
+ <p>Image classification can automate and streamline many tasks, such as:</p>
81
+ <ul>
82
+ <li>Identifying objects in photos for content tagging.</li>
83
+ <li>Enhancing search functionality by categorizing images.</li>
84
+ <li>Supporting autonomous systems like self-driving cars.</li>
85
+ </ul>
86
+ <h2>Applications</h2>
87
+ <p>Applications of image classification span across various industries:</p>
88
+ <ul>
89
+ <li><strong>Healthcare</strong>: Diagnosing diseases from medical images.</li>
90
+ <li><strong>Retail</strong>: Sorting and tagging product images.</li>
91
+ <li><strong>Security</strong>: Facial recognition for authentication.</li>
92
+ </ul>
93
+ <h2>Importance</h2>
94
+ <p>Image classification is crucial because it enables machines to interpret visual data, which is essential for creating intelligent systems capable of understanding and interacting with the world in a more human-like manner.</p>
95
+ <p>The <strong>ConvNeXT</strong> model used in this example is a state-of-the-art approach for image classification, offering advanced performance and scalability. It utilizes convolutional architecture to capture intricate patterns and relationships within images, enhancing classification accuracy and efficiency.</p>
96
+ </div>
97
+ """, unsafe_allow_html=True)
98
+
99
+ # How to Use
100
+ st.markdown('<div class="sub-title">How to Use the Model</div>', unsafe_allow_html=True)
101
+ st.code('''
102
+ import sparknlp
103
+ from sparknlp.base import *
104
+ from sparknlp.annotator import *
105
+ from pyspark.ml import Pipeline
106
+
107
+ # Load image data
108
+ imageDF = spark.read \\
109
+ .format("image") \\
110
+ .option("dropInvalid", value = True) \\
111
+ .load("src/test/resources/image/")
112
+
113
+ # Define Image Assembler
114
+ imageAssembler = ImageAssembler() \\
115
+ .setInputCol("image") \\
116
+ .setOutputCol("image_assembler")
117
+
118
+ # Define ConvNeXT classifier
119
+ imageClassifier = ConvNextForImageClassification \\
120
+ .pretrained("image_classifier_convnext_tiny_224_local", "en") \\
121
+ .setInputCols(["image_assembler"]) \\
122
+ .setOutputCol("class")
123
+
124
+ # Create pipeline
125
+ pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
126
+
127
+ # Apply pipeline to image data
128
+ pipelineDF = pipeline.fit(imageDF).transform(imageDF)
129
+
130
+ # Show results
131
+ pipelineDF \\
132
+ .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "class.result") \\
133
+ .show(truncate=False)
134
+ ''', language='python')
135
+
136
+ # Results
137
+ st.markdown('<div class="sub-title">Results</div>', unsafe_allow_html=True)
138
+ st.markdown("""
139
+ <div class="section">
140
+ <table class="benchmark-table">
141
+ <tr>
142
+ <th>Image Name</th>
143
+ <th>Result</th>
144
+ </tr>
145
+ <tr>
146
+ <td>dog.JPEG</td>
147
+ <td>[whippet]</td>
148
+ </tr>
149
+ <tr>
150
+ <td>cat.JPEG</td>
151
+ <td>[Siamese]</td>
152
+ </tr>
153
+ <tr>
154
+ <td>bird.JPEG</td>
155
+ <td>[peacock]</td>
156
+ </tr>
157
+ </table>
158
+ </div>
159
+ """, unsafe_allow_html=True)
160
+
161
+ # Model Information
162
+ st.markdown('<div class="sub-title">Model Information</div>', unsafe_allow_html=True)
163
+ st.markdown("""
164
+ <div class="section">
165
+ <table class="benchmark-table">
166
+ <tr>
167
+ <th>Attribute</th>
168
+ <th>Description</th>
169
+ </tr>
170
+ <tr>
171
+ <td><strong>Model Name</strong></td>
172
+ <td>image_classifier_convnext_tiny_224_local</td>
173
+ </tr>
174
+ <tr>
175
+ <td><strong>Compatibility</strong></td>
176
+ <td>Spark NLP 5.0.0+</td>
177
+ </tr>
178
+ <tr>
179
+ <td><strong>License</strong></td>
180
+ <td>Open Source</td>
181
+ </tr>
182
+ <tr>
183
+ <td><strong>Edition</strong></td>
184
+ <td>Official</td>
185
+ </tr>
186
+ <tr>
187
+ <td><strong>Input Labels</strong></td>
188
+ <td>[image_assembler]</td>
189
+ </tr>
190
+ <tr>
191
+ <td><strong>Output Labels</strong></td>
192
+ <td>[class]</td>
193
+ </tr>
194
+ <tr>
195
+ <td><strong>Language</strong></td>
196
+ <td>en</td>
197
+ </tr>
198
+ <tr>
199
+ <td><strong>Size</strong></td>
200
+ <td>107.6 MB</td>
201
+ </tr>
202
+ </table>
203
+ </div>
204
+ """, unsafe_allow_html=True)
205
+
206
+ # Predicted Entities
207
+ st.markdown('<div class="sub-title">Predicted Entities</div>', unsafe_allow_html=True)
208
+ st.markdown("""
209
+ <div class="section">
210
+ <ul>
211
+ <li>turnstile</li>
212
+ <li>damselfly</li>
213
+ <li>mixing bowl</li>
214
+ <li>sea snake</li>
215
+ <li>cockroach</li>
216
+ <li>...and many more</li>
217
+ </ul>
218
+ </div>
219
+ """, unsafe_allow_html=True)
220
+
221
+ # Data Source Section
222
+ st.markdown('<div class="sub-title">Data Source</div>', unsafe_allow_html=True)
223
+ st.markdown("""
224
+ <div class="section">
225
+ <p>The ConvNeXT model is available on <a class="link" href="https://huggingface.co/models" target="_blank">Hugging Face</a>. This model was trained on a large dataset of images and can be used for accurate image classification.</p>
226
+ </div>
227
+ """, unsafe_allow_html=True)
228
+
229
+ # References
230
+ st.markdown('<div class="sub-title">References</div>', unsafe_allow_html=True)
231
+ st.markdown("""
232
+ <div class="section">
233
+ <ul>
234
+ <li><a class="link" href="https://sparknlp.org/2023/07/05/image_classifier_convnext_tiny_224_local_en.html" target="_blank" rel="noopener">ConvNeXT Model on Spark NLP</a></li>
235
+ <li><a class="link" href="https://huggingface.co/facebook/convnext-tiny-224" target="_blank" rel="noopener">ConvNeXT Model on Hugging Face</a></li>
236
+ <li><a class="link" href="https://github.com/facebookresearch/ConvNeXT" target="_blank" rel="noopener">ConvNeXT GitHub Repository</a></li>
237
+ <li><a class="link" href="https://arxiv.org/abs/2201.03545" target="_blank" rel="noopener">ConvNeXT Paper</a></li>
238
+ </ul>
239
+ </div>
240
+ """, unsafe_allow_html=True)
241
+
242
+ # Community & Support
243
+ st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)
244
+ st.markdown("""
245
+ <div class="section">
246
+ <ul>
247
+ <li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
248
+ <li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
249
+ <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
250
+ <li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
251
+ <li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
252
+ </ul>
253
+ </div>
254
+ """, unsafe_allow_html=True)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ streamlit-tags
3
+ pandas
4
+ numpy
5
+ spark-nlp
6
+ pyspark