EdBoy2202 committed on
Commit ea53bcc · verified · 1 Parent(s): 68a22c2

Update app.py

Files changed (1)
  1. app.py +127 -665
app.py CHANGED
@@ -1,208 +1,63 @@
  import streamlit as st
  import pandas as pd
- import numpy as np
  import joblib
  import matplotlib.pyplot as plt
- import os
- import openai
  from sklearn.preprocessing import LabelEncoder
- import requests  # Add this at the top with other imports
- from io import BytesIO
- import gdown
  from huggingface_hub import hf_hub_download

- # --- Set page configuration ---
- st.set_page_config(
-     page_title="The Guide",
-     page_icon="🚗",
-     layout="wide",
-     initial_sidebar_state="expanded"
- )
-
- # --- Custom CSS for better styling ---
- st.markdown("""
- <style>
- /* Base styles */
- * {
-     color: black !important;
- }
-
- /* Streamlit specific input elements */
- .stSelectbox,
- .stNumberInput,
- .stTextInput {
-     color: black !important;
- }
-
- /* Dropdown and select elements */
- select option,
- .streamlit-selectbox option,
- .stSelectbox > div[data-baseweb="select"] > div,
- .stSelectbox > div > div > div {
-     color: black !important;
-     background-color: white !important;
- }
-
- /* Input fields */
- input,
- .stNumberInput > div > div > input {
-     color: black !important;
- }
-
- /* Text elements */
- div.row-widget.stSelectbox > div,
- div.row-widget.stSelectbox > div > div > div,
- .streamlit-expanderContent,
- .stMarkdown,
- p, span, label {
-     color: black !important;
- }
-
- /* Keep button text white */
- .stButton > button {
-     color: white !important;
-     background-color: #FF4B4B;
- }
-
- /* Specific styling for select boxes */
- div[data-baseweb="select"] {
-     color: black !important;
-     background-color: white !important;
- }
-
- div[data-baseweb="select"] * {
-     color: black !important;
- }
-
- /* Style for the selected option */
- div[data-baseweb="select"] > div:first-child {
-     color: black !important;
-     background-color: white !important;
- }
-
- /* Dropdown menu items */
- [role="listbox"] {
-     background-color: white !important;
- }
-
- [role="listbox"] [role="option"] {
-     color: black !important;
- }
-
- /* Number input specific styling */
- input[type="number"] {
-     color: black !important;
-     background-color: white !important;
- }
-
- .stNumberInput div[data-baseweb="input"] {
-     background-color: white !important;
- }
-
- /* Headers */
- h1, h2, h3, h4, h5, h6 {
-     color: black !important;
- }
- </style>
- """, unsafe_allow_html=True)
-
- # --- Cache functions ---
- def create_brand_categories():
-     return {
-         'luxury_brands': {
-             'rolls-royce': (300000, 600000),
-             'bentley': (200000, 500000),
-             'lamborghini': (250000, 550000),
-             'ferrari': (250000, 600000),
-             'mclaren': (200000, 500000),
-             'aston-martin': (150000, 400000),
-             'maserati': (100000, 300000)
-         },
-         'premium_brands': {
-             'porsche': (60000, 150000),
-             'bmw': (40000, 90000),
-             'mercedes-benz': (45000, 95000),
-             'audi': (35000, 85000),
-             'lexus': (40000, 80000),
-             'jaguar': (45000, 90000),
-             'land-rover': (40000, 90000),
-             'volvo': (35000, 75000),
-             'infiniti': (35000, 70000),
-             'cadillac': (40000, 85000),
-             'tesla': (40000, 100000)
-         },
-         'mid_tier_brands': {
-             'acura': (30000, 50000),
-             'lincoln': (35000, 65000),
-             'buick': (25000, 45000),
-             'chrysler': (25000, 45000),
-             'alfa-romeo': (35000, 60000),
-             'genesis': (35000, 60000)
-         },
-         'standard_brands': {
-             'toyota': (20000, 35000),
-             'honda': (20000, 35000),
-             'volkswagen': (20000, 35000),
-             'mazda': (20000, 32000),
-             'subaru': (22000, 35000),
-             'hyundai': (18000, 32000),
-             'kia': (17000, 30000),
-             'ford': (20000, 40000),
-             'chevrolet': (20000, 38000),
-             'gmc': (25000, 45000),
-             'jeep': (25000, 45000),
-             'dodge': (22000, 40000),
-             'ram': (25000, 45000),
-             'nissan': (18000, 32000)
-         },
-         'economy_brands': {
-             'mitsubishi': (15000, 25000),
-             'suzuki': (12000, 22000),
-             'fiat': (15000, 25000),
-             'mini': (20000, 35000),
-             'smart': (15000, 25000)
-         },
-         'discontinued_brands': {
-             'pontiac': (5000, 15000),
-             'saturn': (4000, 12000),
-             'mercury': (4000, 12000),
-             'oldsmobile': (3000, 10000),
-             'plymouth': (3000, 10000),
-             'saab': (5000, 15000)
-         }
-     }
-
- @st.cache_data
- def load_datasets():
-     try:
-         with st.spinner('Loading dataset...'):
-             # Use BytesIO to read the CSV content
-             original_data = pd.read_csv('CTP_Model1.csv', low_memory=False)
-
-             # Ensure column names match the model's expectations
-             original_data.columns = original_data.columns.str.strip().str.capitalize()
-             return original_data
-     except Exception as e:
-         st.error(f"Error loading dataset: {str(e)}")
-         raise e
-
- @st.cache_resource
  def load_model_and_encodings():
      try:
-         # Show loading message
          with st.spinner('Loading model...'):
              model_content = hf_hub_download(repo_id="EdBoy2202/car_prediction_model", filename="car_price_modelv3.pkl")
              model = joblib.load(model_content)

-         # Load data for encodings
-         original_data = load_datasets()
-
-         # Create fresh encoders from data
          label_encoders = {}
          categorical_features = ['Make', 'model', 'condition', 'fuel', 'title_status',
-                                 'transmission', 'drive', 'size', 'type', 'paint_color']

          for feature in categorical_features:
              if feature in original_data.columns:
@@ -216,487 +71,94 @@ def load_model_and_encodings():
          st.error(f"Error loading model: {str(e)}")
          raise e

-
- # --- Load data and models ---
- try:
-     original_data = load_datasets()
-     model, label_encoders = load_model_and_encodings()  # Using the new function
- except Exception as e:
-     st.error(f"Error loading data or models: {str(e)}")
-     st.stop()
-
- # --- Define categorical and numeric features ---
- # From model.py
- # --- Define features ---
- numeric_features = ['year', 'odometer', 'age', 'age_squared', 'mileage_per_year']
- # Update the categorical features list to use lowercase
- categorical_features = ['make', 'model', 'condition', 'fuel', 'title_status',
-                         'transmission', 'drive', 'size', 'type', 'paint_color']
- required_features = numeric_features + categorical_features
-
- # --- Feature engineering functions ---
- def create_features(df):
-     df = df.copy()
-     current_year = 2024
-     df['age'] = current_year - df['year']
-     df['age_squared'] = df['age'] ** 2
-     df['mileage_per_year'] = np.clip(df['odometer'] / (df['age'] + 1), 0, 200000)
-     return df
-
- def prepare_input(input_dict, label_encoders):
-     # Convert None values to 'unknown' for safe handling
-     input_dict = {k: v if v is not None else 'unknown' for k, v in input_dict.items()}
-
-     # Convert input dictionary to DataFrame
-     input_df = pd.DataFrame([input_dict])
-
-     # Ensure columns match the model's expected casing
-     feature_name_mapping = {
-         "make": "Make",  # Match casing for 'Make'
-         "model": "Model",  # Match casing for 'Model'
-         "condition": "Condition",
-         "fuel": "Fuel",
-         "title_status": "Title_status",
-         "transmission": "Transmission",
-         "drive": "Drive",
-         "size": "Size",
-         "type": "Type",
-         "paint_color": "Paint_color",
-         "year": "Year",
-         "odometer": "Odometer",
-         "age": "Age",
-         "age_squared": "Age_squared",
-         "mileage_per_year": "Mileage_per_year"
-     }
-     input_df.rename(columns=feature_name_mapping, inplace=True)
-
-     # Numeric feature conversions
-     input_df["Year"] = pd.to_numeric(input_df.get("Year", 0), errors="coerce")
-     input_df["Odometer"] = pd.to_numeric(input_df.get("Odometer", 0), errors="coerce")
-
-     # Feature engineering
-     current_year = 2024
-     input_df["Age"] = current_year - input_df["Year"]
-     input_df["Age_squared"] = input_df["Age"] ** 2
-     input_df["Mileage_per_year"] = input_df["Odometer"] / (input_df["Age"] + 1)
-     input_df["Mileage_per_year"] = input_df["Mileage_per_year"].clip(0, 200000)
-
-     # Encode categorical features
-     for feature, encoded_feature in feature_name_mapping.items():
-         if feature in label_encoders:
-             input_df[encoded_feature] = input_df[encoded_feature].fillna("unknown").astype(str).str.strip()
-             try:
-                 input_df[encoded_feature] = label_encoders[feature].transform(input_df[encoded_feature])
-             except ValueError:
-                 input_df[encoded_feature] = 0  # Assign default for unseen values
-
-     # Ensure all required features are present
-     for feature in model.feature_names_in_:
-         if feature not in input_df:
-             input_df[feature] = 0  # Default value for missing features
-
-     # Reorder columns
-     input_df = input_df[model.feature_names_in_]
-
-     return input_df
-
-
-
- # --- Styling functions ---
- st.markdown("""
- <style>
- /* Force black text globally */
- .stApp, .stApp * {
-     color: black !important;
- }
-
- /* Specific overrides for different elements */
- .main {
-     padding: 0rem 1rem;
- }
-
- .stButton>button {
-     width: 100%;
-     background-color: #FF4B4B;
-     color: white !important;  /* Keep button text white */
-     border-radius: 5px;
-     padding: 0.5rem 1rem;
-     border: none;
- }
-
- .stButton>button:hover {
-     background-color: #FF6B6B;
- }
-
- .sidebar .sidebar-content {
-     background-color: #f5f5f5;
- }
-
- /* Input fields and selectboxes */
- .stSelectbox select,
- .stSelectbox option,
- .stSelectbox div,
- .stNumberInput input,
- .stTextInput input {
-     color: black !important;
- }
-
- /* Headers */
- h1, h2, h3, h4, h5, h6 {
-     color: black !important;
- }
-
- /* Labels and text */
- label, .stText, p, span {
-     color: black !important;
- }
-
- /* Selectbox options */
- option {
-     color: black !important;
-     background-color: white !important;
- }
-
- /* Override for any Streamlit specific classes */
- .st-emotion-cache-16idsys p,
- .st-emotion-cache-1wmy9hl p,
- .st-emotion-cache-16idsys span,
- .st-emotion-cache-1wmy9hl span {
-     color: black !important;
- }
-
- /* Force white text only for the prediction button */
- .stButton>button[data-testid="stButton"] {
-     color: white !important;
- }
- </style>
- """, unsafe_allow_html=True)
-
- def style_metric_container(label, value):
-     st.markdown(f"""
-     <div style="
-         background-color: #f8f9fa;
-         padding: 1rem;
-         border-radius: 5px;
-         margin: 0.5rem 0;
-         border-left: 5px solid #FF4B4B;
-     ">
-         <p style="color: #666; margin-bottom: 0.2rem; font-size: 0.9rem;">{label}</p>
-         <p style="color: #1E1E1E; font-size: 1.5rem; font-weight: 600; margin: 0;">{value}</p>
-     </div>
-     """, unsafe_allow_html=True)
-
- # --- OpenAI GPT-3 Assistant ---
- def generate_gpt_response(prompt, dataset):
-     """
-     First look up the dataset for relevant information. If no matches are found,
-     generate a GPT response.
-     """
-     # Extract make and model from the prompt (simplified NLP parsing)
-     prompt_lower = prompt.lower()
-     make = None
-     model = None
-
-     # Example: Parse make and model from user query
-     for word in prompt_lower.split():
-         if word in dataset['Make'].str.lower().unique():
-             make = word
-         elif word in dataset['Model'].str.lower().unique():
-             model = word
-
-     # If we find relevant data, use it to respond
-     if make:
-         dataset_response = search_dataset(dataset, make, model)
-         if dataset_response is not None:
-             st.write("### Dataset Match Found")
-             st.dataframe(dataset_response)  # Show results to the user
-             return f"I found some information in our dataset about {make.title()} {model.title() if model else ''}. Please see the details above."
-
-     # Ensure the API key is set securely
-     # You can use Streamlit's secrets management or environment variables
-     openai.api_key = os.getenv("GPT_TOKEN")
-
-     # Define the system message and messages list
-     system_message = {
-         "role": "system",
-         "content": (
-             "You are a helpful car shopping assistant. "
-             "Provide car recommendations based on user queries. "
-             "Include car makes, models, years, and approximate prices. "
-             "Be friendly and informative."
-         )
-     }
-
-     messages = [system_message, {"role": "user", "content": prompt}]
-
-     # Call the OpenAI ChatCompletion API
-     response = openai.ChatCompletion.create(
-         model="gpt-3.5-turbo",  # or "gpt-4" if you have access
-         messages=messages,
-         max_tokens=500,
-         n=1,
-         stop=None,
-         temperature=0.7,
-     )
-
-     # Extract the assistant's reply
-     assistant_reply = response['choices'][0]['message']['content'].strip()
-
-     return assistant_reply
-
- def create_assistant_section(dataset):
-     st.markdown("""
-     <div style='background-color: #f8f9fa; padding: 1.5rem; border-radius: 10px; margin-bottom: 1rem;'>
-         <h2 style='color: #1E1E1E; margin-top: 0;'>🤖 Car Shopping Assistant</h2>
-         <p style='color: #666;'>Ask me anything about cars! For example: 'What's a good car under $30,000 with low mileage?'</p>
-     </div>
-     """, unsafe_allow_html=True)
-
-     if "assistant_responses" not in st.session_state:
-         st.session_state.assistant_responses = []
-
-     prompt = st.text_input("Ask about car recommendations...",
-                            placeholder="Type your question here...")
-
-     if prompt:
-         try:
-             # Use OpenAI API to generate response
-             response = generate_gpt_response(prompt, dataset)
-             st.session_state.assistant_responses.append(response)
-         except Exception as e:
-             response = f"Sorry, I encountered an error: {str(e)}"
-             st.session_state.assistant_responses.append(response)
-
-         # Display the latest response
-         st.write(response)
-
-         # Optionally display previous responses
-         if len(st.session_state.assistant_responses) > 1:
-             st.markdown("### Previous Responses")
-             for prev_response in st.session_state.assistant_responses[:-1]:
-                 st.markdown("---")
-                 st.write(prev_response)
-
-     if st.button("Clear Chat"):
-         st.session_state.assistant_responses = []
-         st.experimental_rerun()
-
- # --- Prediction Interface ---
- def create_prediction_interface():
-     with st.sidebar:
-         st.markdown("""
-         <div style='background-color: #FF4B4B; padding: 1rem; border-radius: 5px; margin-bottom: 2rem;'>
-             <h2 style='color: white; margin: 0;'>Car Details</h2>
-         </div>
-         """, unsafe_allow_html=True)
-
-         # Year slider
-         year = st.slider("Year", min_value=1980, max_value=2024, value=2022)
-
-         # Make selection
-         make_options = sorted(original_data['Make'].dropna().unique())  # Correct casing for 'Make'
-         make = st.selectbox("Make", options=make_options)
-
-         # Filter models based on selected make
-         filtered_models = sorted(original_data[original_data['Make'] == make]['Model'].dropna().unique())  # Match 'Model' casing
-         model_name = st.selectbox("Model", options=filtered_models if len(filtered_models) > 0 else ["No models available"])
-
-         if model_name == "No models available":
-             st.warning("No models are available for the selected make.")
-
-         # Additional inputs
-         condition = st.selectbox("Condition", ['new', 'like new', 'excellent', 'good', 'fair', 'salvage', 'parts only'])
-         fuel = st.selectbox("Fuel Type", sorted(original_data['Fuel'].fillna('Unknown').unique()))  # Match casing for 'Fuel'
-         odometer = st.number_input("Odometer (miles)", min_value=0, value=20000, format="%d", step=1000)
-         title_status = st.selectbox("Title Status", sorted(original_data['Title_status'].fillna('Unknown').unique()))  # Match casing
-         transmission = st.selectbox("Transmission", sorted(original_data['Transmission'].fillna('Unknown').unique()))
-         drive = st.selectbox("Drive Type", sorted(original_data['Drive'].fillna('Unknown').unique()))
-         size = st.selectbox("Size", sorted(original_data['Size'].fillna('Unknown').unique()))
-         paint_color = st.selectbox("Paint Color", sorted(original_data['Paint_color'].fillna('Unknown').unique()))
-
-         car_type = 'sedan'  # Default type
-
-         # Prediction button
-         predict_button = st.button("📊 Predict Price", use_container_width=True)
-
-         return {
-             'year': year,
-             'make': make.strip(),  # Use correctly cased `make`
-             'model': model_name if model_name != "No models available" else 'unknown',
-             'condition': condition.lower().strip(),
-             'fuel': fuel.lower().strip(),
-             'odometer': odometer,
-             'title_status': title_status.lower().strip(),
-             'transmission': transmission.lower().strip(),
-             'drive': drive.lower().strip(),
-             'size': size.lower().strip(),
-             'type': car_type.lower().strip(),
-             'paint_color': paint_color.lower().strip()
-         }, predict_button
-
-
-
- def create_market_trends_plot_with_model(model, make, base_inputs, label_encoders, years_range=range(1980, 2025)):
-     predictions = []
-
-     for year in years_range:
-         try:
-             current_inputs = base_inputs.copy()
-             current_inputs['year'] = float(year)
-             age = 2024 - year
-
-             # Base value calculation
-             base_price = 30000  # Average new car price
-
-             # Depreciation curve
-             if age <= 1:
-                 value_factor = 0.85  # 15% first year depreciation
-             elif age <= 5:
-                 value_factor = 0.85 * (0.90 ** (age - 1))  # 10% years 2-5
-             else:
-                 value_factor = 0.85 * (0.90 ** 4) * (0.95 ** (age - 5))  # 5% thereafter
-
-             price = base_price * value_factor
-             predictions.append({"year": year, "predicted_price": max(price, 2000)})  # Floor of $2000
-
-         except Exception as e:
-             continue
-
-     if not predictions:
-         return None
-
-     predictions_df = pd.DataFrame(predictions)
-     fig, ax = plt.subplots(figsize=(12, 6))
-     ax.plot(predictions_df["year"], predictions_df["predicted_price"], color="#FF4B4B", linewidth=2)
-     ax.set_title(f"Average Car Value by Age")
-     ax.set_xlabel("Year")
-     ax.set_ylabel("Value ($)")
-     ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'${x:,.0f}'))
-     plt.grid(True, alpha=0.3)
-
-     return fig
-
- def inspect_model_features(model):
-     # Check feature names the model expects
-     try:
-         if hasattr(model, "feature_names_in_"):
-             print("Model feature names:", model.feature_names_in_)
-         else:
-             print("Model does not have 'feature_names_in_' attribute.")
-     except Exception as e:
-         print(f"Error inspecting model features: {e}")
-
- def predict_with_ranges(inputs, model, label_encoders):
-     input_df = prepare_input(inputs, label_encoders)
-     base_prediction = float(np.expm1(model.predict(input_df)[0]))
-
-     brand_categories = create_brand_categories()
-     make = inputs['make'].lower()
-     year = inputs['year']
-     condition = inputs['condition']
-     odometer = inputs['odometer']
-     age = 2024 - year
-
-     # Find brand category and price range
-     price_range = None
-     for category, brands in brand_categories.items():
-         if make in brands:
-             price_range = brands[make]
-             break
-     if not price_range:
-         price_range = (15000, 35000)  # Default range
-
-     # Calculate adjustment factors
-     mileage_factor = max(1 - (odometer / 200000) * 0.3, 0.7)
-     age_factor = 0.85 ** min(age, 15)
-     condition_factor = {
-         'new': 1.0,
-         'like new': 0.9,
-         'excellent': 0.8,
-         'good': 0.7,
-         'fair': 0.5,
-         'salvage': 0.3
-     }.get(condition, 0.7)
-
-     # Apply all factors
-     min_price = price_range[0] * mileage_factor * age_factor * condition_factor
-     max_price = price_range[1] * mileage_factor * age_factor * condition_factor
-     predicted_price = base_prediction * mileage_factor * age_factor * condition_factor
-
-     # Use uniform distribution instead of clamping
-     final_prediction = np.random.uniform(min_price, max_price)
-
-     return {
-         'predicted_price': final_prediction,
-         'min_price': min_price,
-         'max_price': max_price
-     }
- # --- Main Application ---
- def main(model, label_encoders, dataset):
-     col1, col2 = st.columns([2, 1])
-
-     with col1:
-         st.markdown("""
-         <h1 style='text-align: center;'>The Guide 🚗</h1>
-         <p style='text-align: center; color: #666; font-size: 1.1rem; margin-bottom: 2rem;'>
-             A cutting-edge data science project leveraging machine learning to detect which car would be best for you.
-         </p>
-         """, unsafe_allow_html=True)
-
-         inputs, predict_button = create_prediction_interface()
-
-         # Prepare base inputs
-         base_inputs = {
-             "year": inputs.get("year", 2022),
-             "make": inputs.get("make", "toyota").lower(),
-             "model": inputs.get("model", "camry"),
-             "odometer": inputs.get("odometer", 20000),
-             "condition": inputs.get("condition", "good"),
-             "fuel": inputs.get("fuel", "gas"),
-             "title_status": inputs.get("title_status", "clean"),
-             "transmission": inputs.get("transmission", "automatic"),
-             "drive": inputs.get("drive", "fwd"),
-             "size": inputs.get("size", "mid-size"),
-             "paint_color": inputs.get("paint_color", "black"),
-             "type": inputs.get("type", "sedan")
-         }
-
-         if base_inputs["condition"] == "new":
-             base_inputs["odometer"] = 0
-
-         if predict_button:
-             st.write(f"Analyzing {base_inputs['year']} {base_inputs['make'].title()} {base_inputs['model'].title()}...")
-             prediction_results = predict_with_ranges(base_inputs, model, label_encoders)
-
-             st.markdown(f"""
-             ### Price Analysis
-             - **Estimated Range**: ${prediction_results['min_price']:,.2f} - ${prediction_results['max_price']:,.2f}
-             - **Model Prediction**: ${prediction_results['predicted_price']:,.2f}

-             *Note: Range based on market data, condition, and mileage*
-             """)

-             # Generate and display the graph
-             fig = create_market_trends_plot_with_model(model, base_inputs["make"], base_inputs, label_encoders)
-             if fig:
-                 st.pyplot(fig)
              else:
-                 st.warning("No graph generated. Please check your data or selection.")
-
-     with col2:
-         create_assistant_section(dataset)
-
- if __name__ == "__main__":
-     try:
-         # Load data and model
-         original_data = load_datasets()
-         model, label_encoders = load_model_and_encodings()
-
-         # Inspect model features
-         inspect_model_features(model)
-
-         # Call the main function
-         main(model, label_encoders, original_data)
-     except Exception as e:
-         st.error(f"Error loading data or models: {str(e)}")
-         st.stop()
1
  import streamlit as st
  import pandas as pd
+ import openai
  import joblib
+ from PIL import Image
+ import requests
+ from io import BytesIO
  import matplotlib.pyplot as plt
+ import numpy as np
  from sklearn.preprocessing import LabelEncoder
  from huggingface_hub import hf_hub_download

+ # Function definitions
+
+ def load_image(image_file):
+     return Image.open(image_file)
+
+ def classify_image(image):
+     img_byte_arr = BytesIO()
+     image.save(img_byte_arr, format='PNG')
+     img_byte_arr = img_byte_arr.getvalue()
+
+     headers = {"Authorization": f"Bearer {HUGGINGFACE_API_KEY}"}
+     response = requests.post(
+         'https://api-inference.huggingface.co/models/dima806/car_models_image_detection',
+         headers=headers,
+         files={"file": img_byte_arr}
+     )
+
+     if response.status_code == 200:
+         return response.json()
+     else:
+         st.error("Image classification failed. Please try again.")
+         return None
+
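A note on the classify_image call added above: the serverless Hugging Face Inference API generally expects the raw image bytes as the request body (`data=`) rather than a multipart `files=` upload, and an image-classification endpoint typically returns a list of `{label, score}` objects rather than `brand`/`model` keys. A minimal sketch of that call shape (the endpoint comes from the diff; the helper name and label parsing are illustrative assumptions, not part of the commit):

```python
import requests

API_URL = "https://api-inference.huggingface.co/models/dima806/car_models_image_detection"

def classify_image_bytes(img_bytes: bytes, hf_token: str):
    # Send the raw PNG/JPEG bytes as the request body with a bearer token.
    headers = {"Authorization": f"Bearer {hf_token}"}
    response = requests.post(API_URL, headers=headers, data=img_bytes, timeout=30)
    response.raise_for_status()
    # Typical response shape: [{"label": "...", "score": 0.97}, ...]
    predictions = response.json()
    return predictions[0]["label"] if predictions else None
```

The downstream `car_info['brand']` / `car_info['model']` lookups would then need to be derived from that top label (how the label encodes make and model depends on the classifier and is an assumption here).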
+ def find_closest_match(df, brand, model):
+     match = df[(df['make'].str.contains(brand, case=False)) & (df['model'].str.contains(model, case=False))]
+     if not match.empty:
+         return match.iloc[0]
+     return None
+
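One caveat for find_closest_match: `str.contains` propagates NaN for rows with missing make/model values, and a mask containing NaN raises when used for boolean indexing. Passing `na=False` avoids that. A small sketch, assuming the CSV keeps the lowercase `make`/`model` columns used in the commit (the `_safe` name is illustrative):

```python
import pandas as pd

def find_closest_match_safe(df: pd.DataFrame, brand: str, model: str):
    # na=False turns rows with missing make/model into False instead of NaN,
    # so the mask stays boolean and can be used for indexing.
    mask = (
        df['make'].str.contains(brand, case=False, na=False)
        & df['model'].str.contains(model, case=False, na=False)
    )
    match = df[mask]
    return match.iloc[0] if not match.empty else None
```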
+ def get_car_overview(car_data):
43
+ prompt = f"Provide an overview of the following car:\nYear: {car_data['year']}\nMake: {car_data['make']}\nModel: {car_data['model']}\nTrim: {car_data['trim']}\nPrice: ${car_data['price']}\nCondition: {car_data['condition']}\n"
44
+ response = openai.ChatCompletion.create(
45
+ model="gpt-3.5-turbo",
46
+ messages=[{"role": "user", "content": prompt}]
47
+ )
48
+ return response.choices[0].message['content']
 
 
 
 
 
 
 
49
 
 
50
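get_car_overview relies on `openai.ChatCompletion.create`, which only exists in the pre-1.0 OpenAI SDK (e.g. `openai==0.28`); with `openai>=1.0` the equivalent call goes through a client object. A sketch of the newer form in case the Space pins a recent SDK (the function name below is illustrative, not the committed one):

```python
from openai import OpenAI

def get_car_overview_v1(prompt: str, api_key: str) -> str:
    # client.chat.completions.create replaces openai.ChatCompletion.create in openai>=1.0
    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content
```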
  def load_model_and_encodings():
      try:
          with st.spinner('Loading model...'):
              model_content = hf_hub_download(repo_id="EdBoy2202/car_prediction_model", filename="car_price_modelv3.pkl")
              model = joblib.load(model_content)
+
+         original_data = load_datasets()  # Ensure this function loads your CSV data

          label_encoders = {}
          categorical_features = ['Make', 'model', 'condition', 'fuel', 'title_status',
+                                 'transmission', 'drive', 'size', 'type', 'paint_color']

          for feature in categorical_features:
              if feature in original_data.columns:

          st.error(f"Error loading model: {str(e)}")
          raise e

+ def predict_price(model, encoders, user_input):
+     # Transform user input into model input format
+     encoded_features = {feature: encoders[feature].transform([value])[0] if value in encoders[feature] else 0
+                         for feature, value in user_input.items()}
+
+     # Create a DataFrame for prediction
+     input_data = pd.DataFrame([encoded_features])
+
+     # Predict price
+     predicted_price = model.predict(input_data)
+     return predicted_price[0]
+
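A caution on predict_price: a fitted LabelEncoder does not support `value in encoder`, so the membership test above will raise a TypeError at runtime; the known labels live in `encoder.classes_`. A sketch of a safer encoding step that keeps the commit's fall-back-to-0 behaviour (the helper name is illustrative):

```python
import pandas as pd

def encode_user_input(user_input: dict, encoders: dict) -> pd.DataFrame:
    encoded = {}
    for feature, value in user_input.items():
        enc = encoders.get(feature)
        if enc is not None and str(value) in set(enc.classes_):
            encoded[feature] = enc.transform([str(value)])[0]
        elif isinstance(value, (int, float)):
            encoded[feature] = value  # pass numeric fields (e.g. year) through
        else:
            encoded[feature] = 0      # unseen or unknown category
    return pd.DataFrame([encoded])
```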
+ # Streamlit App
+
+ st.title("Auto Appraise")
+ st.write("Capture a car image using your camera or upload an image to get its brand, model, overview, and expected price!")
+
+ # Load the CSV file
+ df = pd.read_csv('car_data.csv')
+
+ # Load model and encoders
+ model, label_encoders = load_model_and_encodings()
+
+ # Initialize OpenAI API key
+ openai.api_key = st.secrets["GPT_TOKEN"]  # Your OpenAI API key
+ HUGGINGFACE_API_KEY = st.secrets["HF_TOKEN"]  # Your Hugging Face API key
+
+ # Camera input for taking photo
+ camera_image = st.camera_input("Take a picture of the car!")
+
+ if camera_image is not None:
+     image = load_image(camera_image)
+     st.image(image, caption='Captured Image.', use_column_width=True)
+
+     # Classify the car image
+     car_info = classify_image(image)
+     if car_info:
+         brand = car_info['brand']  # Adjust according to response structure
+         model_name = car_info['model']
+         st.write(f"Identified Car: {brand} {model_name}")
+
+         # Find the closest match in the CSV
+         match = find_closest_match(df, brand, model_name)
+         if match is not None:
+             st.write("Closest Match Found:")
+             st.write(match)
+
+             # Get additional information using GPT-3.5-turbo
+             overview = get_car_overview(match)
+             st.write("Car Overview:")
+             st.write(overview)
+
+             # Interactive Price Prediction
+             st.subheader("Price Prediction Over Time")
+             selected_years = st.slider("Select range of years for price prediction",
+                                        min_value=2000, max_value=2023, value=(2010, 2023))
+
+             years = np.arange(selected_years[0], selected_years[1] + 1)
+             predicted_prices = []
+
+             for year in years:
+                 user_input = {
+                     'Make': brand,
+                     'model': model_name,
+                     'condition': match['condition'],
+                     'fuel': match['fuel'],
+                     'title_status': match['title_status'],
+                     'transmission': match['transmission'],
+                     'drive': match['drive'],
+                     'size': match['size'],
+                     'type': match['type'],
+                     'paint_color': match['paint_color'],
+                     'year': year
+                 }

+                 price = predict_price(model, label_encoders, user_input)
+                 predicted_prices.append(price)
+
+             # Plotting the results
+             plt.figure(figsize=(10, 5))
+             plt.plot(years, predicted_prices, marker='o')
+             plt.title(f"Predicted Price of {brand} {model_name} Over Time")
+             plt.xlabel("Year")
+             plt.ylabel("Predicted Price ($)")
+             plt.grid()
+             st.pyplot(plt)

          else:
+             st.write("No match found in the database.")
+ else:
+     st.write("Please take a picture of the car to proceed.")
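Finally, the year sweep feeds raw column names ('Make', 'model', 'condition', ...) straight into model.predict, while the pipeline removed in this same commit engineered extra features (Age, Age_squared, Mileage_per_year) and reordered columns by `model.feature_names_in_` before predicting. If the pickled model still expects that layout, aligning the frame first avoids a feature-mismatch error; a minimal sketch under that assumption (helper name illustrative):

```python
import pandas as pd

def align_to_model(input_df: pd.DataFrame, model) -> pd.DataFrame:
    # Mirror the removed prepare_input(): add any column the model expects
    # but the frame lacks, then reorder to the training-time column order.
    if hasattr(model, "feature_names_in_"):
        for col in model.feature_names_in_:
            if col not in input_df.columns:
                input_df[col] = 0
        input_df = input_df[list(model.feature_names_in_)]
    return input_df
```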