File size: 4,996 Bytes
53dc0ac
 
 
 
 
 
 
 
 
 
 
 
 
cb8d566
53dc0ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import streamlit as st
import pandas as pd
import plotly.express as px

from src.st_helpers import st_setup
from src.datatypes import *

# Data Browser page. Everything below renders only when st_setup() returns a
# truthy value (st_setup comes from src.st_helpers; its checks are not visible
# from this file).
if st_setup('LLM Arch'):
    # Skip loading when DataLoader reports that it already holds data.
    if not DataLoader.loaded:
        DataLoader.load_data()

    summary = st.container()
    with summary:
        st.write("# Data Browser")

        # Narrow left column for controls/statistics, wide right column for charts.
        sumcol1, sumcol2 = st.columns([1, 3])

        with sumcol1:
            dbs = DataLoader.available_dbs()
            # The selector is only shown when there is a choice to make.
            if len(dbs) > 1:
                st.write('**:1234: Database Selector**')
                # Preselect the active database; fall back to the first entry
                # instead of raising ValueError when it is not in the list.
                active = DataLoader.active_db
                idx = dbs.index(active) if active in dbs else 0
                chosen_db = st.selectbox("Connected to:", dbs, index=idx, label_visibility="collapsed")
                DataLoader.set_db_name(chosen_db)

            st.write('**:1234: Summary Statistics**')
            # Markdown bullet list of record counts with thousands separators.
            # Kept in its own name so the `summary` container is not shadowed.
            summary_md = '\n'.join([
                f'- **{len(Category.all):,}** categories',
                f'- **{len(Product.all):,}** products',
                f'- **{len(Feature.all):,}** features',
                f'- **{len(Review.all):,}** reviews',
            ])
            st.markdown(summary_md)
            if st.button('Force data reload'):
                # The positional True presumably forces a reload even when data
                # is already loaded -- TODO confirm against DataLoader.load_data.
                DataLoader.load_data(True)
                # Restart the script so the page is redrawn after the reload.
                st.rerun()

        with sumcol2:
            cats = Category.all_sorted()

            with st.expander("**Review Counts**"):
                # One bar per category: review counts summed over its products.
                review_totals = [(c.name, sum(p.review_count for p in c.products)) for c in cats]
                df = pd.DataFrame(review_totals, columns=["Category", "Review Count"])
                st.bar_chart(df, x="Category", y="Review Count")

            # Ratings and prices use the same chart (one box per category over
            # a per-product attribute), so both are driven from one spec table:
            # (expander title, value column label, Product attribute name).
            box_plots = (
                ("**Product Ratings**", 'Mean Product Rating', 'average_rating'),
                ("**Product Prices**", 'Price', 'price'),
            )
            for title, value_label, attr in box_plots:
                with st.expander(title):
                    rows = [[c.name, getattr(p, attr)] for c in cats for p in c.products]
                    df = pd.DataFrame(rows, columns=['Category', value_label])
                    fig = px.box(df, x="Category", y=value_label)
                    # Rotate the category tick labels to vertical.
                    fig.update_xaxes(tickangle=-90)
                    st.plotly_chart(fig, use_container_width=True)

    # Placeholder created before the columns so the category heading renders
    # above them, even though it is filled in only once the selection is known.
    subhead = st.container()

    col1, col2 = st.columns([1, 3])

    with col1:
        st.write('**Category**')
        cats = Category.all_sorted()
        options = [f"{c.name}" for c in cats]
        selection = st.radio("**Category**", options, label_visibility="collapsed")

    # st.radio returns None when it has no options to offer; without this guard
    # the `selection[:-1]` slices below would raise TypeError on None.
    if selection is None:
        st.write('No categories available.')
        st.stop()

    selected_category = Category.by_name(selection)

    with subhead:
        st.write(f'### {selection}')

    with col2:
        features = sorted(f.name for f in selected_category.features)
        # selection[:-1] drops the final character, presumably to turn a plural
        # category name into its singular -- TODO confirm every name ends in 's'.
        st.write(f"**{selection[:-1]} Features ({len(features)}):**")
        st.write('; '.join(features))

        # Snapshot the products once so the table rows, index and totals are
        # all built from the same sequence.
        category_products = list(selected_category.products)

        # The only editable column: a per-row checkbox, initially unchecked.
        toggle_col = 'Show Reviews?'
        prod_columns = ['Name', 'Price', 'Feature Count', 'Features', 'Review Count', 'Average Rating', toggle_col,
                        'Description']
        prod_data = [[p.name, p.price, p.feature_count, ', '.join(str(f) for f in p.features), p.review_count,
                      p.average_rating, False, p.description] for p in category_products]
        # Rows are indexed by product id so checked rows map back to products.
        prod_index = [p.id for p in category_products]
        prod_df = pd.DataFrame(prod_data, index=prod_index, columns=prod_columns)
        total_reviews = sum(p.review_count for p in category_products)
        st.write(f"**{selection} ({len(prod_index)}). Having {total_reviews} reviews in total:**")
        # Every column except the checkbox is read-only; deriving the list from
        # prod_columns keeps it in sync if columns are added or renamed.
        read_only_columns = [col for col in prod_columns if col != toggle_col]
        edited_df = st.data_editor(prod_df, disabled=read_only_columns)

        # Rows whose checkbox is ticked in the editor.
        checked_rows = edited_df[edited_df[toggle_col]]
        selected_product_count = edited_df[toggle_col].sum()
        selected_review_count = checked_rows['Review Count'].sum()

        st.write(f"**{selection[:-1]} Reviews ({selected_review_count} from {selected_product_count} products):**")
        if selected_review_count > 0:
            selected_products = list(checked_rows.index)
            products = Product.for_ids(selected_products)
            # Flatten to one (product, review) pair per review so the row data
            # and the review-id index are built from the same sequence.
            product_reviews = [(p, r) for p in products for r in p.reviews]
            rev_index = [r.id for _, r in product_reviews]
            rev_data = [[p.name, r.rating, r.review_text] for p, r in product_reviews]
            rev_columns = ['Product', 'Review Rating', 'Review Text']
            rev_df = pd.DataFrame(rev_data, index=rev_index, columns=rev_columns)
            # NOTE(review): the oversized pixel width looks like a way to make
            # the table span the column; use_container_width=True may be the
            # intended option -- confirm before changing.
            st.dataframe(rev_df, width=10000)
        else:
            st.write("Check boxes in the table above to see reviews for products.")