Spaces:

alfraser
/

llm-arch

Runtime error

App Files Files Community

llm-arch / pages /020_Data_Browser.py

alfraser

Added page level comments to describe the purpose of each page

4f07f72 about 1 year ago

raw

history blame

6.24 kB

	"""
	This page allows users to browse the test data set. Mainly this is just to get a sense of the size,
	content and composition of the dataset behind the project.
	"""

	import streamlit as st
	import pandas as pd
	import plotly.express as px

	from src.st_helpers import st_setup
	from src.datatypes import *


	def show_db_selector_and_summary_in_container(container) -> None:
	    """
	    Render the database selector and the dataset summary numbers into the given container.

	    The selector is only drawn when DataLoader reports more than one available database;
	    whatever the user picks is passed straight to DataLoader.set_db_name. Below it, the
	    counts of categories, products, features and reviews are listed, followed by a button
	    which reloads the data and reruns the page.
	    """
	    with container:
	        available = DataLoader.available_dbs()
	        if len(available) > 1:
	            st.write(':1234: Database Selector')
	            # Pre-select the database which is currently active
	            current_position = available.index(DataLoader.active_db)
	            chosen_db = st.selectbox("Connected to:", available, index=current_position,
	                                     label_visibility="collapsed")
	            DataLoader.set_db_name(chosen_db)

	        st.write(':1234: Summary Statistics')
	        bullet_lines = [
	            f'- {len(Category.all):,} categories',
	            f'- {len(Product.all):,} products',
	            f'- {len(Feature.all):,} features',
	            f'- {len(Review.all):,} reviews',
	        ]
	        st.markdown('\n'.join(bullet_lines))

	        if st.button('Force data reload'):
	            # NOTE(review): the True argument presumably forces a reload even when data is
	            # already loaded - confirm against DataLoader.load_data
	            DataLoader.load_data(True)
	            st.rerun()


	def show_data_summary_charts_in_container(container) -> None:
	    """
	    Render three collapsible summary charts into the given container:
	    - "Review Counts": bar chart of the total product review count per category
	    - "Product Ratings": box plot of each product's average rating, grouped by category
	    - "Product Prices": box plot of each product's price, grouped by category
	    """
	    with container:
	        categories = Category.all_sorted()

	        with st.expander("Review Counts"):
	            # One (category name, summed review count) row per category
	            count_rows = [
	                (cat.name, sum(prod.review_count for prod in cat.products))
	                for cat in categories
	            ]
	            counts_df = pd.DataFrame(count_rows, columns=["Category", "Review Count"])
	            st.bar_chart(counts_df, x="Category", y="Review Count")

	        with st.expander("Product Ratings"):
	            # One row per product so the box plot shows the spread within each category
	            rating_rows = [[cat.name, prod.average_rating] for cat in categories for prod in cat.products]
	            ratings_df = pd.DataFrame(rating_rows, columns=['Category', 'Mean Product Rating'])
	            ratings_fig = px.box(ratings_df, x="Category", y="Mean Product Rating")
	            ratings_fig.update_xaxes(tickangle=-90)
	            st.plotly_chart(ratings_fig, use_container_width=True)

	        with st.expander("Product Prices"):
	            price_rows = [[cat.name, prod.price] for cat in categories for prod in cat.products]
	            prices_df = pd.DataFrame(price_rows, columns=['Category', 'Price'])
	            prices_fig = px.box(prices_df, x="Category", y="Price")
	            prices_fig.update_xaxes(tickangle=-90)
	            st.plotly_chart(prices_fig, use_container_width=True)


	def show_top_section() -> None:
	    """
	    Draws the top part of the page: a narrow left column holding the database selector
	    and summary numbers, and a wider right column holding the summary charts.
	    """
	    with st.container():
	        # 1:3 width split between the summary numbers and the charts
	        left_col, right_col = st.columns([1, 3])
	        show_db_selector_and_summary_in_container(left_col)
	        show_data_summary_charts_in_container(right_col)


	def get_user_selected_category(container) -> Category:
	    """
	    Draws a radio-button list of every category name (in Category.all_sorted order)
	    into the given container and returns the Category looked up by the chosen name.
	    """
	    with container:
	        st.write('Category')
	        category_names = [f"{cat.name}" for cat in Category.all_sorted()]
	        # The visible label is written above, so the widget's own label is collapsed
	        picked_name = st.radio("Category", category_names, label_visibility="collapsed")
	    return Category.by_name(picked_name)


	def show_category_datatable_in_container(category, container) -> None:
	    """
	    Displays one category of products in the given container. The category's feature names
	    are listed first, then the products themselves are shown as an editable table in which
	    only the 'Show Reviews?' tick box can be changed. If the ticked products have any
	    reviews, those reviews are shown in a second table; otherwise a hint is shown instead.
	    """
	    tick_column = 'Show Reviews?'

	    with container:
	        feature_names = sorted(f.name for f in category.features)
	        st.write(f"{category.singular_name} Features ({len(feature_names)}):")
	        st.write('; '.join(feature_names))

	        # Build the product table, indexed by product id, with every tick box initially off
	        prod_columns = ['Name', 'Price', 'Feature Count', 'Features', 'Review Count', 'Average Rating',
	                        tick_column, 'Description']
	        prod_index = [p.id for p in category.products]
	        prod_rows = [
	            [p.name, p.price, p.feature_count, ', '.join([str(f) for f in p.features]), p.review_count,
	             p.average_rating, False, p.description]
	            for p in category.products
	        ]
	        prod_df = pd.DataFrame(prod_rows, index=prod_index, columns=prod_columns)
	        total_reviews = sum(p.review_count for p in category.products)
	        st.write(f"{category.name} ({len(prod_index)}). Having {total_reviews} reviews in total:")

	        # Every column except the tick box is read-only
	        locked_columns = tuple(col for col in prod_columns if col != tick_column)
	        edited_df = st.data_editor(prod_df, disabled=locked_columns)

	        ticked_rows = edited_df[edited_df[tick_column]]
	        selected_product_count = edited_df[tick_column].sum()
	        selected_review_count = ticked_rows['Review Count'].sum()

	        st.write(f"{category.singular_name} Reviews ({selected_review_count} from {selected_product_count} products):")
	        if selected_review_count > 0:
	            products = Product.for_ids(list(ticked_rows.index))
	            # Pair each review id with its table row, then split into index and data
	            review_entries = [
	                (r.id, [p.name, r.rating, r.review_text])
	                for p in products
	                for r in p.reviews
	            ]
	            rev_index = [review_id for review_id, _ in review_entries]
	            rev_data = [row for _, row in review_entries]
	            rev_df = pd.DataFrame(rev_data, index=rev_index, columns=['Product', 'Review Rating', 'Review Text'])
	            st.dataframe(rev_df, width=10000)
	        else:
	            st.write("Check boxes in the table above to see reviews for products.")


	def show_bottom_section() -> None:
	    """
	    Draws the bottom part of the page: a heading naming the selected category, a category
	    picker in a narrow left column and the product/review tables in a wider right column.
	    """
	    # Reserve the heading slot first so it sits above the columns, even though
	    # its text is only known after the category picker has been drawn
	    heading_slot = st.container()
	    picker_col, table_col = st.columns([1, 3])

	    chosen_category = get_user_selected_category(picker_col)
	    heading_slot.write(f'### {chosen_category.name}')
	    show_category_datatable_in_container(chosen_category, table_col)


	# Page entry point: only draw the page when st_setup returns a truthy value
	if st_setup('LLM Arch'):
	    # Load the data once; later runs reuse it while DataLoader.loaded stays set
	    if not DataLoader.loaded:
	        DataLoader.load_data()
	    st.write("# Data Browser")
	    for render_section in (show_top_section, show_bottom_section):
	        render_section()