2catycm committed on
Commit
1d0271a
·
1 Parent(s): 9c580ee

feat: init

Browse files
Files changed (2) hide show
  1. app.py +83 -0
  2. gmm_point_tracking_with_centroids.csv +0 -0
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import plotly.express as px
5
+ import time
6
+
7
# Streamlit page that visualises a Gaussian-mixture clustering result.
#
# Flow: load the CSV, let the user pick an "iteration" and a sample size in
# the sidebar, draw a scatter plot of the sampled rows with one centroid
# marker per cluster, list the sampled rows, and draw a rating bar chart.
#
# Columns read from the CSV below: x, y, cluster, centroid_x, centroid_y,
# title, keywords, rating_avg, confidence_avg, author, site.

# Load the data.
df = pd.read_csv("gmm_point_tracking_with_centroids.csv")

# Streamlit app title.
st.title("高斯混合分布聚类可视化")

# An empty table would make the sample-size slider invalid (min_value=1 would
# exceed max_value=0), so stop early with a readable message instead.
if df.empty:
    st.error("数据文件为空，无法进行可视化。")
    st.stop()

MIN_ITERATION = 1
MAX_ITERATION = 10
AUTOPLAY_DELAY_SECONDS = 1

# Autoplay hand-over. Streamlit forbids assigning to a widget's session-state
# key once that widget has been created in the current run, so the end of the
# script stores the next value under "pending_iteration" and it is copied into
# the slider's key here, before the slider exists.
pending_iteration = st.session_state.get("pending_iteration")
if pending_iteration is not None:
    del st.session_state["pending_iteration"]
if pending_iteration is not None and st.session_state.get("autoplay", False):
    # Only honour the pending step if autoplay is still switched on.
    st.session_state["iteration"] = pending_iteration
elif "iteration" not in st.session_state:
    st.session_state["iteration"] = MIN_ITERATION

# Sidebar controls.
with st.sidebar:
    st.header("控制面板")
    # The slider is bound to st.session_state["iteration"], so autoplay can
    # move it between reruns. No `value=` is passed because the initial value
    # comes from session state.
    iteration = st.slider(
        "选择迭代次数",
        min_value=MIN_ITERATION,
        max_value=MAX_ITERATION,
        step=1,
        key="iteration",
    )
    max_samples = len(df)
    num_samples = st.slider(
        "选择采样论文数量",
        min_value=1,
        max_value=min(100, max_samples),
        value=min(10, max_samples),
        step=1,
    )
    autoplay = st.checkbox("自动播放", value=False, key="autoplay")

# Main page layout.
st.header("高斯混合分布聚类结果")

# Randomly sample rows.
# NOTE(review): `iteration` is only used as the random seed (and in the plot
# title); the data is not filtered by iteration. Confirm whether the CSV has a
# per-iteration column that should be used here instead.
sampled_df = df.sample(n=num_samples, random_state=iteration)

# Scatter plot of the sampled points, coloured by cluster.
fig = px.scatter(
    sampled_df,
    x="x",
    y="y",
    color="cluster",
    hover_data=["title", "keywords", "rating_avg", "confidence_avg", "author", "site"],
    title=f"高斯混合分布聚类（迭代 {iteration}）",
)

# Add one centroid marker per cluster. drop_duplicates keeps the first sampled
# row of each cluster, in order of first appearance, so the centroid comes
# from the same row the original per-cluster `.iloc[0]` lookup used.
first_rows = sampled_df.drop_duplicates(subset="cluster")
for cluster, centroid_x, centroid_y in zip(
    first_rows["cluster"], first_rows["centroid_x"], first_rows["centroid_y"]
):
    fig.add_scatter(
        x=[centroid_x],
        y=[centroid_y],
        mode="markers",
        marker=dict(size=15, color="black", symbol="x"),
        name=f"Cluster {cluster} Center",
    )

# Let the chart use the full container width.
st.plotly_chart(fig, use_container_width=True)

# Details of the sampled rows.
st.subheader("采样论文详细信息")
st.dataframe(sampled_df[["title", "keywords", "rating_avg", "confidence_avg", "site"]])

# Second visualisation: rating distribution.
st.header("论文评分分布")

# Bar chart of the average rating per sampled row.
fig_bar = px.bar(
    sampled_df,
    x="title",
    y="rating_avg",
    color="cluster",
    title="论文评分分布",
    hover_data=["keywords", "confidence_avg", "author"],
)

# Axis labels and tilted tick labels.
fig_bar.update_layout(
    xaxis_title="论文标题",
    yaxis_title="平均评分",
    xaxis_tickangle=-45,
)

# Show the bar chart.
st.plotly_chart(fig_bar, use_container_width=True)

# Autoplay: runs last so the current iteration is fully rendered before the
# pause. Playback advances one step per rerun from the current slider value
# and stops once MAX_ITERATION is reached.
if autoplay and iteration < MAX_ITERATION:
    time.sleep(AUTOPLAY_DELAY_SECONDS)
    st.session_state["pending_iteration"] = iteration + 1
    # st.experimental_rerun was deprecated in favour of st.rerun; use
    # whichever the installed Streamlit provides.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()
gmm_point_tracking_with_centroids.csv ADDED
The diff for this file is too large to render. See raw diff