File size: 1,222 Bytes
db9ca60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import plotly.express as px

def visualize_gmm(sampled_df, iteration):
    """Build a scatter plot of the clustered samples with one marker per cluster center.

    Args:
        sampled_df: Table passed to ``px.scatter``. The call references the
            columns ``x``, ``y``, ``cluster``, ``title``, ``keywords``,
            ``rating_avg``, ``confidence_avg``, ``author`` and ``site``; the
            center markers additionally read ``centroid_x`` and ``centroid_y``.
        iteration: Value interpolated into the figure title.

    Returns:
        The figure created by ``px.scatter`` with one extra scatter trace
        added per distinct cluster label.
    """
    fig = px.scatter(
        sampled_df,
        x="x",
        y="y",
        color="cluster",
        hover_data=["title", "keywords", "rating_avg", "confidence_avg", "author", "site"],
        # Title text reads "Gaussian mixture clustering (iteration N)".
        title=f"高斯混合分布聚类(迭代 {iteration})",
    )

    # Add the cluster center markers.
    # The center of a cluster is read from the first row carrying that label.
    # Rows without a label are skipped: a missing label never compares equal
    # to itself, so looking up "its" rows would find nothing to read from.
    labelled = sampled_df.dropna(subset=["cluster"])
    # drop_duplicates keeps the first occurrence of each label, in order of
    # first appearance, using a single pass over the frame instead of one
    # boolean-mask scan per coordinate per cluster.
    first_rows = labelled.drop_duplicates(subset="cluster")
    for cluster, centroid_x, centroid_y in zip(
        first_rows["cluster"],
        first_rows["centroid_x"],
        first_rows["centroid_y"],
    ):
        fig.add_scatter(
            x=[centroid_x],
            y=[centroid_y],
            mode="markers",
            marker=dict(size=15, color="black", symbol="x"),
            name=f"Cluster {cluster} Center",
        )

    return fig

def visualize_ratings(sampled_df):
    """Build a bar chart of ``rating_avg`` per ``title``, colored by ``cluster``.

    Args:
        sampled_df: Table passed to ``px.bar``. The call references the
            columns ``title``, ``rating_avg``, ``cluster``, ``keywords``,
            ``confidence_avg`` and ``author``.

    Returns:
        The figure created by ``px.bar`` after the axis titles and the
        x-axis tick angle have been applied.
    """
    hover_columns = ["keywords", "confidence_avg", "author"]

    # Axis captions read "paper title" and "average rating"; the x tick
    # labels are rotated by -45 degrees.
    axis_layout = {
        "xaxis_title": "论文标题",
        "yaxis_title": "平均评分",
        "xaxis_tickangle": -45,
    }

    chart = px.bar(
        sampled_df,
        x="title",
        y="rating_avg",
        color="cluster",
        # Chart title reads "paper rating distribution".
        title="论文评分分布",
        hover_data=hover_columns,
    )
    chart.update_layout(**axis_layout)

    return chart