Ekjaer committed on
Commit
750ff86
1 Parent(s): 9345112

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +42 -0
README.md CHANGED
@@ -45,7 +45,45 @@ Use the code below to get started with the model.
45
  import xgboost as xgb
46
  import pandas as pd
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  N_CPU = 8 # Number of CPUs used
 
49
 
50
  # Load model
51
  bst = xgb.Booster({'nthread': N_CPU})
@@ -60,6 +98,10 @@ labels = pd.read_csv("labels.csv", index_col=0)
60
 
61
  # Do inference
62
  pred = bst.predict(data_xgb)
 
 
 
 
63
  ```
64
 
65
 
 
45
  import xgboost as xgb
46
  import pandas as pd
47
 
48
+ def show_best(pred: np.ndarray,
49
+ best_list: np.ndarray,
50
+ df_stru_catalog: pd.DataFrame,
51
+ num_show: int) -> None:
52
+ """
53
+ Display the best predictions based on the model output.
54
+
55
+ Parameters
56
+ ----------
57
+ pred : np.ndarray
58
+ Predictions from the model.
59
+ best_list : np.ndarray
60
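+ Indices into pred sorted in ascending order of predicted probability (e.g. the output of np.argsort); the last num_show entries are the best predictions.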
61
+ df_stru_catalog : pd.DataFrame
62
+ The structure catalog associated with the model.
63
+ num_show : int
64
+ Number of top predictions to show.
65
+
66
+ Returns
67
+ -------
68
+ None
69
+ """
70
+ for count, idx in enumerate(reversed(best_list[-num_show:])):
71
+ print(f"\n{count}) Probability: {pred[idx]*100:3.1f}%")
72
+
73
+ compo = clean_string(df_stru_catalog.iloc[idx]["composition"])
74
+ sgs = clean_string(df_stru_catalog.iloc[idx]["space_group_symmetry"])
75
+
76
+ print(f' COD-IDs: {df_stru_catalog.iloc[idx]["Label"].rsplit(".",1)[0]}, composition: {compo[0]}, space group: {sgs[0]}')
77
+ if not pd.isna(df_stru_catalog.at[idx, "Similar"]):
78
+ similar_files = extract_filenames(df_stru_catalog.at[idx, "Similar"])
79
+ compo = clean_string(df_stru_catalog.iloc[idx]["composition"])
80
+ sgs = clean_string(df_stru_catalog.iloc[idx]["space_group_symmetry"])
81
+ for jdx in range(len(similar_files)):
82
+ print(f' COD-IDs: {similar_files[jdx]}, composition: {compo[jdx]}, space group: {sgs[jdx]}')
83
+
84
+
85
  N_CPU = 8 # Number of CPUs used
86
+ NUM_SHOW = 5 # Show the X best predictions
87
 
88
  # Load model
89
  bst = xgb.Booster({'nthread': N_CPU})
 
98
 
99
  # Do inference
100
  pred = bst.predict(data_xgb)
101
+
102
+ # Show the best predictions
103
+ best_list = np.argsort(pred)
104
+ show_best(pred[0], best_list[0], df_stru_catalog, NUM_SHOW)
105
  ```
106
 
107