Ilyas KHIAT committed · Commit 7b897df · Parent(s): c80303c

chatbot

Files changed:
- audit_page/knowledge_graph.py +97 -8
- utils/audit/response_llm.py +3 -3
- utils/kg/construct_kg.py +1 -1
audit_page/knowledge_graph.py CHANGED

@@ -2,6 +2,9 @@ import streamlit as st
 from utils.kg.construct_kg import get_graph
 from utils.audit.rag import get_text_from_content_for_doc,get_text_from_content_for_audio
 from streamlit_agraph import agraph, Node, Edge, Config
+import random
+import math
+from utils.audit.response_llm import generate_response_openai
 
 def if_node_exists(nodes, node_id):
     """
@@ -19,7 +22,51 @@ def if_node_exists(nodes, node_id):
             return True
     return False
 
-def convert_neo4j_to_agraph(neo4j_graph):
+def generate_random_color():
+    r = random.randint(180, 255)
+    g = random.randint(180, 255)
+    b = random.randint(180, 255)
+    return (r, g, b)
+
+def rgb_to_hex(rgb):
+    return '#{:02x}{:02x}{:02x}'.format(rgb[0], rgb[1], rgb[2])
+
+def get_node_types(graph):
+    node_types = set()
+    for node in graph.nodes:
+        node_types.add(node.type)
+    for relationship in graph.relationships:
+        source = relationship.source
+        target = relationship.target
+        node_types.add(source.type)
+        node_types.add(target.type)
+    return node_types
+
+def color_distance(color1, color2):
+    # Calculate Euclidean distance between two RGB colors
+    return math.sqrt((color1[0] - color2[0]) ** 2 + (color1[1] - color2[1]) ** 2 + (color1[2] - color2[2]) ** 2)
+
+def generate_distinct_colors(num_colors, min_distance=30):
+    colors = []
+    while len(colors) < num_colors:
+        new_color = generate_random_color()
+        if all(color_distance(new_color, existing_color) >= min_distance for existing_color in colors):
+            colors.append(new_color)
+    return [rgb_to_hex(color) for color in colors]
+
+def list_to_dict_colors(node_types:set):
+
+    number_of_colors = len(node_types)
+    colors = generate_distinct_colors(number_of_colors)
+
+    node_colors = {}
+    for i, node_type in enumerate(node_types):
+        node_colors[node_type] = colors[i]
+
+    return node_colors
+
+
+def convert_neo4j_to_agraph(neo4j_graph, node_colors):
     """
     Converts a Neo4j graph into an Agraph format.
 
@@ -39,11 +86,12 @@ def convert_neo4j_to_agraph(neo4j_graph):
         # Use the node id as the Agraph node id
         node_id = node.id.replace(" ", "_")  # Replace spaces with underscores for ids
         label = node.id
+        type = node.type
         size = 25  # Default size, can be customized
         shape = "circle"  # Default shape, can be customized
 
         # For example purposes, no images are added, but you can set 'image' if needed.
-        nodes.append(Node(id=node_id, label=label, size=size, shape=shape))
+        nodes.append(Node(id=node_id,title=type, label=label, size=size, shape=shape,color=node_colors[type]))
 
     # Creating Agraph edges
     for relationship in neo4j_graph.relationships:
@@ -51,18 +99,20 @@ def convert_neo4j_to_agraph(neo4j_graph):
         shape = "circle"  # Default shape, can be customized
 
         source = relationship.source
+        source_type = source.type
         source_id = source.id.replace(" ", "_")
        label_source = source.id
 
-        source_node = Node(id=source_id, label=label_source, size=size, shape=shape)
+        source_node = Node(id=source_id,title=source_type, label=label_source, size=size, shape=shape,color=node_colors[source_type])
         if not if_node_exists(nodes, source_node.id):
             nodes.append(source_node)
 
         target = relationship.target
+        target_type = target.type
         target_id = target.id.replace(" ", "_")
         label_target = target.id
 
-        target_node = Node(id=target_id, label=label_target, size=size, shape=shape)
+        target_node = Node(id=target_id,title=target_type, label=label_target, size=size, shape=shape,color=node_colors[target_type])
         if not if_node_exists(nodes, target_node.id):
             nodes.append(target_node)
 
@@ -74,9 +124,19 @@ def convert_neo4j_to_agraph(neo4j_graph):
     config = Config(width=1200, height=950, directed=True, physics=False, hierarchical=False, nodeSpacing=500)
 
     # Create the Agraph visualization
-    return_value = agraph(nodes=nodes, edges=edges, config=config)
 
-    return
+    return edges, nodes, config
+
+def display_graph(edges, nodes, config):
+    # Display the Agraph visualization
+    agraph(edges=edges, nodes=nodes, config=config)
+
+def filter_nodes_by_types(nodes:list[Node], node_types_filter:list) -> list[Node]:
+    filtered_nodes = []
+    for node in nodes:
+        if node.title in node_types_filter: #the title represents the type of the node
+            filtered_nodes.append(node)
+    return filtered_nodes
 
 
 def kg_main():
@@ -85,6 +145,13 @@ def kg_main():
         st.session_state.graph = None
     st.title("Graphe de connaissance")
 
+    if "node_types" not in st.session_state:
+        st.session_state.node_types = None
+
+    if "summary" not in st.session_state:
+        st.session_state.summary = None
+
+
     if "audit" not in st.session_state or st.session_state.audit == {}:
         st.error("Veuillez d'abord effectuer un audit pour obtenir des recommandations d'agents.")
         return
@@ -96,18 +163,40 @@ def kg_main():
         text = get_text_from_content_for_doc(content)
     elif audit["type de fichier"] == "audio":
         text = get_text_from_content_for_audio(content)
+
+    #summary_prompt = f"Voici un ensemble de documents : {text}. À partir de ces documents, veuillez fournir des résumés concis en vous concentrant sur l'extraction des relations essentielles et des événements. Il est crucial d'inclure les dates des actions ou des événements, car elles seront utilisées pour l'analyse chronologique. Par exemple : 'Sam a été licencié par le conseil d'administration d'OpenAI le 17 novembre 2023 (17 novembre, vendredi)', ce qui illustre la relation entre Sam et OpenAI ainsi que la date de l'événement."
 
     if st.button("Générer le graphe"):
+        # with st.spinner("Extractions des relations..."):
+        #     sum = generate_response_openai(summary_prompt,model="gpt-4o")
+        #     st.session_state.summary = sum
+
        with st.spinner("Génération du graphe..."):
            graph = get_graph(text)
-
+            st.session_state.graph = graph
+
+            node_types = get_node_types(graph[0])
+            nodes_type_dict = list_to_dict_colors(node_types)
+            st.session_state.node_types = nodes_type_dict
+
    else:
        graph = st.session_state.graph
+
    if graph is not None:
        #st.write(graph)
-
+
+        edges,nodes,config = convert_neo4j_to_agraph(graph[0],st.session_state.node_types)
+        filter = st.multiselect("Filtrer selon l'étiquette",st.session_state.node_types.keys(),placeholder="Sélectionner une ou plusieurs étiquettes")
+
+        if filter:
+            nodes = filter_nodes_by_types(nodes,filter)
+        display_graph(edges,nodes,config)
+
+        node_types = st.session_state.node_types




+
+
kg_main()
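For reference, the color assignment added in this file amounts to rejection sampling in RGB space: draw random light colors (each channel between 180 and 255 so dark labels stay readable) and accept a candidate only if its Euclidean distance to every already-accepted color is at least min_distance, then map each node type to one hex color. A self-contained sketch of the same idea (the node type names are made up for illustration):

    import math
    import random

    def generate_random_color():
        # light colors only: each RGB channel between 180 and 255
        return tuple(random.randint(180, 255) for _ in range(3))

    def color_distance(c1, c2):
        # Euclidean distance between two RGB triples
        return math.sqrt(sum((a - b) ** 2 for a, b in zip(c1, c2)))

    def generate_distinct_colors(num_colors, min_distance=30):
        # rejection sampling: keep a candidate only if it is far enough from every accepted color
        colors = []
        while len(colors) < num_colors:
            candidate = generate_random_color()
            if all(color_distance(candidate, accepted) >= min_distance for accepted in colors):
                colors.append(candidate)
        return ['#{:02x}{:02x}{:02x}'.format(*c) for c in colors]

    node_types = {"Person", "Organization", "Event"}   # hypothetical node types
    node_colors = dict(zip(node_types, generate_distinct_colors(len(node_types))))
    print(node_colors)   # e.g. {'Person': '#e6c3d4', 'Organization': '#c8f0d2', 'Event': '#f4e4b8'}

This is the same mapping that convert_neo4j_to_agraph consumes via node_colors[type]; the type is also stored in each Node's title, which is what filter_nodes_by_types filters on.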
utils/audit/response_llm.py CHANGED

@@ -3,10 +3,10 @@ from langchain_openai import ChatOpenAI
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import PromptTemplate
 
-def generate_response_openai(prompt: str,stream:bool = False) -> str:
+def generate_response_openai(prompt: str,stream:bool = False,model = "gpt-4o-mini") -> str:
     client = OpenAI()
     response = client.chat.completions.create(
-        model=
+        model=model,
         messages=[
             {"role": "user", "content": prompt}
         ],
@@ -18,7 +18,7 @@ def generate_response_openai(prompt: str,stream:bool = False) -> str:
 
 def generate_response_via_langchain(query: str, stream: bool = False, model: str = "gpt-4o-mini") :
     # Define the prompt template
-    template = "
+    template = "{query}"
     prompt = PromptTemplate.from_template(template)
 
     # Initialize the OpenAI LLM with the specified model
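As a usage note, the new model parameter of generate_response_openai defaults to "gpt-4o-mini" and is passed straight through to client.chat.completions.create; the commented-out summary step in kg_main overrides it with "gpt-4o". A minimal calling sketch (assumes OPENAI_API_KEY is set in the environment; the prompt text is illustrative):

    from utils.audit.response_llm import generate_response_openai

    # uses the default model ("gpt-4o-mini")
    answer = generate_response_openai("Summarize this document in one sentence: ...")

    # explicit model override, as in the commented-out summary step of kg_main
    answer = generate_response_openai("Summarize this document in one sentence: ...", model="gpt-4o")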
utils/kg/construct_kg.py CHANGED

@@ -5,13 +5,13 @@ from langchain_core.documents import Document
 
 def get_graph(text):
 
-    graph = Neo4jGraph()
     llm = ChatOpenAI(temperature=0, model_name="gpt-4o")
 
     llm_transformer = LLMGraphTransformer(llm=llm)
     documents = [Document(page_content=text)]
 
     graph_documents = llm_transformer.convert_to_graph_documents(documents)
+
     return graph_documents
 
 
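With graph = Neo4jGraph() removed, get_graph no longer instantiates a Neo4jGraph (and so no longer needs a Neo4j connection); it only runs the LLMGraphTransformer and returns the list of GraphDocument objects, one per input Document, which is why callers index graph[0]. A minimal usage sketch (assumes OPENAI_API_KEY is set; the input sentence is illustrative):

    from utils.kg.construct_kg import get_graph

    graph_documents = get_graph("Marie Curie received the Nobel Prize in Physics in 1903.")
    doc_graph = graph_documents[0]   # one GraphDocument per input Document

    # GraphDocument exposes .nodes (with .id and .type) and .relationships (with .source, .type, .target)
    print([(node.id, node.type) for node in doc_graph.nodes])
    print([(rel.source.id, rel.type, rel.target.id) for rel in doc_graph.relationships])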