Spaces:
Runtime error
Runtime error
Modify the app to output contributors, provide a dropdown list of repos, and add the ability to list multiple files
Browse files- app.py +71 -21
- requirements.txt +1 -0
app.py
CHANGED
@@ -1,11 +1,14 @@
|
|
1 |
import streamlit as st
|
2 |
import os
|
3 |
from dotenv import load_dotenv
|
4 |
-
from langchain.document_loaders import GithubFileLoader
|
|
|
5 |
# from langchain.embeddings import HuggingFaceEmbeddings
|
6 |
from langchain_huggingface import HuggingFaceEmbeddings
|
7 |
from langchain_community.vectorstores import FAISS
|
8 |
from langchain_text_splitters import CharacterTextSplitter
|
|
|
|
|
9 |
|
10 |
load_dotenv()
|
11 |
|
@@ -13,6 +16,10 @@ load_dotenv()
|
|
13 |
GITHUB_ACCESS_TOKEN = os.getenv("GITHUB_ACCESS_TOKEN")
|
14 |
GITHUB_BASE_URL = "https://github.com/"
|
15 |
|
|
|
|
|
|
|
|
|
16 |
|
17 |
@st.cache_resource
|
18 |
def get_hugging_face_model():
|
@@ -24,30 +31,65 @@ def get_similar_files(query, db, embeddings):
|
|
24 |
docs_and_scores = db.similarity_search_with_score(query)
|
25 |
return docs_and_scores
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
# STREAMLIT INTERFACE
|
28 |
st.title("Find Similar Code")
|
29 |
|
30 |
st.markdown("This app takes a code sample you provide, and finds similar code in a Github repository.")
|
31 |
st.markdown("This functionality could ideally be implemented across multiple repos to allow you to find helpful examples of how to implement the code you are working on writing, or identify other code contributors who could help you resolve your issues")
|
32 |
|
33 |
-
USER = st.text_input("Enter the Github User", value = "
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
text_input = st.text_area("Enter a Code Example", value =
|
38 |
"""
|
39 |
-
|
40 |
-
app = connexion.FlaskApp(__name__, specification_dir="../.openapi")
|
41 |
-
app.add_api(
|
42 |
-
API_VERSION, resolver=connexion.resolver.RelativeResolver("provider.app")
|
43 |
-
)
|
44 |
""", height = 330
|
45 |
)
|
46 |
|
47 |
-
|
48 |
|
49 |
-
|
50 |
-
|
51 |
loader = GithubFileLoader(
|
52 |
repo=f"{USER}/{REPO}",
|
53 |
access_token=GITHUB_ACCESS_TOKEN,
|
@@ -63,16 +105,24 @@ if button:
|
|
63 |
db = FAISS.from_documents(docs, embedding_vector)
|
64 |
query = text_input
|
65 |
results_with_scores = get_similar_files(query, db, embedding_vector)
|
|
|
66 |
for doc, score in results_with_scores:
|
67 |
-
print
|
68 |
-
|
69 |
-
|
70 |
-
top_file_content = results_with_scores[0][0].page_content
|
71 |
-
top_file_score = results_with_scores[0][1]
|
72 |
-
top_file_link = f"{GITHUB_BASE_URL}{USER}/{REPO}/blob/main/{top_file_path}"
|
73 |
-
# write a clickable link in streamlit
|
74 |
-
st.markdown(f"[Top file link]({top_file_link})")
|
75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
else:
|
78 |
-
st.info("Please Submit a Code Sample to Find Similar Code")
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import os
|
3 |
from dotenv import load_dotenv
|
4 |
+
# from langchain.document_loaders import GithubFileLoader
|
5 |
+
from langchain_community.document_loaders import GithubFileLoader
|
6 |
# from langchain.embeddings import HuggingFaceEmbeddings
|
7 |
from langchain_huggingface import HuggingFaceEmbeddings
|
8 |
from langchain_community.vectorstores import FAISS
|
9 |
from langchain_text_splitters import CharacterTextSplitter
|
10 |
+
from github import Github
|
11 |
+
from github import Auth
|
12 |
|
13 |
load_dotenv()
|
14 |
|
|
|
16 |
GITHUB_ACCESS_TOKEN = os.getenv("GITHUB_ACCESS_TOKEN")
|
17 |
GITHUB_BASE_URL = "https://github.com/"
|
18 |
|
19 |
+
# initialize Github
|
20 |
+
auth = Auth.Token(GITHUB_ACCESS_TOKEN)
|
21 |
+
g = Github(auth=auth)
|
22 |
+
|
23 |
|
24 |
@st.cache_resource
|
25 |
def get_hugging_face_model():
|
|
|
31 |
docs_and_scores = db.similarity_search_with_score(query)
|
32 |
return docs_and_scores
|
33 |
|
34 |
+
def fetch_repos(username):
    """Return the names of the repositories owned by a GitHub user.

    The lookup goes through the module-level PyGithub client ``g``.

    Args:
        username: GitHub login whose repositories should be listed.

    Returns:
        A list of repository names. If anything raises during the lookup,
        the error is shown in the Streamlit UI and an empty list is returned.
    """
    print(f"Fetching repositories for user: {username}")
    try:
        account = g.get_user(username)
        print(f"User: {account}")
        repo_names = []
        for repository in account.get_repos():
            repo_names.append(repository.name)
    except Exception as err:
        # Surface the failure in the UI rather than crashing the app.
        st.error(f"Error fetching repositories: {err}")
        return []
    return repo_names
|
43 |
+
|
44 |
+
def get_file_contributors(repo_name, file_path):
    """Count commits per author for one file in a repository.

    The repository is resolved as ``{USER}/{repo_name}`` using the
    module-level ``USER`` value and the PyGithub client ``g``.

    Args:
        repo_name: Name of the repository (without the owner prefix).
        file_path: Path of the file inside the repository.

    Returns:
        A dict mapping author login to the number of commits touching
        ``file_path``. Commits without an associated author are counted
        under ``"Unknown"``. If anything raises, the error is shown in the
        Streamlit UI and an empty dict is returned.
    """
    try:
        repository = g.get_repo(f"{USER}/{repo_name}")
        commit_counts = {}
        for commit in repository.get_commits(path=file_path):
            commit_author = commit.author
            # Some commits have no linked GitHub account.
            login = commit_author.login if commit_author else "Unknown"
            commit_counts[login] = commit_counts.get(login, 0) + 1
    except Exception as err:
        # Surface the failure in the UI rather than crashing the app.
        st.error(f"Error fetching contributors: {err}")
        return {}
    return commit_counts
|
59 |
+
|
60 |
+
# Initialize session state for repositories
|
61 |
+
if "repos" not in st.session_state:
|
62 |
+
st.session_state.repos = []
|
63 |
+
|
64 |
# STREAMLIT INTERFACE
|
65 |
st.title("Find Similar Code")
|
66 |
|
67 |
st.markdown("This app takes a code sample you provide, and finds similar code in a Github repository.")
|
68 |
st.markdown("This functionality could ideally be implemented across multiple repos to allow you to find helpful examples of how to implement the code you are working on writing, or identify other code contributors who could help you resolve your issues")
|
69 |
|
70 |
+
USER = st.text_input("Enter the Github User", value = "Satttoshi")
|
71 |
+
|
72 |
+
fetch_repos_button = st.button("Fetch Repositories")
|
73 |
+
|
74 |
+
if fetch_repos_button:
|
75 |
+
st.session_state.repos = fetch_repos(USER)
|
76 |
+
|
77 |
+
|
78 |
+
REPO = st.selectbox("Select a Github Repository", options=st.session_state.repos)
|
79 |
+
|
80 |
+
|
81 |
+
FILE_TYPES_TO_LOAD = st.multiselect("Select File Types", [".py", ".ts",".js",".css",".html"], default = [".ts"])
|
82 |
|
83 |
text_input = st.text_area("Enter a Code Example", value =
|
84 |
"""
|
85 |
+
|
|
|
|
|
|
|
|
|
86 |
""", height = 330
|
87 |
)
|
88 |
|
89 |
+
find_similar_code_button = st.button("Find Similar Code")
|
90 |
|
91 |
+
if find_similar_code_button:
|
92 |
+
print(f"Searching for similar code in {USER}/{REPO}")
|
93 |
loader = GithubFileLoader(
|
94 |
repo=f"{USER}/{REPO}",
|
95 |
access_token=GITHUB_ACCESS_TOKEN,
|
|
|
105 |
db = FAISS.from_documents(docs, embedding_vector)
|
106 |
query = text_input
|
107 |
results_with_scores = get_similar_files(query, db, embedding_vector)
|
108 |
+
results_with_scores = results_with_scores[:5] #limit to 5 results
|
109 |
for doc, score in results_with_scores:
|
110 |
+
#print all metadata info in the doc.metadata dictionary
|
111 |
+
# for key, value in doc.metadata.items():
|
112 |
+
# print(f"{key}: {value}")
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
+
path = doc.metadata['path']
|
115 |
+
content = doc.page_content
|
116 |
+
score = round(float(score), 2)
|
117 |
+
contributors = get_file_contributors(REPO, path)
|
118 |
+
print(f"Path: {doc.metadata['path']}, Score: {score}, Contributors: {contributors}")
|
119 |
+
file_link = f"{GITHUB_BASE_URL}{USER}/{REPO}/blob/main/{path}"
|
120 |
+
st.markdown(f"[{path}]({file_link})")
|
121 |
+
for contributor, count in contributors.items():
|
122 |
+
st.write(f"* Contributor: [{contributor}](https://github.com/{contributor}), Commits: {count}")
|
123 |
|
124 |
else:
|
125 |
+
st.info("Please Submit a Code Sample to Find Similar Code")
|
126 |
+
|
127 |
+
#https://github.com/heaversm/gdrive-docker/blob/main/gdrive/provider/__init__.py
|
128 |
+
#https://github.com/heaversm/gdrive-docker/blob/main/gdrive/provider/__init__.py
|
requirements.txt
CHANGED
@@ -7,3 +7,4 @@ langchain_text_splitters
|
|
7 |
sentence-transformers
|
8 |
faiss-cpu
|
9 |
altair==4.0
|
|
|
|
7 |
sentence-transformers
|
8 |
faiss-cpu
|
9 |
altair==4.0
|
10 |
+
PyGithub
|