heaversm committed on
Commit
04e99d1
·
1 Parent(s): 91b8fbc

modify app to output contributors, as well as provide a dropdown list of repos, and ability to list multiple files

Browse files
Files changed (2) hide show
  1. app.py +71 -21
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,11 +1,14 @@
1
  import streamlit as st
2
  import os
3
  from dotenv import load_dotenv
4
- from langchain.document_loaders import GithubFileLoader
 
5
  # from langchain.embeddings import HuggingFaceEmbeddings
6
  from langchain_huggingface import HuggingFaceEmbeddings
7
  from langchain_community.vectorstores import FAISS
8
  from langchain_text_splitters import CharacterTextSplitter
 
 
9
 
10
  load_dotenv()
11
 
@@ -13,6 +16,10 @@ load_dotenv()
13
  GITHUB_ACCESS_TOKEN = os.getenv("GITHUB_ACCESS_TOKEN")
14
  GITHUB_BASE_URL = "https://github.com/"
15
 
 
 
 
 
16
 
17
  @st.cache_resource
18
  def get_hugging_face_model():
@@ -24,30 +31,65 @@ def get_similar_files(query, db, embeddings):
24
  docs_and_scores = db.similarity_search_with_score(query)
25
  return docs_and_scores
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  # STREAMLIT INTERFACE
28
  st.title("Find Similar Code")
29
 
30
  st.markdown("This app takes a code sample you provide, and finds similar code in a Github repository.")
31
  st.markdown("This functionality could ideally be implemented across multiple repos to allow you to find helpful examples of how to implement the code you are working on writing, or identify other code contributors who could help you resolve your issues")
32
 
33
- USER = st.text_input("Enter the Github User", value = "heaversm")
34
- REPO = st.text_input("Enter the Github Repository", value = "gdrive-docker")
35
- FILE_TYPES_TO_LOAD = st.multiselect("Select File Types", [".py", ".ts",".js",".css",".html"], default = [".py"])
 
 
 
 
 
 
 
 
 
36
 
37
  text_input = st.text_area("Enter a Code Example", value =
38
  """
39
- def create_app():
40
- app = connexion.FlaskApp(__name__, specification_dir="../.openapi")
41
- app.add_api(
42
- API_VERSION, resolver=connexion.resolver.RelativeResolver("provider.app")
43
- )
44
  """, height = 330
45
  )
46
 
47
- button = st.button("Find Similar Code")
48
 
49
-
50
- if button:
51
  loader = GithubFileLoader(
52
  repo=f"{USER}/{REPO}",
53
  access_token=GITHUB_ACCESS_TOKEN,
@@ -63,16 +105,24 @@ if button:
63
  db = FAISS.from_documents(docs, embedding_vector)
64
  query = text_input
65
  results_with_scores = get_similar_files(query, db, embedding_vector)
 
66
  for doc, score in results_with_scores:
67
- print(f"Path: {doc.metadata['path']}, Score: {score}")
68
-
69
- top_file_path = results_with_scores[0][0].metadata['path']
70
- top_file_content = results_with_scores[0][0].page_content
71
- top_file_score = results_with_scores[0][1]
72
- top_file_link = f"{GITHUB_BASE_URL}{USER}/{REPO}/blob/main/{top_file_path}"
73
- # write a clickable link in streamlit
74
- st.markdown(f"[Top file link]({top_file_link})")
75
 
 
 
 
 
 
 
 
 
 
76
 
77
  else:
78
- st.info("Please Submit a Code Sample to Find Similar Code")
 
 
 
 
1
  import streamlit as st
2
  import os
3
  from dotenv import load_dotenv
4
+ # from langchain.document_loaders import GithubFileLoader
5
+ from langchain_community.document_loaders import GithubFileLoader
6
  # from langchain.embeddings import HuggingFaceEmbeddings
7
  from langchain_huggingface import HuggingFaceEmbeddings
8
  from langchain_community.vectorstores import FAISS
9
  from langchain_text_splitters import CharacterTextSplitter
10
+ from github import Github
11
+ from github import Auth
12
 
13
  load_dotenv()
14
 
 
16
  GITHUB_ACCESS_TOKEN = os.getenv("GITHUB_ACCESS_TOKEN")
17
  GITHUB_BASE_URL = "https://github.com/"
18
 
19
+ # initialize Github
20
+ auth = Auth.Token(GITHUB_ACCESS_TOKEN)
21
+ g = Github(auth=auth)
22
+
23
 
24
  @st.cache_resource
25
  def get_hugging_face_model():
 
31
  docs_and_scores = db.similarity_search_with_score(query)
32
  return docs_and_scores
33
 
34
def fetch_repos(username):
    """Return the names of the repositories listed for a GitHub user.

    The lookup goes through the module-level PyGithub client ``g``.

    Args:
        username: GitHub login passed to ``g.get_user``.

    Returns:
        A list of repository names. If anything raises during the lookup,
        the error is shown on the page with ``st.error`` and an empty list
        is returned instead.
    """
    print(f"Fetching repositories for user: {username}")
    try:
        account = g.get_user(username)
        print(f"User: {account}")
        repo_names = []
        for repository in account.get_repos():
            repo_names.append(repository.name)
        return repo_names
    except Exception as err:
        # Best-effort: surface the problem in the UI rather than crash the app.
        st.error(f"Error fetching repositories: {err}")
        return []
43
+
44
def get_file_contributors(repo_name, file_path):
    """Count how many commits each author made to one file.

    The repository is resolved as ``{USER}/{repo_name}`` through the
    module-level PyGithub client ``g``; ``USER`` is the module-level value
    read from the "Enter the Github User" text input.

    Args:
        repo_name: Repository name, without the owner prefix.
        file_path: Path of the file inside the repository; only commits
            touching this path are requested.

    Returns:
        A dict mapping author login to commit count. Commits whose
        ``author`` is falsy are counted under ``"Unknown"``. If anything
        raises, the error is shown with ``st.error`` and an empty dict is
        returned.
    """
    try:
        repository = g.get_repo(f"{USER}/{repo_name}")
        file_commits = repository.get_commits(path=file_path)
        commit_counts = {}
        for commit in file_commits:
            login = commit.author.login if commit.author else "Unknown"
            # dict.get supplies the starting count for a login not seen yet.
            commit_counts[login] = commit_counts.get(login, 0) + 1
        return commit_counts
    except Exception as err:
        # Best-effort: surface the problem in the UI rather than crash the app.
        st.error(f"Error fetching contributors: {err}")
        return {}
59
+
60
+ # Initialize session state for repositories
61
+ if "repos" not in st.session_state:
62
+ st.session_state.repos = []
63
+
64
  # STREAMLIT INTERFACE
65
  st.title("Find Similar Code")
66
 
67
  st.markdown("This app takes a code sample you provide, and finds similar code in a Github repository.")
68
  st.markdown("This functionality could ideally be implemented across multiple repos to allow you to find helpful examples of how to implement the code you are working on writing, or identify other code contributors who could help you resolve your issues")
69
 
70
+ USER = st.text_input("Enter the Github User", value = "Satttoshi")
71
+
72
+ fetch_repos_button = st.button("Fetch Repositories")
73
+
74
+ if fetch_repos_button:
75
+ st.session_state.repos = fetch_repos(USER)
76
+
77
+
78
+ REPO = st.selectbox("Select a Github Repository", options=st.session_state.repos)
79
+
80
+
81
+ FILE_TYPES_TO_LOAD = st.multiselect("Select File Types", [".py", ".ts",".js",".css",".html"], default = [".ts"])
82
 
83
  text_input = st.text_area("Enter a Code Example", value =
84
  """
85
+
 
 
 
 
86
  """, height = 330
87
  )
88
 
89
+ find_similar_code_button = st.button("Find Similar Code")
90
 
91
+ if find_similar_code_button:
92
+ print(f"Searching for similar code in {USER}/{REPO}")
93
  loader = GithubFileLoader(
94
  repo=f"{USER}/{REPO}",
95
  access_token=GITHUB_ACCESS_TOKEN,
 
105
  db = FAISS.from_documents(docs, embedding_vector)
106
  query = text_input
107
  results_with_scores = get_similar_files(query, db, embedding_vector)
108
+ results_with_scores = results_with_scores[:5] #limit to 5 results
109
  for doc, score in results_with_scores:
110
+ #print all metadata info in the doc.metadata dictionary
111
+ # for key, value in doc.metadata.items():
112
+ # print(f"{key}: {value}")
 
 
 
 
 
113
 
114
+ path = doc.metadata['path']
115
+ content = doc.page_content
116
+ score = round(float(score), 2)
117
+ contributors = get_file_contributors(REPO, path)
118
+ print(f"Path: {doc.metadata['path']}, Score: {score}, Contributors: {contributors}")
119
+ file_link = f"{GITHUB_BASE_URL}{USER}/{REPO}/blob/main/{path}"
120
+ st.markdown(f"[{path}]({file_link})")
121
+ for contributor, count in contributors.items():
122
+ st.write(f"* Contributor: [{contributor}](https://github.com/{contributor}), Commits: {count}")
123
 
124
  else:
125
+ st.info("Please Submit a Code Sample to Find Similar Code")
126
+
127
+ #https://github.com/heaversm/gdrive-docker/blob/main/gdrive/provider/__init__.py
128
+ #https://github.com/heaversm/gdrive-docker/blob/main/gdrive/provider/__init__.py
requirements.txt CHANGED
@@ -7,3 +7,4 @@ langchain_text_splitters
7
  sentence-transformers
8
  faiss-cpu
9
  altair==4.0
 
 
7
  sentence-transformers
8
  faiss-cpu
9
  altair==4.0
10
+ PyGithub