SushantGautam committed on
Commit
03fbd26
·
1 Parent(s): b75dbd7

Initial project setup with Django, including core models, admin configuration, GraphQL schema, and data population script.

Browse files
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ **/*.json
2
+ venv
3
+ migrations
4
+ db.sqlite3*
BridgeMentor/__init__.py ADDED
File without changes
BridgeMentor/asgi.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ASGI config for BridgeMentor project.
3
+
4
+ It exposes the ASGI callable as a module-level variable named ``application``.
5
+
6
+ For more information on this file, see
7
+ https://docs.djangoproject.com/en/4.2/howto/deployment/asgi/
8
+ """
9
+
10
import os

from django.core.asgi import get_asgi_application

# Select the project settings unless the environment already names a module.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'BridgeMentor.settings')

# Module-level ASGI callable looked up by the ASGI server.
application = get_asgi_application()
BridgeMentor/settings.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Django settings for BridgeMentor project.
3
+
4
+ Generated by 'django-admin startproject' using Django 4.2.20.
5
+
6
+ For more information on this file, see
7
+ https://docs.djangoproject.com/en/4.2/topics/settings/
8
+
9
+ For the full list of settings and their values, see
10
+ https://docs.djangoproject.com/en/4.2/ref/settings/
11
+ """
12
+
13
import os
from pathlib import Path

# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# The committed key is only a development fallback; set DJANGO_SECRET_KEY in
# any real deployment so the secret never has to live in version control.
SECRET_KEY = os.environ.get(
    'DJANGO_SECRET_KEY',
    'django-insecure-*bimkkt===!#m(@d!x*he9z4gj#+ewbt9(y0o=8si2ny88w+yd',
)

# SECURITY WARNING: don't run with debug turned on in production!
# Defaults to True (the previous hard-coded value); set DJANGO_DEBUG to
# 0/false/no/off to disable it.
DEBUG = os.environ.get('DJANGO_DEBUG', 'True').strip().lower() in (
    '1', 'true', 'yes', 'on')

# Comma-separated host names from DJANGO_ALLOWED_HOSTS; empty list when unset,
# matching the previous hard-coded value.
ALLOWED_HOSTS = [
    host.strip()
    for host in os.environ.get('DJANGO_ALLOWED_HOSTS', '').split(',')
    if host.strip()
]


# Application definition

INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'django_filters',
    'graphene_django',
    'core',
]

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'BridgeMentor.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'BridgeMentor.wsgi.application'


# Database
# https://docs.djangoproject.com/en/4.2/ref/settings/#databases

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': BASE_DIR / 'db.sqlite3',
    }
}


# Password validation
# https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators

AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]


# Internationalization
# https://docs.djangoproject.com/en/4.2/topics/i18n/

LANGUAGE_CODE = 'en-us'

TIME_ZONE = 'UTC'

USE_I18N = True

USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/4.2/howto/static-files/

STATIC_URL = 'static/'

# Default primary key field type
# https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field

DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'

# graphene-django: dotted path to the project's graphene.Schema instance.
GRAPHENE = {
    "SCHEMA": "core.schema.schema"  # Adjust to match your project
}
BridgeMentor/urls.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ URL configuration for BridgeMentor project.
3
+
4
+ The `urlpatterns` list routes URLs to views. For more information please see:
5
+ https://docs.djangoproject.com/en/4.2/topics/http/urls/
6
+ Examples:
7
+ Function views
8
+ 1. Add an import: from my_app import views
9
+ 2. Add a URL to urlpatterns: path('', views.home, name='home')
10
+ Class-based views
11
+ 1. Add an import: from other_app.views import Home
12
+ 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
13
+ Including another URLconf
14
+ 1. Import the include() function: from django.urls import include, path
15
+ 2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
16
+ """
17
from django.contrib import admin
from django.urls import path
from graphene_django.views import GraphQLView

# GraphQL endpoint view with the in-browser GraphiQL explorer switched on.
graphql_view = GraphQLView.as_view(graphiql=True)

# Routes: the Django admin site and the GraphQL endpoint.
urlpatterns = [
    path('admin/', admin.site.urls),
    path("graphql/", graphql_view),
]
BridgeMentor/wsgi.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ WSGI config for BridgeMentor project.
3
+
4
+ It exposes the WSGI callable as a module-level variable named ``application``.
5
+
6
+ For more information on this file, see
7
+ https://docs.djangoproject.com/en/4.2/howto/deployment/wsgi/
8
+ """
9
+
10
import os

from django.core.wsgi import get_wsgi_application

# Select the project settings unless the environment already names a module.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'BridgeMentor.settings')

# Module-level WSGI callable looked up by the WSGI server.
application = get_wsgi_application()
core/__init__.py ADDED
File without changes
core/admin.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from django.contrib import admin
2
+ from .models import (
3
+ Institution, Author, Affiliation, Domain, Field, Subfield, Topic,
4
+ AuthorTopic, Work, AuthorYearlyStats, Concept, AuthorConcept
5
+ )
6
+
7
+
8
# Branding strings for the default admin site.
admin.site.index_title = "BridgeMentor Dashboard"
admin.site.site_title = "BridgeMentor Data Management"
admin.site.site_header = "BridgeMentor Admin"
11
+
12
+
13
@admin.register(Institution)
class InstitutionAdmin(admin.ModelAdmin):
    """Admin change-list configuration for Institution rows."""

    search_fields = ('name', 'ror_id')
    list_filter = ('country_code', 'institution_type')
    list_display = ('id', 'name', 'ror_id', 'country_code', 'institution_type')
18
+
19
+
20
@admin.register(Author)
class AuthorAdmin(admin.ModelAdmin):
    """Admin change-list configuration for Author rows."""

    search_fields = ('name', 'orcid')
    list_filter = ('h_index',)
    list_display = (
        'id', 'name', 'orcid', 'h_index',
        'i10_index', 'cited_by_count', 'works_count',
    )
26
+
27
+
28
@admin.register(Affiliation)
class AffiliationAdmin(admin.ModelAdmin):
    """Admin change-list configuration for author/institution/year links."""

    # Search follows the foreign keys to the related names.
    search_fields = ('author__name', 'institution__name')
    list_filter = ('year', 'is_last_known', 'institution')
    list_display = ('author', 'institution', 'year', 'is_last_known')
33
+
34
+
35
@admin.register(Domain)
class DomainAdmin(admin.ModelAdmin):
    """Admin change-list configuration for Domain rows."""

    search_fields = ('name',)
    list_display = ('id', 'name')
39
+
40
+
41
@admin.register(Field)
class FieldAdmin(admin.ModelAdmin):
    """Admin change-list configuration for Field rows, filterable by domain."""

    search_fields = ('name',)
    list_filter = ('domain',)
    list_display = ('id', 'name', 'domain')
46
+
47
+
48
@admin.register(Subfield)
class SubfieldAdmin(admin.ModelAdmin):
    """Admin change-list configuration for Subfield rows, filterable by field."""

    search_fields = ('name',)
    list_filter = ('field',)
    list_display = ('id', 'name', 'field')
53
+
54
+
55
@admin.register(Topic)
class TopicAdmin(admin.ModelAdmin):
    """Admin change-list configuration for Topic rows, filterable by subfield."""

    search_fields = ('name',)
    list_filter = ('subfield',)
    list_display = ('id', 'name', 'subfield')
60
+
61
+
62
@admin.register(AuthorTopic)
class AuthorTopicAdmin(admin.ModelAdmin):
    """Admin change-list configuration for author/topic links."""

    # Search follows the foreign keys to the related names.
    search_fields = ('author__name', 'topic__name')
    list_filter = ('topic',)
    list_display = ('author', 'topic', 'count', 'share_value')
67
+
68
+
69
@admin.register(Work)
class WorkAdmin(admin.ModelAdmin):
    """Admin change-list configuration for Work rows."""

    search_fields = ('title', 'author__name')
    list_filter = ('year',)
    list_display = ('id', 'title', 'author', 'year', 'cited_by_count')
74
+
75
+
76
@admin.register(AuthorYearlyStats)
class AuthorYearlyStatsAdmin(admin.ModelAdmin):
    """Admin change-list configuration for per-author, per-year counters."""

    search_fields = ('author__name',)
    list_filter = ('year',)
    list_display = ('author', 'year', 'works_count', 'cited_by_count')
81
+
82
+
83
@admin.register(Concept)
class ConceptAdmin(admin.ModelAdmin):
    """Admin change-list configuration for Concept rows."""

    search_fields = ('name',)
    list_display = ('id', 'name', 'level', 'score')
87
+
88
+
89
@admin.register(AuthorConcept)
class AuthorConceptAdmin(admin.ModelAdmin):
    """Admin change-list configuration for author/concept links."""

    # Search follows the foreign keys to the related names.
    search_fields = ('author__name', 'concept__name')
    list_display = ('author', 'concept', 'level', 'score')
core/apps.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from django.apps import AppConfig
2
+
3
+
4
class CoreConfig(AppConfig):
    """Application configuration for the ``core`` app."""

    name = 'core'
    # Models in this app that do not declare a primary key get a BigAutoField.
    default_auto_field = 'django.db.models.BigAutoField'
core/models.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from django.db import models
2
+
3
+
4
class TimestampedModel(models.Model):
    # Abstract base: contributes the two audit columns below to each subclass
    # and has no table of its own.
    class Meta:
        abstract = True

    # Set once, when the row is first created.
    created_at = models.DateTimeField(auto_now_add=True)
    # Reset to the current time on every Model.save().
    updated_at = models.DateTimeField(auto_now=True)
10
+
11
+
12
class Institution(TimestampedModel):
    # Institution referenced by author affiliations; keyed by a string id.
    id = models.CharField(primary_key=True, max_length=20)  # e.g., I204778367
    name = models.CharField(max_length=255)
    ror_id = models.CharField(unique=True, max_length=100)
    country_code = models.CharField(max_length=2)
    institution_type = models.CharField(max_length=50)  # funder, company, etc.

    def __str__(self):
        # Displayed by its name wherever the object is rendered as text.
        return self.name
21
+
22
+
23
class Author(TimestampedModel):
    # One author together with summary citation metrics.
    id = models.CharField(primary_key=True, max_length=20)  # e.g., A5006834808
    name = models.CharField(max_length=255)
    # Optional, but unique whenever a value is present.
    orcid = models.CharField(
        unique=True, null=True, blank=True, max_length=100)
    h_index = models.IntegerField()
    i10_index = models.IntegerField()
    cited_by_count = models.IntegerField()
    works_count = models.IntegerField()
    mean_2yr_citedness = models.FloatField()

    def __str__(self):
        # Displayed by its name wherever the object is rendered as text.
        return self.name
36
+
37
+
38
class Affiliation(TimestampedModel):
    """Link between an author and an institution for a single year.

    One row exists per (author, institution, year); ``is_last_known`` flags
    rows whose institution is among the author's last known institutions.
    """

    author = models.ForeignKey(
        Author, on_delete=models.CASCADE, related_name='affiliations')
    institution = models.ForeignKey(Institution, on_delete=models.CASCADE)
    year = models.IntegerField()
    is_last_known = models.BooleanField(default=False)

    class Meta:
        unique_together = ('author', 'institution', 'year')

    def __str__(self):
        # Without this the admin shows "Affiliation object (n)". Built from
        # the raw foreign-key ids so rendering never triggers extra queries.
        return f"{self.author_id} @ {self.institution_id} ({self.year})"
47
+
48
+
49
class Domain(TimestampedModel):
    # Top level of the Domain > Field > Subfield > Topic chain.
    id = models.CharField(primary_key=True, max_length=10)  # e.g., 3
    name = models.CharField(max_length=255)

    def __str__(self):
        # Displayed by its name wherever the object is rendered as text.
        return self.name
55
+
56
+
57
class Field(TimestampedModel):
    # Second level of the chain; each field belongs to one domain.
    id = models.CharField(primary_key=True, max_length=10)  # e.g., 23
    name = models.CharField(max_length=255)
    # Reverse accessor on Domain is `fields`.
    domain = models.ForeignKey(
        Domain, related_name='fields', on_delete=models.CASCADE)

    def __str__(self):
        # Displayed by its name wherever the object is rendered as text.
        return self.name
65
+
66
+
67
class Subfield(TimestampedModel):
    # Third level of the chain; each subfield belongs to one field.
    id = models.CharField(primary_key=True, max_length=10)  # e.g., 2304
    name = models.CharField(max_length=255)
    # Reverse accessor on Field is `subfields`.
    field = models.ForeignKey(
        Field, related_name='subfields', on_delete=models.CASCADE)

    def __str__(self):
        # Displayed by its name wherever the object is rendered as text.
        return self.name
75
+
76
+
77
class Topic(TimestampedModel):
    # Lowest level of the chain; each topic belongs to one subfield.
    id = models.CharField(primary_key=True, max_length=20)  # e.g., T13180
    name = models.CharField(max_length=255)
    # Reverse accessor on Subfield is `topics`.
    subfield = models.ForeignKey(
        Subfield, related_name='topics', on_delete=models.CASCADE)

    def __str__(self):
        # Displayed by its name wherever the object is rendered as text.
        return self.name
85
+
86
+
87
class AuthorTopic(TimestampedModel):
    """Per-author statistics for one topic.

    One row exists per (author, topic); it stores a ``count`` and a
    ``share_value`` for that pairing.
    """

    author = models.ForeignKey(
        Author, on_delete=models.CASCADE, related_name='topics')
    topic = models.ForeignKey(Topic, on_delete=models.CASCADE)
    count = models.IntegerField()
    share_value = models.FloatField()

    class Meta:
        unique_together = ('author', 'topic')

    def __str__(self):
        # Without this the admin shows "AuthorTopic object (n)". Built from
        # the raw foreign-key ids so rendering never triggers extra queries.
        return f"{self.author_id} - {self.topic_id}"
96
+
97
+
98
class Work(TimestampedModel):
    # A single work attached to exactly one author row.
    id = models.CharField(primary_key=True, max_length=20)  # e.g., W123456789
    # Reverse accessor on Author is `works`.
    author = models.ForeignKey(
        Author, related_name='works', on_delete=models.CASCADE)
    title = models.CharField(max_length=512)
    year = models.IntegerField()
    cited_by_count = models.IntegerField()

    def __str__(self):
        # Displayed by its title wherever the object is rendered as text.
        return self.title
108
+
109
+
110
class AuthorYearlyStats(TimestampedModel):
    """Works and citation counters for one author in one year.

    One row exists per (author, year).
    """

    author = models.ForeignKey(
        Author, on_delete=models.CASCADE, related_name='yearly_stats')
    year = models.IntegerField()
    works_count = models.IntegerField()
    cited_by_count = models.IntegerField()

    class Meta:
        unique_together = ('author', 'year')

    def __str__(self):
        # Without this the admin shows "AuthorYearlyStats object (n)". Built
        # from the raw foreign-key id so rendering never triggers a query.
        return f"{self.author_id} ({self.year})"
119
+
120
+
121
class Concept(TimestampedModel):
    # A concept with an optional Wikidata link, a level and a score.
    id = models.CharField(primary_key=True, max_length=20)  # e.g., C41008148
    name = models.CharField(max_length=255)
    wikidata_url = models.URLField(blank=True, null=True)
    level = models.IntegerField()
    score = models.FloatField()

    def __str__(self):
        # Rendered as "<name> (<id>)".
        return "%s (%s)" % (self.name, self.id)
130
+
131
+
132
class AuthorConcept(models.Model):
    """Per-author level and score for one concept.

    One row exists per (author, concept); ``level`` and ``score`` are both
    optional.
    """

    # NOTE(review): unlike the other models in this module, this one extends
    # models.Model rather than TimestampedModel, so it has no created_at /
    # updated_at columns - confirm that this is intentional.
    author = models.ForeignKey(
        Author, on_delete=models.CASCADE, related_name='concepts')
    concept = models.ForeignKey(Concept, on_delete=models.CASCADE)
    level = models.IntegerField(null=True, blank=True)
    score = models.FloatField(null=True, blank=True)

    class Meta:
        unique_together = ('author', 'concept')

    def __str__(self):
        # Without this the admin shows "AuthorConcept object (n)". Built from
        # the raw foreign-key ids so rendering never triggers extra queries.
        return f"{self.author_id} - {self.concept_id}"
core/schema.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import graphene
2
+ from graphene_django import DjangoObjectType
3
+ from graphene_django.filter import DjangoFilterConnectionField
4
+ from .models import (
5
+ Institution, Author, Affiliation, Domain, Field,
6
+ Subfield, Topic, AuthorTopic, Work, AuthorYearlyStats,
7
+ Concept, AuthorConcept
8
+ )
9
+
10
+
11
+ # === GraphQL Types ===
12
class InstitutionType(DjangoObjectType):
    class Meta:
        # Relay node over Institution exposing every model field.
        model = Institution
        interfaces = (graphene.relay.Node,)
        fields = "__all__"
        # Lookups offered as arguments on filtered connection fields.
        filter_fields = {
            'id': ['exact', 'icontains'],
            'name': ['icontains', 'iexact'],
            'ror_id': ['exact'],
            'country_code': ['exact'],
            'institution_type': ['exact'],
        }
24
+
25
+
26
class AuthorType(DjangoObjectType):
    class Meta:
        # Relay node over Author exposing every model field.
        model = Author
        interfaces = (graphene.relay.Node,)
        fields = "__all__"
        # Text lookups on identity fields, range lookups on the counters.
        filter_fields = {
            'id': ['exact', 'icontains'],
            'name': ['icontains'],
            'orcid': ['exact'],
            'h_index': ['exact', 'gte', 'lte'],
            'cited_by_count': ['gte', 'lte'],
            'works_count': ['gte', 'lte'],
        }
39
+
40
+
41
class AffiliationType(DjangoObjectType):
    class Meta:
        # Relay node over Affiliation exposing every model field.
        model = Affiliation
        interfaces = (graphene.relay.Node,)
        fields = "__all__"
        # Filterable by either side of the link, by year and by the flag.
        filter_fields = {
            'author__id': ['exact'],
            'institution__id': ['exact'],
            'year': ['exact', 'gte', 'lte'],
            'is_last_known': ['exact'],
        }
52
+
53
+
54
class DomainType(DjangoObjectType):
    class Meta:
        # Relay node over Domain exposing every model field.
        model = Domain
        interfaces = (graphene.relay.Node,)
        fields = "__all__"
        filter_fields = ['id', 'name']
60
+
61
+
62
class FieldType(DjangoObjectType):
    class Meta:
        # Relay node over Field exposing every model field.
        model = Field
        interfaces = (graphene.relay.Node,)
        fields = "__all__"
        # Filterable by own id/name and by the parent domain's id.
        filter_fields = {
            'id': ['exact'],
            'name': ['icontains'],
            'domain__id': ['exact'],
        }
72
+
73
+
74
class SubfieldType(DjangoObjectType):
    class Meta:
        # Relay node over Subfield exposing every model field.
        model = Subfield
        interfaces = (graphene.relay.Node,)
        fields = "__all__"
        # Filterable by own id/name and by the parent field's id.
        filter_fields = {
            'id': ['exact'],
            'name': ['icontains'],
            'field__id': ['exact'],
        }
84
+
85
+
86
class TopicType(DjangoObjectType):
    class Meta:
        # Relay node over Topic exposing every model field.
        model = Topic
        interfaces = (graphene.relay.Node,)
        fields = "__all__"
        # Filterable by own id/name and by the parent subfield's id.
        filter_fields = {
            'id': ['exact'],
            'name': ['icontains'],
            'subfield__id': ['exact'],
        }
96
+
97
+
98
class AuthorTopicType(DjangoObjectType):
    class Meta:
        # Relay node over AuthorTopic exposing every model field.
        model = AuthorTopic
        interfaces = (graphene.relay.Node,)
        fields = "__all__"
        # Filterable by either side of the link.
        filter_fields = {
            'author__id': ['exact'],
            'topic__id': ['exact'],
        }
107
+
108
+
109
class WorkType(DjangoObjectType):
    class Meta:
        # Relay node over Work exposing every model field.
        model = Work
        interfaces = (graphene.relay.Node,)
        fields = "__all__"
        # Text lookup on title, range lookups on year and citation count.
        filter_fields = {
            'id': ['exact'],
            'title': ['icontains'],
            'author__id': ['exact'],
            'year': ['exact', 'gte', 'lte'],
            'cited_by_count': ['gte', 'lte'],
        }
121
+
122
+
123
class AuthorYearlyStatsType(DjangoObjectType):
    class Meta:
        # Relay node over AuthorYearlyStats exposing every model field.
        model = AuthorYearlyStats
        interfaces = (graphene.relay.Node,)
        fields = "__all__"
        # Filterable by author, exact year and a works_count range.
        filter_fields = {
            'author__id': ['exact'],
            'year': ['exact'],
            'works_count': ['gte', 'lte'],
        }
133
+
134
+
135
class ConceptType(DjangoObjectType):
    class Meta:
        # Relay node over Concept exposing every model field.
        model = Concept
        interfaces = (graphene.relay.Node,)
        fields = "__all__"
        filter_fields = ['id', 'name', 'level', 'score']
141
+
142
+
143
class AuthorConceptType(DjangoObjectType):
    class Meta:
        # Relay node over AuthorConcept exposing every model field.
        model = AuthorConcept
        interfaces = (graphene.relay.Node,)
        fields = "__all__"
        # Filterable by either side of the link.
        filter_fields = {
            'author__id': ['exact'],
            'concept__id': ['exact'],
        }
152
+
153
+
154
+ # === Query with Filtered Connections ===
155
class Query(graphene.ObjectType):
    # Each model gets two root fields: a single-node lookup (relay Node.Field)
    # and an `all_*` connection built with DjangoFilterConnectionField.

    # Institutions, authors and the affiliations linking them.
    institution = graphene.relay.Node.Field(InstitutionType)
    all_institutions = DjangoFilterConnectionField(InstitutionType)

    author = graphene.relay.Node.Field(AuthorType)
    all_authors = DjangoFilterConnectionField(AuthorType)

    affiliation = graphene.relay.Node.Field(AffiliationType)
    all_affiliations = DjangoFilterConnectionField(AffiliationType)

    # Domain > Field > Subfield > Topic chain.
    domain = graphene.relay.Node.Field(DomainType)
    all_domains = DjangoFilterConnectionField(DomainType)

    field = graphene.relay.Node.Field(FieldType)
    all_fields = DjangoFilterConnectionField(FieldType)

    subfield = graphene.relay.Node.Field(SubfieldType)
    all_subfields = DjangoFilterConnectionField(SubfieldType)

    topic = graphene.relay.Node.Field(TopicType)
    all_topics = DjangoFilterConnectionField(TopicType)

    # Per-author data.
    author_topic = graphene.relay.Node.Field(AuthorTopicType)
    all_author_topics = DjangoFilterConnectionField(AuthorTopicType)

    work = graphene.relay.Node.Field(WorkType)
    all_works = DjangoFilterConnectionField(WorkType)

    author_yearly_stats = graphene.relay.Node.Field(AuthorYearlyStatsType)
    all_author_yearly_stats = DjangoFilterConnectionField(
        AuthorYearlyStatsType)

    # Concepts and their per-author links.
    concept = graphene.relay.Node.Field(ConceptType)
    all_concepts = DjangoFilterConnectionField(ConceptType)

    author_concept = graphene.relay.Node.Field(AuthorConceptType)
    all_author_concepts = DjangoFilterConnectionField(AuthorConceptType)


# Schema object exposing the query root above.
schema = graphene.Schema(query=Query)
core/tests.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from django.test import TestCase
2
+
3
+ # Create your tests here.
core/views.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from django.shortcuts import render
2
+
3
+ # Create your views here.
manage.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ """Django's command-line utility for administrative tasks."""
3
+ import os
4
+ import sys
5
+
6
+
7
def main():
    """Run administrative tasks."""
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'BridgeMentor.settings')
    try:
        from django.core.management import (
            execute_from_command_line as run_command_line,
        )
    except ImportError as exc:
        # Re-raise with a hint about the usual causes, keeping the original
        # error chained for debugging.
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(hint) from exc
    run_command_line(sys.argv)
19
+
20
+
21
+ if __name__ == '__main__':
22
+ main()
populate_user.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from django.utils.timezone import make_aware, is_naive
2
+ from django.utils.dateparse import parse_datetime
3
+ from tqdm import tqdm
4
+ import glob
5
+ import json
6
+ from core.models import (
7
+ Author, Institution, Affiliation, Domain, Field, Subfield, Topic, AuthorTopic, AuthorYearlyStats, Concept, AuthorConcept
8
+ )
9
+ from urllib.parse import urlparse
10
+
11
+
12
def parse_id_from_url(url):
    """Return the last path segment of ``url``.

    Leading and trailing slashes of the path are ignored, so
    ``https://openalex.org/A5006834808/`` yields ``"A5006834808"``.

    Args:
        url: URL string to parse.

    Returns:
        The final path component as a string (empty when the URL has no
        path), or ``None`` when ``url`` cannot be parsed, e.g. it is ``None``.
    """
    try:
        return urlparse(url).path.strip('/').split('/')[-1]
    except (TypeError, AttributeError, ValueError) as e:
        # Report and return None instead of dropping into the debugger: a
        # stray breakpoint() stalls any unattended bulk import.
        print(f"Error parsing URL {url}: {e}")
        return None
18
+
19
+
20
def add_author(user_info, updated_date):
    """Create or update the ``Author`` row for one author record.

    Args:
        user_info: Author dict; reads ``id``, ``display_name_alternatives``,
            ``display_name``, ``orcid``, ``summary_stats``,
            ``cited_by_count`` and ``works_count``.
        updated_date: Datetime of the upstream record, stored as the row's
            ``updated_at``.

    Returns:
        The saved ``Author`` instance, with ``updated_at`` set to
        ``updated_date``.
    """
    author_id = parse_id_from_url(user_info["id"])
    stats = user_info["summary_stats"]

    # Prefer the longest known spelling of the name. Fall back to
    # display_name when no alternatives are listed, since max() raises
    # ValueError on an empty sequence.
    alternatives = user_info.get("display_name_alternatives") or []
    name = max(alternatives, key=len) if alternatives else user_info["display_name"]

    author, _ = Author.objects.update_or_create(
        id=author_id,
        defaults={
            "name": name,
            "orcid": parse_id_from_url(user_info["orcid"]) if user_info.get("orcid") else None,
            "h_index": stats["h_index"],
            "i10_index": stats["i10_index"],
            "cited_by_count": user_info["cited_by_count"],
            "works_count": user_info["works_count"],
            "mean_2yr_citedness": stats["2yr_mean_citedness"],
        }
    )
    # updated_at is declared with auto_now=True, so Model.save() always
    # replaces it with the current time - assigning the attribute and calling
    # save() never stored the upstream date. QuerySet.update() writes the
    # given value as-is, which is what populate_user's freshness check needs.
    Author.objects.filter(pk=author.pk).update(updated_at=updated_date)
    author.updated_at = updated_date
    return author
37
+
38
+
39
def add_institution(inst_data):
    """Create or update the ``Institution`` row for one institution dict.

    Args:
        inst_data: Institution dict; reads ``id``, ``display_name``, ``ror``,
            ``type`` and, optionally, ``country_code``.

    Returns:
        The saved ``Institution`` instance.
    """
    inst_id = parse_id_from_url(inst_data["id"])
    inst, _ = Institution.objects.update_or_create(
        id=inst_id,
        defaults={
            "name": inst_data["display_name"],
            "ror_id": parse_id_from_url(inst_data["ror"]),
            # The operands were reversed ('N/A' or ...), which always stored
            # 'N/A' and discarded the real code. 'N/A' is now only the
            # fallback for a missing or empty value.
            # NOTE(review): 'N/A' is 3 characters while
            # Institution.country_code has max_length=2 - confirm the
            # fallback value or widen the column.
            "country_code": inst_data.get("country_code") or 'N/A',
            "institution_type": inst_data["type"]
        }
    )
    return inst
51
+
52
+
53
def add_affiliations(author, affiliations, last_known_insts):
    """Store one ``Affiliation`` row per (institution, year) for ``author``.

    Args:
        author: ``Author`` instance the rows belong to.
        affiliations: Iterable of dicts with ``institution`` and ``years``.
        last_known_insts: Iterable of institution dicts (each with ``id``);
            rows for these institutions are flagged ``is_last_known``.
    """
    last_known_ids = set()
    for inst in last_known_insts:
        last_known_ids.add(parse_id_from_url(inst["id"]))

    for entry in affiliations:
        institution = add_institution(entry["institution"])
        # The flag depends only on the institution, not on the year.
        is_current = institution.id in last_known_ids
        for year in entry["years"]:
            Affiliation.objects.update_or_create(
                author=author,
                institution=institution,
                year=year,
                defaults={"is_last_known": is_current},
            )
65
+
66
+
67
def add_hierarchy(domain_data):
    """Create or update the ``Domain`` row described by ``domain_data``.

    Despite the name, only the domain level is written here.

    Args:
        domain_data: Dict with ``id`` (a URL) and ``display_name``.

    Returns:
        The saved ``Domain`` instance.
    """
    pk = parse_id_from_url(domain_data["id"])
    values = {"name": domain_data["display_name"]}
    domain, _created = Domain.objects.update_or_create(id=pk, defaults=values)
    return domain
72
+
73
+
74
def add_field(field_data, domain_data):
    """Create or update a ``Field`` row and its parent ``Domain``.

    Args:
        field_data: Dict with ``id`` (a URL) and ``display_name``.
        domain_data: Dict describing the parent domain.

    Returns:
        The saved ``Field`` instance.
    """
    # The parent is written first so the foreign key can point at it.
    parent = add_hierarchy(domain_data)
    pk = parse_id_from_url(field_data["id"])
    values = {"name": field_data["display_name"], "domain": parent}
    field, _created = Field.objects.update_or_create(id=pk, defaults=values)
    return field
82
+
83
+
84
def add_subfield(subfield_data, field_data, domain_data):
    """Create or update a ``Subfield`` row and its parent field and domain.

    Args:
        subfield_data: Dict with ``id`` (a URL) and ``display_name``.
        field_data: Dict describing the parent field.
        domain_data: Dict describing the field's parent domain.

    Returns:
        The saved ``Subfield`` instance.
    """
    # The parents are written first so the foreign key can point at them.
    parent = add_field(field_data, domain_data)
    pk = parse_id_from_url(subfield_data["id"])
    values = {"name": subfield_data["display_name"], "field": parent}
    subfield, _created = Subfield.objects.update_or_create(
        id=pk, defaults=values)
    return subfield
92
+
93
+
94
def add_topic(author, topic_data, topic_share_map):
    """Store a topic, its parent chain, and the author's link to it.

    Args:
        author: ``Author`` instance to link the topic to.
        topic_data: Dict with ``id``, ``display_name``, ``count`` and the
            nested ``subfield``, ``field`` and ``domain`` dicts.
        topic_share_map: Mapping of topic id to share value; topics missing
            from it get a share of 0.
    """
    topic_id = parse_id_from_url(topic_data["id"])
    parent = add_subfield(
        topic_data["subfield"], topic_data["field"], topic_data["domain"])

    topic_values = {"name": topic_data["display_name"], "subfield": parent}
    topic, _created = Topic.objects.update_or_create(
        id=topic_id, defaults=topic_values)

    link_values = {
        "count": topic_data["count"],
        "share_value": topic_share_map.get(topic_id, 0),
    }
    AuthorTopic.objects.update_or_create(
        author=author, topic=topic, defaults=link_values)
107
+
108
+
109
def add_topic_shares(topic_share_list):
    """Build a ``{topic id: share value}`` mapping.

    Args:
        topic_share_list: Iterable of dicts with ``id`` (a URL) and ``value``.

    Returns:
        Dict keyed by the id parsed from each URL; a later entry with the
        same id replaces an earlier one.
    """
    shares = {}
    for entry in topic_share_list:
        shares[parse_id_from_url(entry["id"])] = entry["value"]
    return shares
111
+
112
+
113
def add_yearly_stats(author, stats):
    """Store one ``AuthorYearlyStats`` row per entry in ``stats``.

    Args:
        author: ``Author`` instance the counters belong to.
        stats: Iterable of dicts with ``year``, ``works_count`` and
            ``cited_by_count``.
    """
    for entry in stats:
        counters = {
            "works_count": entry["works_count"],
            "cited_by_count": entry["cited_by_count"],
        }
        AuthorYearlyStats.objects.update_or_create(
            author=author, year=entry["year"], defaults=counters)
123
+
124
+
125
def add_concepts(author, concepts):
    """Store each concept and the author's link to it.

    Args:
        author: ``Author`` instance to link the concepts to.
        concepts: Iterable of dicts with ``id``, ``display_name``, ``level``,
            ``score`` and, optionally, ``wikidata``.
    """
    for item in concepts:
        pk = parse_id_from_url(item["id"])
        concept_values = {
            "name": item["display_name"],
            "wikidata_url": item.get("wikidata"),
            "level": item["level"],
            "score": item["score"],
        }
        concept_row, _created = Concept.objects.update_or_create(
            id=pk, defaults=concept_values)

        # The link row stores the same level and score as the concept row.
        link_values = {key: concept_values[key] for key in ("level", "score")}
        AuthorConcept.objects.update_or_create(
            author=author, concept=concept_row, defaults=link_values)
145
+
146
+
147
def populate_user(user_info):
    """Import one author record and everything hanging off it.

    Does nothing when an ``Author`` with the same id already exists and its
    ``updated_at`` is at least as recent as the record's ``updated_date``.
    Otherwise the author, affiliations, topics, yearly stats and concepts are
    written in a single transaction.

    Args:
        user_info: Author dict; reads ``id``, ``updated_date``,
            ``affiliations``, ``last_known_institutions``, ``topic_share``,
            ``topics``, ``counts_by_year`` and ``x_concepts`` (plus whatever
            ``add_author`` reads).

    Raises:
        ValueError: If ``updated_date`` is not a parseable datetime string.
    """
    # Imported here so this function does not rely on the module's import
    # block providing it.
    from django.db import transaction

    author_id = parse_id_from_url(user_info["id"])
    updated_date = parse_datetime(user_info["updated_date"])
    if updated_date is None:
        # parse_datetime returns None for badly formatted input; fail with a
        # clear message instead of an AttributeError inside is_naive().
        raise ValueError(
            f"Unparseable updated_date for author {author_id}: "
            f"{user_info['updated_date']!r}")
    if is_naive(updated_date):
        updated_date = make_aware(updated_date)

    author = Author.objects.filter(id=author_id).first()
    if author and (author.updated_at >= updated_date):
        return

    # All-or-nothing: without this, a failure part-way through would leave
    # the author stamped with the new date, and every later run would skip
    # the record with its related rows still incomplete.
    with transaction.atomic():
        author = add_author(user_info, updated_date)
        add_affiliations(
            author, user_info["affiliations"], user_info["last_known_institutions"])
        topic_share_map = add_topic_shares(user_info["topic_share"])
        for topic in user_info["topics"]:
            add_topic(author, topic, topic_share_map)
        add_yearly_stats(author, user_info["counts_by_year"])
        add_concepts(author, user_info["x_concepts"])
163
+
164
+
165
+ # Call this function to load data
166
+ # populate_user(user_info)
167
+
168
# Directory holding the JSON pages to import.
# NOTE(review): machine-specific absolute path - adjust before running
# elsewhere.
jsons = "/Users/sgautam/Documents/BridgeMentor/C41008148_authors"
# glob returns matches in arbitrary order; sort so runs are reproducible.
json_files = sorted(glob.glob(f"{jsons}/*.json"))
# enumerate() hides the length from tqdm, so pass the total explicitly.
for page, json_file in tqdm(enumerate(json_files), total=len(json_files)):
    # Read as UTF-8 explicitly: scrap_openalex.py writes these files as UTF-8
    # with ensure_ascii=False, and the platform default encoding may differ.
    with open(json_file, "r", encoding="utf-8") as file:
        user_infos = json.load(file)['results']
    for user_info in tqdm(user_infos, leave=False):
        populate_user(user_info)
        print(f"{page}-{user_info['display_name']}")
175
+
176
+ # python manage.py shell
177
+ # from populate_user import populate_user
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ django==4.2.20
2
+ graphene-django
3
+ django-filter
scrap_openalex.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import json
4
+ from time import sleep
5
+
6
# Base API URL
BASE_URL = "https://api.openalex.org/authors"
FILTER = "last_known_institutions.country_code:NO,x_concepts.id:C41008148"
PER_PAGE = 200
TOTAL_RESULTS = 86500
TOTAL_PAGES = (TOTAL_RESULTS + PER_PAGE - 1) // PER_PAGE  # Ceiling division
# NOTE(review): starting at 50 skips pages 1-49 - presumably left over from
# resuming an earlier run; confirm. Existing files are skipped without a
# request, so starting from 1 would also be cheap.
START_PAGE = 50
# Seconds to wait for the server; requests has no timeout by default.
REQUEST_TIMEOUT = 30
# NOTE(review): OpenAlex documents a 10,000-result limit for `page`-based
# paging, so pages past 50 at 200 per page may be rejected - confirm, and
# consider cursor paging.

# Output directory
OUTPUT_DIR = "C41008148_authors"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Loop through pages
for page in range(START_PAGE, TOTAL_PAGES + 1):
    filename = os.path.join(OUTPUT_DIR, f"{page:010}.json")
    # Check before fetching, so pages already on disk cost no request.
    if os.path.exists(filename):
        print(f"File {filename} already exists, skipping...")
        continue

    url = f"{BASE_URL}?filter={FILTER}&per-page={PER_PAGE}&page={page}"
    try:
        print(f"Fetching page {page}...")
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        data = response.json()

        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        sleep(1)  # Rate-limiting to avoid hitting the server too hard
    except (requests.RequestException, ValueError, OSError) as e:
        print(f"Error on page {page}: {e}")
        break
else:
    # Reached only when the loop was not aborted by an error.
    print("Download complete.")