3535
3636As output, a csv file of this format containing comma separated email addresses
3737is created:
38- elekto_emails_GHorgName_YYYYMMDD .csv
38+ elekto_emails_YYYY-MM-DD.csv
3939
40- A message with the number of email addresses found out of the total voters
41- is printed to the screen
40+ Each email address found is printed to the screen as a way to indicate progress,
41+ and a message with the number of email addresses found out of the total voters
42+ along with the name of the results csv file is printed to the screen at the end.
4243
4344Parameters
4445----------
45- org_name : str
46- The primary GitHub organization for the vote.
47- Used to gather email address from commits
4846file_name : str
4947 This should be an Elekto yaml file starting with "eligible_voters:"
5048"""
5149
def read_args():
    """Reads the yaml filename where the voters can be found and prompts for a
    GitHub API Personal Access Token.

    The filename is taken from the first command line argument. If no
    argument was given, the user is prompted for it instead. The token is
    always read from a prompt.

    Parameters
    ----------
    None

    Returns
    -------
    file_name : str
        This should be an Elekto yaml file (raw) starting with "eligible_voters:" Example:
        https://raw.githubusercontent.com/knative/community/main/elections/2022-TOC/voters.yaml
    api_token : str
        The GitHub Personal Access Token typed at the prompt.
    """
    import sys

    # read filename from command line or prompt if no
    # arguments were given.
    try:
        file_name = str(sys.argv[1])
    except IndexError:
        # Only a missing argument should trigger the prompt; anything else
        # is a real error and must not be hidden.
        print("Please enter the filename for voters.yaml.")
        file_name = input("Enter a file name (like https://raw.githubusercontent.com/knative/community/main/elections/2021-TOC/voters.yaml): ")

    api_token = input("Enter your GitHub Personal Access Token: ")

    return file_name, api_token
8478
85- def get_email (org , username , api_token ):
79+
def email_query():
    """Provides the GitHub GraphQL API query used to look up email addresses.

    The query takes a user login plus a start and end date. It asks for the
    email on the user's profile and, for up to 10 pull request contributions
    in that date range, up to 10 commits each with the commit url, whether
    the commit was authored by the committer, and the author email.

    Returns
    -------
    str
        The text of the GraphQL query.
    """
    query_text = """query pr_info_query($user_login: String!, $start_date: DateTime!, $end_date: DateTime!){
             user(login: $user_login) {
                 email
                 contributionsCollection(from: $start_date, to: $end_date){
                     pullRequestContributions(first: 10){
                         nodes{
                             pullRequest{
                                 commits(first: 10){
                                     nodes{
                                         url
                                         commit{
                                             authoredByCommitter
                                             author{
                                                 email
                                             }
                                         }
                                     }
                                 }
                             }
                         }
                     }
                 }
             }
        }"""
    return query_text
111+
def _first_commit_email(json_data):
    """Returns the first usable commit email from a query response, or None.

    Walks the pull requests and their commits in the order returned and
    stops at the first commit that was authored by its committer and whose
    author email is set and does not contain 'noreply'.

    Parameters
    ----------
    json_data : dict
        Decoded response of the query returned by email_query()

    Returns
    -------
    email : str or None
    """
    try:
        pr_nodes = json_data['data']['user']['contributionsCollection']['pullRequestContributions']['nodes']
        for pr in pr_nodes:
            for node in pr['pullRequest']['commits']['nodes']:
                commit = node['commit']
                # PRs can have commits from other people, so skip those
                if not commit['authoredByCommitter']:
                    continue
                candidate = commit['author']['email']
                if candidate and 'noreply' not in candidate:
                    # Returning here ends BOTH loops, so a later noreply
                    # commit can never overwrite an email already found.
                    return candidate
    except (KeyError, TypeError):
        # Best effort: an unexpected response shape means no commit email
        pass

    return None


def get_email(username, api_token):
    """Attempts to get an email address from the GitHub profile first.
    Otherwise, it attempts to find an email address in the commits of the
    user's pull requests from the past 12 months. A commit email containing
    the string 'noreply' is skipped, so it is never written to the csv file.

    Parameters
    ----------
    username : str
        GitHub username
    api_token : str
        GitHub Personal Access Token, sent in the Authorization header

    Returns
    -------
    email : str or None
        None if the user was not found or no usable email address exists
    """

    import requests
    import json
    from datetime import datetime
    from dateutil.relativedelta import relativedelta

    # Set GitHub GraphQL API variables
    url = 'https://api.github.com/graphql'
    headers = {'Authorization': 'token %s' % api_token}

    # Set query variables including dates for past 12 months (req for query)
    today = datetime.now()
    end_date = today.isoformat()  # isoformat required for json serialization
    start_date = (today + relativedelta(months=-12)).isoformat()
    variables = {"user_login": username, "start_date": start_date, "end_date": end_date}

    # Run query and decode the JSON response
    query = email_query()
    r = requests.post(url=url, json={'query': query, 'variables': variables}, headers=headers)
    json_data = json.loads(r.text)

    # Try to get the email address from the profile first.
    # This fails and returns immediately when the response has no user
    # entry (for example if the user has been deleted).
    try:
        email = json_data['data']['user']['email']
    except (KeyError, TypeError):
        print(username, "not found")
        return None

    # If the profile didn't have an email address, fall back to the commits
    if not email:
        email = _first_commit_email(json_data)

    return email
137177
@@ -141,7 +181,7 @@ def get_email(org, username, api_token):
141181import urllib .request
142182from datetime import datetime
143183
144- org_name , file_name , api_token = read_args ()
184+ file_name , api_token = read_args ()
145185
146186# Loads the yaml file and creates a list of voters
147187try :
@@ -154,30 +194,32 @@ def get_email(org, username, api_token):
154194 print ("Cannot load or process the yaml file. Did you use the raw link?" )
155195 sys .exit ()
156196
print("Gathering email addresses from GitHub. This will take ~3 minutes for 100 voters.")

# Create a list for the emails and initialize a counter for the
# number of emails found.
email_list = []
found_count = 0

# Attempt to get an email address for each voter. If an email address is found
# append it to the list and increment the counter. Also print to the screen to
# show that the script is progressing.
for username in voter_list:
    email = get_email(username, api_token)
    if email:
        email_list.append(email)
        found_count += 1
        print(email)

# Build the name of the results file from today's date
today = datetime.today().strftime('%Y-%m-%d')
outfile_name = 'elekto_emails_' + today + '.csv'

# Print status
print("Found emails for", found_count, "out of", len(voter_list), "voters")
print("Your results can be found in", outfile_name)

# Write all emails as a single comma separated row. The with block closes the
# file even if writing fails, and newline='' lets the csv module control
# line endings itself.
with open(outfile_name, 'w', newline='') as f:
    csv_file = csv.writer(f)
    csv_file.writerow(email_list)
0 commit comments