1+ # Copyright 2022 The Elekto Authors
2+ #
3+ # Licensed under the Apache License, Version 2.0 (the "License");
4+ # you may not use this file except in compliance with the License.
5+ # You may obtain a copy of the License at
6+ #
7+ # http://www.apache.org/licenses/LICENSE-2.0
8+ #
9+ # Unless required by applicable law or agreed to in writing, software
10+ # distributed under the License is distributed on an "AS IS" BASIS,
11+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+ # See the License for the specific language governing permissions and
13+ # limitations under the License.
14+ #
15+ # Author(s): Dawn M. Foster <[email protected] > 16+
17+ """
18+ Before using this script, please make sure that you are adhering
19+ to the GitHub Acceptable Use Policies:
20+ https://docs.github.com/en/site-policy/acceptable-use-policies/github-acceptable-use-policies
21+ In particular, "You may not use information from the Service
22+ (whether scraped, collected through our API, or obtained otherwise)
23+ for spamming purposes, including for the purposes of sending unsolicited
24+ emails to users or selling User Personal Information (as defined in the
25+ GitHub Privacy Statement), such as to recruiters, headhunters, and job boards."
26+
27+ Takes an elekto voters.yaml file with a list of "eligible_voters:"
28+ GitHub logins, and attempts to use the GitHub API to get an email
29+ for each person to make it possible to send email reminders to eligible
30+ voters.
31+
32+ If an email address is in the GitHub profile, that is used first. Otherwise,
33+ it attempts to find an email address from the most recent commit. If the
34+ email contains the string 'noreply' it is not written to the csv file.
35+
As output, a csv file with the following name, containing a single row of comma
separated email addresses, is created in the current working directory:
elekto_emails_YYYY-MM-DD.csv
39+
40+ Each email address found is printed to the screen as a way to indicate progress,
41+ and a message with the number of email addresses found out of the total voters
42+ along with the name of the results csv file is printed to the screen at the end.
43+
44+ Parameters
45+ ----------
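file_name : str
    URL of an Elekto yaml file (use the raw link) starting with
    "eligible_voters:". It is opened with urllib.request.urlopen, so a bare
    local path is not accepted. It is read from the first command line
    argument, or prompted for when no argument is given.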
48+ """
49+
def read_args():
    """Read the voters file location and prompt for a GitHub API token.

    The location is taken from the first command line argument. When no
    argument was given, the user is prompted for it instead. The user is
    always prompted for the GitHub API Personal Access Token.

    Parameters
    ----------
    None

    Returns
    -------
    file_name : str
        This should be an Elekto yaml file (raw) starting with
        "eligible_voters:" Example:
        https://raw.githubusercontent.com/knative/community/main/elections/2022-TOC/voters.yaml
    api_token : str
        The GitHub Personal Access Token exactly as typed at the prompt.
    """
    import sys

    # Only a missing argument should trigger the prompt; a bare ``except``
    # would also hide unrelated failures (including Ctrl-C).
    try:
        file_name = str(sys.argv[1])
    except IndexError:
        print("Please enter the filename for voters.yaml.")
        file_name = input("Enter a file name (like https://raw.githubusercontent.com/knative/community/main/elections/2021-TOC/voters.yaml): ")

    # NOTE(review): input() echoes the token to the terminal; consider
    # getpass.getpass() if the echo is a concern.
    api_token = input("Enter your GitHub Personal Access Token: ")

    return file_name, api_token
78+
79+
def email_query():
    """Return the GitHub GraphQL API query used to look up a user's email.

    The query takes the variables ``$user_login``, ``$start_date`` and
    ``$end_date``. It requests the profile email and, for the first 10 pull
    request contributions in that date range, the first 10 commits of each
    pull request (url, authoredByCommitter and the author's email).

    Returns
    -------
    str
    """
    query_text = """query pr_info_query($user_login: String!, $start_date: DateTime!, $end_date: DateTime!){
             user(login: $user_login) {
                 email
                 contributionsCollection(from: $start_date, to: $end_date){
                     pullRequestContributions(first: 10){
                         nodes{
                             pullRequest{
                                 commits(first: 10){
                                     nodes{
                                         url
                                         commit{
                                             authoredByCommitter
                                             author{
                                                 email
                                             }
                                         }
                                     }
                                 }
                             }
                         }
                     }
                 }
             }
         }"""
    return query_text
111+
def _first_commit_email(user_data):
    """Return the first usable commit email in a GraphQL ``user`` payload, or None."""
    collection = (user_data or {}).get('contributionsCollection') or {}
    contributions = (collection.get('pullRequestContributions') or {}).get('nodes') or []

    for contribution in contributions:
        pull_request = (contribution or {}).get('pullRequest') or {}
        commit_nodes = (pull_request.get('commits') or {}).get('nodes') or []
        for node in commit_nodes:
            commit = (node or {}).get('commit') or {}
            # PRs can contain commits from other people, so only commits
            # flagged authoredByCommitter are considered.
            # NOTE(review): the flag says author and committer match; it is
            # used here as a proxy for "written by username" - confirm.
            if not commit.get('authoredByCommitter'):
                continue
            candidate = (commit.get('author') or {}).get('email')
            # Returning here leaves both loops at once, so a later pull
            # request can no longer overwrite or reset an email that was
            # already found.
            if candidate and 'noreply' not in candidate:
                return candidate

    return None


def get_email(username, api_token):
    """Attempts to get an email address for a GitHub user.

    The email address from the GitHub profile is used first. Otherwise,
    the commits of the pull requests returned by ``email_query`` for the
    past 12 months are searched for the first commit that is flagged
    authoredByCommitter and whose author email does not contain the string
    'noreply'.

    Parameters
    ----------
    username : str
        GitHub username
    api_token : str
        GitHub Personal Access Token sent in the Authorization header

    Returns
    -------
    email : str or None
        None when the user is not found or no usable email address exists.
    """
    import json
    from datetime import datetime

    import requests
    from dateutil.relativedelta import relativedelta

    # Set GitHub GraphQL API variables
    url = 'https://api.github.com/graphql'
    headers = {'Authorization': 'token %s' % api_token}

    # Set query variables including dates for past 12 months (req for query)
    today = datetime.now()
    end_date = today.isoformat()  # isoformat required for json serialization
    start_date = (today + relativedelta(months=-12)).isoformat()
    variables = {"user_login": username, "start_date": start_date, "end_date": end_date}

    # Run query and parse the JSON response
    query = email_query()
    r = requests.post(url=url, json={'query': query, 'variables': variables}, headers=headers)
    json_data = json.loads(r.text)

    # Try to get the email address from the profile first.
    # This fails and returns immediately when the response has no user
    # (e.g. the user has been deleted).
    try:
        user_data = json_data['data']['user']
        email = user_data['email']
    except (KeyError, TypeError):
        print(username, "not found")
        return None

    if email:
        return email

    # The profile didn't have an email address: fall back to commit emails.
    return _first_commit_email(user_data)
177+
178+ import sys
179+ import yaml
180+ import csv
181+ import urllib .request
182+ from datetime import datetime
183+
def main():
    """Collect an email address for each eligible voter and write a csv file.

    Reads the voters file location and API token with ``read_args``, loads
    the "eligible_voters" list from the yaml file at that URL, calls
    ``get_email`` for every voter and writes all addresses found as a single
    row to ``elekto_emails_YYYY-MM-DD.csv`` in the current directory.

    Exits with status 1 when the yaml file cannot be loaded or does not
    contain a list under "eligible_voters".
    """
    file_name, api_token = read_args()

    # Loads the yaml file and creates a list of voters.
    # ``except Exception`` keeps the original catch-all message for any
    # download/parse problem, but no longer swallows Ctrl-C or SystemExit.
    try:
        with urllib.request.urlopen(file_name) as voters_file:
            voters = yaml.safe_load(voters_file)
        voter_list = voters['eligible_voters']
        if not isinstance(voter_list, list):
            raise TypeError("eligible_voters is not a list")
    except Exception:
        print("Cannot load or process the yaml file. Did you use the raw link?")
        # Non-zero status so callers can tell the run failed.
        sys.exit(1)

    print("Gathering email addresses from GitHub. This will take ~3 minutes for 100 voters.")

    # Attempt to get an email address for each voter. If an email address is
    # found append it to the list. Also print to the screen to show that the
    # script is progressing.
    email_list = []
    for username in voter_list:
        email = get_email(username, api_token)
        if email:
            email_list.append(email)
            print(email)

    # Write all emails as one row. newline='' is what the csv module expects
    # for files handed to csv.writer, and ``with`` closes the file even if
    # writing fails.
    today = datetime.today().strftime('%Y-%m-%d')
    outfile_name = 'elekto_emails_' + today + '.csv'
    with open(outfile_name, 'w', newline='') as f:
        csv.writer(f).writerow(email_list)

    # Print status once the results are on disk.
    print("Found emails for", len(email_list), "out of", len(voter_list), "voters")
    print("Your results can be found in", outfile_name)


if __name__ == "__main__":
    main()
0 commit comments