Skip to content

Commit b2c6dd8

Browse files
authored
Merge pull request #59 from geekygirldawn/emails
Generate emails from eligible voters in voters.yaml
2 parents d094bc1 + 2c2fd3c commit b2c6dd8

File tree

1 file changed

+225
-0
lines changed

1 file changed

+225
-0
lines changed

scripts/elekto_emails.py

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
# Copyright 2022 The Elekto Authors
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
# Author(s): Dawn M. Foster <[email protected]>
16+
17+
"""
18+
Before using this script, please make sure that you are adhering
19+
to the GitHub Acceptable Use Policies:
20+
https://docs.github.com/en/site-policy/acceptable-use-policies/github-acceptable-use-policies
21+
In particular, "You may not use information from the Service
22+
(whether scraped, collected through our API, or obtained otherwise)
23+
for spamming purposes, including for the purposes of sending unsolicited
24+
emails to users or selling User Personal Information (as defined in the
25+
GitHub Privacy Statement), such as to recruiters, headhunters, and job boards."
26+
27+
Takes an elekto voters.yaml file with a list of "eligible_voters:"
28+
GitHub logins, and attempts to use the GitHub API to get an email
29+
for each person to make it possible to send email reminders to eligible
30+
voters.
31+
32+
If an email address is in the GitHub profile, that is used first. Otherwise,
33+
it attempts to find an email address from the most recent commit. If the
34+
email contains the string 'noreply' it is not written to the csv file.
35+
36+
As output, a csv file of this format containing comma separated email addresses
37+
is created:
38+
elekto_emails_YYYY-MM-DD.csv
39+
40+
Each email address found is printed to the screen as a way to indicate progress,
41+
and a message with the number of email addresses found out of the total voters
42+
along with the name of the results csv file is printed to the screen at the end.
43+
44+
Parameters
45+
----------
46+
file_name : str
47+
This should be an Elekto yaml file starting with "eligible_voters:"
48+
"""
49+
50+
def read_args():
    """Read the voters.yaml location and prompt for a GitHub API token.

    The file name is taken from the first command line argument. When no
    argument was given, the user is prompted for it instead. The GitHub
    Personal Access Token is always prompted for.

    Parameters
    ----------
    None

    Returns
    -------
    file_name : str
        This should be an Elekto yaml file (raw) starting with "eligible_voters:" Example:
        https://raw.githubusercontent.com/knative/community/main/elections/2022-TOC/voters.yaml
    api_token : str
        The GitHub Personal Access Token exactly as entered at the prompt.
    """
    import sys

    # Read the filename from the command line, or prompt if no
    # arguments were given.
    try:
        file_name = str(sys.argv[1])
    except IndexError:
        # Only a missing argument should fall back to the prompt; a bare
        # except here would also swallow KeyboardInterrupt and SystemExit.
        print("Please enter the filename for voters.yaml.")
        file_name = input("Enter a file name (like https://raw.githubusercontent.com/knative/community/main/elections/2021-TOC/voters.yaml): ")

    api_token = input("Enter your GitHub Personal Access Token: ")

    return file_name, api_token
78+
79+
80+
def email_query():
    """Provide the GitHub GraphQL API query used to look up a user's email.

    The query requests the email on the user's profile, plus the url,
    authoredByCommitter flag and author email of the first 10 commits of the
    first 10 pull request contributions between $start_date and $end_date.

    Returns
    -------
    str
    """
    # Whitespace inside the query is only for readability.
    graphql = """query pr_info_query($user_login: String!, $start_date: DateTime!, $end_date: DateTime!){
        user(login: $user_login) {
            email
            contributionsCollection(from: $start_date, to: $end_date){
                pullRequestContributions(first: 10){
                    nodes{
                        pullRequest{
                            commits(first: 10){
                                nodes{
                                    url
                                    commit{
                                        authoredByCommitter
                                        author{
                                            email
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }"""
    return graphql
111+
112+
def _first_usable_commit_email(user_data):
    """Return the first commit author email without 'noreply', or None.

    Walks the pull request contributions under ``user_data`` in the order the
    API returned them and stops at the first commit whose
    ``authoredByCommitter`` flag is set and whose author email is non-empty
    and does not contain 'noreply'. Missing or null parts of the response are
    skipped rather than raising.
    """
    try:
        pr_nodes = user_data['contributionsCollection']['pullRequestContributions']['nodes']
    except (KeyError, TypeError):
        return None

    for pr in pr_nodes or []:
        try:
            commit_nodes = pr['pullRequest']['commits']['nodes']
        except (KeyError, TypeError):
            continue

        for node in commit_nodes or []:
            try:
                commit = node['commit']
                # PRs can contain commits from other people, so only commits
                # flagged authoredByCommitter are considered.
                # NOTE(review): the flag says author == committer; it does
                # not by itself prove the author is the queried user.
                if not commit['authoredByCommitter']:
                    continue
                commit_email = commit['author']['email']
            except (KeyError, TypeError):
                continue

            if commit_email and 'noreply' not in commit_email:
                # Return immediately so later PRs cannot overwrite the result.
                return commit_email

    return None


def get_email(username, api_token):
    """Attempts to get an email address from the GitHub profile first.
    Otherwise, it attempts to find an email address from a commit in the
    user's pull requests of the past 12 months. Commit emails containing
    the string 'noreply' are skipped.

    Parameters
    ----------
    username : str
        GitHub username
    api_token : str
        GitHub Personal Access Token sent in the Authorization header

    Returns
    -------
    email : str or None
        None when the user cannot be found in the API response or when no
        usable email address is available.
    """
    import json
    from datetime import datetime

    import requests
    from dateutil.relativedelta import relativedelta

    # Set GitHub GraphQL API variables
    url = 'https://api.github.com/graphql'
    headers = {'Authorization': 'token %s' % api_token}

    # Set query variables including dates for past 12 months (req for query)
    today = datetime.now()
    end_date = today.isoformat()  # isoformat required for json serialization
    start_date = (today + relativedelta(months=-12)).isoformat()
    variables = {"user_login": username, "start_date": start_date, "end_date": end_date}

    # Run query and load the results as JSON
    query = email_query()
    r = requests.post(url=url, json={'query': query, 'variables': variables}, headers=headers)
    json_data = json.loads(r.text)

    # Try to get the email address from the profile first.
    # A missing 'data'/'user' key or a null user (e.g. a deleted account)
    # means there is nothing to look through, so return immediately.
    try:
        user_data = json_data['data']['user']
        email = user_data['email']
    except (KeyError, TypeError):
        print(username, "not found")
        return None

    if email:
        return email

    # If the profile didn't have an email address, fall back to the commits
    # of the user's recent pull requests.
    return _first_usable_commit_email(user_data)
177+
178+
import sys
179+
import yaml
180+
import csv
181+
import urllib.request
182+
from datetime import datetime
183+
184+
file_name, api_token = read_args()

# Loads the yaml file and creates a list of voters
try:
    # Close the HTTP response as soon as the yaml has been parsed.
    with urllib.request.urlopen(file_name) as voters_file:
        voters = yaml.safe_load(voters_file)
    # An empty "eligible_voters:" key loads as None; treat it as no voters.
    voter_list = voters['eligible_voters'] or []

except Exception as err:
    # Top-level boundary: any failure to fetch or parse the file ends the
    # run, but Ctrl-C / SystemExit are no longer swallowed.
    print("Cannot load or process the yaml file. Did you use the raw link?")
    print("Reason:", err, file=sys.stderr)
    sys.exit(1)  # non-zero status so callers can detect the failure

print("Gathering email addresses from GitHub. This will take ~3 minutes for 100 voters.")

# Create a list for the emails and initialize a counter for the
# number of emails found.
email_list = []
found_count = 0

# Attempt to get an email address for each voter. If an email address is found
# append it to the list and increment the counter. Also print to the screen to
# show that the script is progressing.
for username in voter_list:
    email = get_email(username, api_token)
    if email:
        email_list.append(email)
        found_count += 1
        print(email)

# Name the CSV file after today's date (elekto_emails_YYYY-MM-DD.csv)
today = datetime.today().strftime('%Y-%m-%d')
outfile_name = 'elekto_emails_' + today + '.csv'

# Print status and write emails to the csv file.
print("Found emails for", found_count, "out of", len(voter_list), "voters")
print("Your results can be found in", outfile_name)

# newline='' is what the csv module expects for files it writes; the with
# block closes the file even if writing fails.
with open(outfile_name, 'w', newline='') as f:
    csv_file = csv.writer(f)
    # All addresses go on a single comma separated row.
    csv_file.writerow(email_list)

0 commit comments

Comments
 (0)