-
Notifications
You must be signed in to change notification settings - Fork 134
Expand file tree
/
Copy patholympic_medals.py
More file actions
68 lines (50 loc) · 2.18 KB
/
olympic_medals.py
File metadata and controls
68 lines (50 loc) · 2.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Load the medalists table; every later step of the script reads from `medals`.
filename = 'all_medalists.csv'
medals = pd.read_csv(filename)

# Keep only rows whose NOC is 'USA', group them by Edition, and print the
# number of non-null Medal entries in each group.
is_usa = medals['NOC'] == 'USA'
USA_edition_grouped = medals[is_usa].groupby('Edition')
usa_medals_per_edition = USA_edition_grouped['Medal'].count()
print(usa_medals_per_edition)
# Tally rows per NOC code (value_counts sorts by count, largest first, by
# default) and print the first 15 entries.
country_names = medals['NOC']
medal_counts = country_names.value_counts()
top_15_overall = medal_counts.head(15)
print(top_15_overall)

# Same tally split by medal type: one row per NOC, one column per Medal value,
# each cell counting the non-null Athlete entries.
counted = pd.pivot_table(
    medals,
    index='NOC',
    columns='Medal',
    values='Athlete',
    aggfunc='count',
)
# Row-wise sum across the medal columns, then order countries by that total.
counted['totals'] = counted.sum(axis=1)
counted = counted.sort_values(by='totals', ascending=False)
print(counted.head(15))
gender_keys = ['Event_gender', 'Gender']

# Print every distinct (Event_gender, Gender) combination found in the data.
ev_gen = medals.loc[:, gender_keys]
ev_gen_uniques = ev_gen.drop_duplicates()
print(ev_gen_uniques)

# For each combination, print the non-null count of every other column.
medals_by_gender = medals.groupby(gender_keys)
medal_count_by_gender = medals_by_gender.count()
print(medal_count_by_gender)

# Pull out the rows where Event_gender is 'W' but Gender is 'Men'; the script
# treats this pairing as suspect and prints the matching records.
event_is_w = medals['Event_gender'] == 'W'
gender_is_men = medals['Gender'] == 'Men'
sus = event_is_w & gender_is_men
suspect = medals.loc[sus]
print(suspect)
# Count the distinct Sport values per NOC, order the counts from largest to
# smallest, and print the first 15.
country_grouped = medals.groupby('NOC')
sports_per_country = country_grouped['Sport'].nunique()
Nsports = sports_per_country.sort_values(ascending=False)
print(Nsports.iloc[:15])
# Row masks: editions from 1952 through 1988 (both ends included) and rows
# whose NOC is either 'USA' or 'URS'.
edition = medals['Edition']
during_cold_war = edition.between(1952, 1988)
is_usa_urs = medals['NOC'].isin(['USA', 'URS'])

# Restrict to rows satisfying both masks, then print the number of distinct
# Sport values for each of the two NOC codes.
cold_war_medals = medals[during_cold_war & is_usa_urs]
country_grouped = cold_war_medals.groupby('NOC')
Nsports = country_grouped['Sport'].nunique()
print(Nsports)
# Edition x NOC table of non-null Athlete counts.
medals_won_by_country = pd.pivot_table(
    medals,
    values='Athlete',
    index='Edition',
    columns='NOC',
    aggfunc='count',
)

# Label-based slice (inclusive on both ends) down to the 1952-1988 editions
# and the USA / URS columns.
cold_war_years = slice(1952, 1988)
cold_war_usa_usr_medals = medals_won_by_country.loc[cold_war_years, ['USA', 'URS']]

# For each edition pick the column holding the larger count, then print how
# many editions each NOC came out ahead in.
most_medals = cold_war_usa_usr_medals.idxmax(axis=1)
winner_tally = most_medals.value_counts()
print(winner_tally)
# USA rows only, counted per (Edition, Medal) pair, then reshaped so each
# Medal value becomes its own column indexed by Edition.
usa = medals.loc[medals['NOC'] == 'USA']
athlete_counts = usa.groupby(['Edition', 'Medal'])['Athlete'].count()
usa_medals_by_year = athlete_counts.unstack(level='Medal')

# First figure: one line per medal column.
usa_medals_by_year.plot.line()
plt.show()

# Second figure: the same table drawn as an area plot.
usa_medals_by_year.plot(kind='area')
plt.show()
# Recast Medal as an ordered categorical (Bronze < Silver < Gold) so that the
# unstacked columns, and therefore the layers of the area plot, follow this
# order rather than the alphabetical order of the raw strings.
# Item assignment is used instead of attribute assignment: it is the
# documented way to set a column and cannot be mistaken for setting a plain
# attribute on the DataFrame.
medal_order = ['Bronze', 'Silver', 'Gold']
medals['Medal'] = pd.Categorical(values=medals['Medal'], categories=medal_order, ordered=True)

usa = medals[medals['NOC'] == 'USA']

# Medal is now categorical, so state `observed` explicitly. Leaving it
# implicit triggers a FutureWarning on recent pandas and silently changes the
# result once the default flips to True. With observed=False every medal
# category is kept for each edition, matching the behavior this script was
# written against.
usa_medals_by_year = usa.groupby(['Edition', 'Medal'], observed=False)['Athlete'].count()
usa_medals_by_year = usa_medals_by_year.unstack(level='Medal')

# Area plot of the per-edition counts, one layer per medal column.
usa_medals_by_year.plot.area()
plt.show()