Skip to content

Commit f4d1885

Browse files
committed
data export script for nerds
1 parent 656490d commit f4d1885

File tree

1 file changed

+161
-0
lines changed

1 file changed

+161
-0
lines changed

script/export_user_data.rb

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
# does someone want their data? well look no further than this funny script
2+
# double-check the output before handing it over, to make sure it contains nothing that should stay private
3+
# want ur own export? dm @3kh0 on slack
4+
require 'csv'
5+
require 'fileutils'
6+
7+
# Resolve the target user from the first CLI argument, which may be either a
# numeric database id or a Slack id, then create the export directory.
arg = ARGV[0]
if arg.nil?
  puts "who?"
  exit 1
end

# Only a purely numeric argument is tried as a database id. String#to_i parses
# a leading digit run ("12abc".to_i == 12), so gating on `to_i > 0` alone could
# export a different user's data for a mistyped argument. \A/\z anchor the
# whole string (^/$ would only anchor a line). Everything else is looked up by
# slack_id only.
numeric_id = arg.match?(/\A\d+\z/) && arg.to_i.positive?
user = if numeric_id
  User.find_by(id: arg.to_i) || User.find_by(slack_id: arg)
else
  User.find_by(slack_id: arg)
end

unless user
  puts "never heard of them"
  exit 1
end

# Each run writes into its own UTC-timestamped directory under <Rails.root>/export.
timestamp = Time.now.utc.strftime('%Y%m%dT%H%M%SZ')
base_dir = Rails.root.join('export', "user_#{user.id}_#{timestamp}")
FileUtils.mkdir_p(base_dir)
puts "pulling data for #{user.id} (#{user.slack_id}) to #{base_dir}"
30+
31+
# Turns an arbitrary value into a string usable as a file name component.
#
# The value is stringified, then every run of characters other than ASCII
# letters, digits, "." and "-" is collapsed into a single underscore.
#
# @param s [#to_s] raw name (nil becomes an empty string)
# @return [String] sanitized name
def safe_filename(s)
  disallowed_run = /[^0-9A-Za-z.\-]+/
  raw_name = s.to_s
  raw_name.gsub(disallowed_run, '_')
end
34+
35+
# Writes +rows+ to a CSV file at +path+ (with a header line) and prints a
# "wrote <path>" confirmation. Nothing is written for nil or empty input.
#
# @param path [String, Pathname] destination file; overwritten if it exists
# @param rows [Array<Hash>, Hash, nil] one hash per CSV row; a single Hash is
#   treated as one row
# @return [nil]
def write_relation_csv(path, rows)
  # Array(hash) would explode a Hash into [key, value] pairs, so wrap a lone
  # Hash explicitly. Array(nil) is [], which covers the nil case too.
  rows = rows.is_a?(Hash) ? [ rows ] : Array(rows)
  return if rows.empty?

  # Use the union of keys across all rows (first-seen order) so a column that
  # only appears in a later row is not silently dropped.
  headers = rows.flat_map(&:keys).uniq
  CSV.open(path, 'w', write_headers: true, headers: headers) do |csv|
    rows.each do |row|
      csv << headers.map { |key| row[key] }
    end
  end
  puts "wrote #{path}"
end
47+
48+
# Dump the user's row and every related table to its own CSV inside base_dir.
# write_relation_csv writes nothing for empty input, so a missing file means
# the query returned no rows.
user_path = base_dir.join('user.csv')
write_relation_csv(user_path, [ user.attributes ])

# UserProfile is exported only when that constant is defined.
if defined?(UserProfile)
  profile = UserProfile.find_by(user_id: user.id)
  write_relation_csv(base_dir.join('user_profile.csv'), [ profile.attributes ]) if profile
end

# Writes the attribute hashes of every record in +records+ to +file_name+.
dump_records = lambda do |file_name, records|
  write_relation_csv(base_dir.join(file_name), records.map(&:attributes))
end

# `projects`, `devlogs` and `project_ids` are reused by later sections of the script.
projects = Project.where(user_id: user.id).order(:id)
dump_records.call('projects.csv', projects)

devlogs = Devlog.where(user_id: user.id).order(:id)
dump_records.call('devlogs.csv', devlogs)

# Comments left on the user's devlogs.
dump_records.call(
  'comments_on_devlogs.csv',
  Comment.joins(:devlog).where(devlogs: { user_id: user.id }).order('comments.id')
)

# Likes given by the user.
dump_records.call('likes_by_user.csv', Like.where(user_id: user.id).order(:id))

# Likes received on the user's projects / devlogs. Likes are polymorphic, so
# the owning table is joined by hand. Ordered by likes.id like every other
# export so repeated runs produce rows in the same order.
dump_records.call(
  'likes_on_projects.csv',
  Like.joins("JOIN projects p ON likes.likeable_type = 'Project' AND likes.likeable_id = p.id")
      .where('p.user_id = ?', user.id)
      .order('likes.id')
)
dump_records.call(
  'likes_on_devlogs.csv',
  Like.joins("JOIN devlogs d ON likes.likeable_type = 'Devlog' AND likes.likeable_id = d.id")
      .where('d.user_id = ?', user.id)
      .order('likes.id')
)

# Tables keyed by project_id are only queried when the user owns a project.
project_ids = projects.pluck(:id)
if project_ids.any?
  dump_records.call('project_follows.csv', ProjectFollow.where(project_id: project_ids).order(:id))
  dump_records.call('project_languages.csv', ProjectLanguage.where(project_id: project_ids).order(:id))
  dump_records.call('stonk_ticklers.csv', StonkTickler.where(project_id: project_ids).order(:id))
  dump_records.call('ship_events.csv', ShipEvent.where(project_id: project_ids).order(:id))
end

# Votes in which one of the user's projects appears on either side.
dump_records.call(
  'votes_for_projects.csv',
  Vote.where('project_1_id IN (?) OR project_2_id IN (?)', project_ids, project_ids).order(:id)
)
dump_records.call('vote_changes_for_projects.csv', VoteChange.where(project_id: project_ids).order(:id))

dump_records.call('user_badges.csv', UserBadge.where(user_id: user.id).order(:id))

# Single-row lookups: pass nil (nothing written) when the record is absent.
tutorial = TutorialProgress.find_by(user_id: user.id)
write_relation_csv(base_dir.join('tutorial_progress.csv'), tutorial && [ tutorial.attributes ])

uh = UserHackatimeData.find_by(user_id: user.id)
write_relation_csv(base_dir.join('user_hackatime_data.csv'), uh && [ uh.attributes ])
107+
108+
# Export ActiveStorage attachments for the user, the user's profile, projects
# and devlogs: attachment metadata goes to attachments.csv, blob metadata to
# blobs.csv, and each blob's bytes to a file under blobs/.
# NOTE: attachment_dir is created, but nothing in this section writes into it.
attachment_dir = base_dir.join('attachments')
blob_dir = base_dir.join('blobs')
FileUtils.mkdir_p(attachment_dir)
FileUtils.mkdir_p(blob_dir)

user_profile_id = (defined?(UserProfile) && UserProfile.find_by(user_id: user.id)&.id)

# Pair each record type with ONLY its own ids. Filtering on
# `record_type IN (...) AND record_id IN (all ids mixed together)` would also
# match e.g. a Project whose id happens to equal this user's id or one of
# their devlog ids, leaking attachments that belong to someone else.
owned_ids_by_type = {
  'Project' => project_ids,
  'Devlog' => devlogs.pluck(:id),
  'User' => [ user.id ],
  'UserProfile' => [ user_profile_id ].compact
}
attachments = owned_ids_by_type
  .map { |record_type, ids| ActiveStorage::Attachment.where(record_type: record_type, record_id: ids) }
  .reduce(:or)

attachments_rows = []
blobs_seen = {}

# find_each walks the table in primary-key batches and ignores (or, depending
# on configuration, rejects) a scoped ORDER BY, so no explicit order is set.
# includes(:blob) loads blobs per batch instead of one query per attachment.
attachments.includes(:blob).find_each do |att|
  b = att.blob
  attachments_rows << {
    id: att.id,
    name: att.name,
    record_type: att.record_type,
    record_id: att.record_id,
    blob_id: b.id,
    filename: b.filename.to_s,
    content_type: b.content_type,
    byte_size: b.byte_size,
    service_name: b.service_name
  }

  # A blob can back several attachments; fetch it only once.
  next if blobs_seen[b.id]
  begin
    blob_path = blob_dir.join("#{b.id}_#{safe_filename(b.filename.to_s)}")
    # `yoink` is not a stock ActiveStorage::Blob method. Use it when the app
    # defines it; otherwise fall back to the documented Blob#download so the
    # file is actually saved instead of failing with NoMethodError.
    payload = b.respond_to?(:yoink) ? b.yoink : b.download
    File.binwrite(blob_path, payload)
    puts "yoinked blob #{b.id} -> #{blob_path}"
  rescue => e
    # Best effort: a blob that cannot be fetched is reported and skipped.
    warn "Failed to yoink blob id=#{b.id} filename=#{b.filename}: #{e.message}"
  end
  # Marked as seen even on failure, so the blob still appears in blobs.csv
  # and is not retried for its other attachments.
  blobs_seen[b.id] = true
end

write_relation_csv(base_dir.join('attachments.csv'), attachments_rows)

blob_rows = ActiveStorage::Blob.where(id: blobs_seen.keys).order(:id).map(&:attributes)
write_relation_csv(base_dir.join('blobs.csv'), blob_rows)
152+
153+
# Devlogs may also carry a value in their own `attachment` column; export the
# non-blank ones as (devlog_id, attachment) pairs.
legacy_pairs = Devlog
  .where(user_id: user.id)
  .where.not(attachment: [ nil, '' ])
  .order(:id)
  .pluck(:id, :attachment)

unless legacy_pairs.empty?
  legacy_csv_path = base_dir.join('legacy_devlog_attachments.csv')
  CSV.open(legacy_csv_path, 'w', write_headers: true, headers: [ 'devlog_id', 'attachment' ]) do |csv|
    legacy_pairs.each do |devlog_id, attachment|
      csv << [ devlog_id, attachment ]
    end
  end
  puts "if we got old attachments, we got it now"
end

puts "chat we done, go sniff it in #{base_dir}"

0 commit comments

Comments
 (0)