DBMS/voting_sets_joins.sql at master · bforoura/DBMS · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
/* *******************************************************************************
   The Voting relational model allows for various analyses, such as determining
   voting trends, calculating candidate support, and understanding voting behavior
   through SQL operations such as joins, set operations, groupings, and aggregations.

   The model consists of three tables:

	+------------------+          +--------------------+
	|      voters      |          |      candidates    |
	+------------------+          +--------------------+
	| * voterid (PK)   |          | * candidateid (PK) |
	|   name           |          |   name             |
	|   age            |          |   party            |
	|   city           |          +--------------------+
	+------------------+
			+----------------+--+
			|       votes       |
			+-----------------+-+
			| * voterid (FK)    |
			| * candidateid (FK)|
			|   votedate        |
			+-------------+-----+


******************************************************************************* */


-- *********************************************************************************
-- create and populate the voters table
-- *********************************************************************************
create table voters (
    voterid int primary key,   -- PK constaint

    name varchar(50),
    age int,
    city varchar(50)
);

-- insert data into voters table
insert into voters (voterid, name, age, city) values
(1, 'Alice', 30, 'New York'),
(2, 'Bob', 25, 'Los Angeles'),
(3, 'Charlie', 35, 'Chicago'),
(4, 'David', 28, 'Miami'),
(5, 'Eva', 22, 'San Francisco'),
(6, 'Frank', 40, 'New York'),
(7, 'Grace', 32, 'Los Angeles'),
(8, 'Hannah', 29, 'Chicago'),
(9, 'Ian', 24, 'Miami'),
(10, 'Jack', 36, 'San Francisco');


-- *********************************************************************************
-- create and populate the candidates table
-- *********************************************************************************
create table candidates (
    candidateid int primary key,   -- PK constaint

    name varchar(50),
    party varchar(50)
);

-- insert data into candidates table
insert into candidates (candidateid, name, party) values
(1, 'John Smith', 'Democratic'),
(2, 'Jane Doe', 'Republican'),
(3, 'Alex Johnson', 'Independent'),
(4, 'Maria Garcia', 'Green'),
(5, 'Olivia Brown', 'Democratic'),
(6, 'Ethan White', 'Republican'),
(7, 'Sophia Green', 'Independent'),
(8, 'Liam Black', 'Green');


-- *********************************************************************************
-- create and populate the votes table
-- *********************************************************************************
create table votes (
    voterid int,                         -- FK
    candidateid int,                     -- FK

    votedate date,                       -- descriptive attribute

    -- ensures each voter can vote only once for each candidate
    unique (voterid, candidateid),


    -- foreign key constraints
    foreign key (voterid) references voters(voterid)
              on delete cascade on update cascade,
    foreign key (candidateid) references candidates(candidateid)
              on delete cascade on update cascade
);

-- insert data into votes table
insert into votes (voterid, candidateid, votedate) values
(1, 1, '2024-09-01'),
(2, 2, '2024-09-01'),
(3, 1, '2024-09-01'),
(4, 3, '2024-09-01'),
(5, 2, '2024-09-01'),
(1, 5, '2024-09-02'),
(2, 6, '2024-09-02'),
(3, 5, '2024-09-02'),
(4, 7, '2024-09-02'),
(5, 6, '2024-09-02'),
(6, 1, '2024-09-03'),
(7, 2, '2024-09-03'),
(8, 3, '2024-09-03'),
(9, 4, '2024-09-03'),
(10,1, '2024-09-03');


/* ***********************************************************************************
                                      SQL JOINS

   CROSS JOIN
   ==========
   When you perform a CROSS JOIN between two tables, every row from the first table is
   combined with every row from the second table.

   This results in a new table where the number of rows is the product of the number
   of rows in the two original tables.

   CROSS JOIN should be used craefully due to the potential size of the output.

   Assuming relations r(R) and s(S):
   -- r has schema R with arity n and contains n_r rows
   -- s has schema S with arity m and contains n_s rows
   - -r x s has arity (n+m) and contains n_r * n_s rows

   SELECT * FROM table1 CROSS JOIN table2;
   or, alternatively:
   SELECT * FROM table1, table2;


   NATURAL JOIN
   ============
   Automatically joins tables based on all columns with the same names and compatible
   data types in both tables.

   SELECT * FROM table1 NATURAL JOIN table2;


   INNER JOIN ON
   =============
   Returns only the rows that have matching values in both tables based on a specified
   condition.

   SELECT columns
   FROM table1 INNER JOIN table2 ON table1.column_name = table2.column_name;

   which is the same as

   SELECT columns
   FROM table1 JOIN table2 ON table1.column_name = table2.column_name;


   INNER JOIN USING
   ================
   The USING clause can simplify the syntax when performing an INNER JOIN (or just a JOIN)
   if you are joining on columns with the same name in both tables.

   SELECT columns
   FROM table1 JOIN table2
   USING (common_column);


   NOTE
   ====
   Both keywords NATURAL and INNER are optional. You can choose to write them for clarity,
   but it's not necessary.

********************************************************************************* */

-- *********************************************************************************
-- This query lists all possible combinations of voters and candidates
-- *********************************************************************************
select *
from voters, candidates;    -- same as "from voters cross join candidates;"


-- *********************************************************************************
-- This query counts all possible combinations of voters and candidates
-- *********************************************************************************
select count(*)
from voters, candidates;


-- *********************************************************************************
-- This query helps to visualize how voters might choose candidates
-- based on demographics
-- *********************************************************************************
select voters.name, voters.age, voters.city, candidates.name, candidates.party
from voters cross join candidates;


-- *********************************************************************************
-- Count the number of votes each candidate has received from the votes table,
-- grouped by candidate
-- *********************************************************************************
-- Version 1: CROSS JOIN
-- =====================
select candidates.name, count(*) as vote_count
from votes, candidates
where votes.candidateid = candidates.candidateid
group by candidates.name
order by vote_count desc;

-- Version 2: NATURAL JOIN
-- =======================
select candidates.name, count(*) as vote_count
from votes natural join candidates
group by candidates.name
order by vote_count desc;

-- Version 3: INNER JOIN ON
-- ========================
select candidates.name, count(*) as vote_count
from votes inner join candidates
     on votes.candidateid = candidates.candidateid
group by candidates.name
order by vote_count desc;

-- Version 4: INNER JOIN USING
-- ===========================
select candidates.name, count(*) as vote_count
from votes inner join candidates
     using(candidateid)
group by candidates.name
order by vote_count desc;


/* *********************************************************************************
                                GROUP BY and HAVING ALL | ANY | SOME

   The GROUP BY clause in SQL is used to aggregate rows that have the same values in
   specified columns into summary rows.

   This allows us to perform aggregate functions (like COUNT, SUM, AVG, etc.) on
   grouped data.

   The HAVING clause is used to filter groups created by the GROUP BY clause based on
   specified conditions. It is similar to the WHERE clause, but while WHERE filters
   rows before aggregation, HAVING filters groups after aggregation.

   The ALL keyword is used to compare a value to all values in a subquery
   or a set of values.

   Alternatives to ALL: SOME, ANY (identical)

********************************************************************************* */


-- *********************************************************************************
-- Find the candidate(s) with the most votes
-- *********************************************************************************
-- Version 1:

select candidates.name, count(*) as vote_count
from votes natural join candidates
group by candidates.name
order by vote_count desc
limit 1;


-- Version 2:
-- The HAVING clause is used to filter groups created by the GROUP BY
-- clause based on aggregate values.

select candidates.name, count(*) as vote_count
from votes natural join candidates
group by candidates.name
having (vote_count) >= all

   -- the subquery is formed here
   (select count(*)
    from votes natural join candidates
    group by candidates.name);


/* *********************************************************************************
                    Set operations: INTERSECT, UNION, EXCEPT

   The INTERSECT operation returns the common rows from two result sets. It is useful
   when you want to identify records that exist in both datasets.

   The UNION operation combines the results of two or more queries into a single result
   set. It removes duplicate rows by default.

   The EXCEPT operation returns rows from the first result set that do not appear in
   the second result set.

   ALL vs DISTINCT options:
       ALL: Includes all records, retaining duplicates.
       DISTINCT: Ensures only unique records are returned. This is dafault behavior.

********************************************************************************* */

-- Find the names of voters who have voted for both Democratic and Republican candidates.

select voters.name
from voters join votes on voters.voterid = votes.voterid
            join candidates on votes.candidateid = candidates.candidateid
where candidates.party = 'Democratic'

intersect

select voters.name
from voters join votes on voters.voterid = votes.voterid
            join candidates on votes.candidateid = candidates.candidateid
where candidates.party = 'Republican';


-- find the names of voters who have voted for democratic but not republican candidates.

select voters.name
from voters join votes on voters.voterid = votes.voterid
            join candidates on votes.candidateid = candidates.candidateid
where candidates.party = 'Democratic'

except

select voters.name
from voters join votes on voters.voterid = votes.voterid
            join candidates on votes.candidateid = candidates.candidateid
where candidates.party = 'Republican';