Kristofer Gisslén
Kristofer Gisslén

Reputation: 1320

Proper grouping using union in MySQL

I have scanned StackOverflow and the Internet for an answer to the following question but none of the answers I've found works for me. This is my problem.

I have two queries that I want to combine by using UNION. I've managed to combine them as expected but I can't get them to group the way I want. I want the duplicated rows with a redistributed value of 0 to be hidden by ordering the sub queries so that they are prioritized upon grouping... (bad explanation I know - I hope the graphical presentation below explains it better. The rows I want removed are marked with a little arrow in the right margin).

How on earth would I do this?

+-----------+-------+---------------+----------------+----------------------------+
| CANDIDATE | VOTES | RANKED_CHOICE | REDISTRIBUTION | VOTES_AFTER_REDISTRIBUTION |
+-----------+-------+---------------+----------------+----------------------------+
|         1 |     8 |             0 |              0 |                          8 |
|         2 |     1 |             6 |             -1 |                          0 |
|         2 |     1 |             0 |              0 |                          1 | >
|         3 |     2 |             0 |              0 |                          2 |
|         4 |     4 |             0 |              0 |                          4 |
|         5 |     2 |             0 |              0 |                          2 |
|         6 |     3 |             0 |              0 |                          3 | >
|         6 |     3 |             0 |              1 |                          4 |
+-----------+-------+---------------+----------------+----------------------------+

-- The resulting table that's shown on the screen
SELECT vote_candidate candidate, original_votes votes, ranked_choice, redistribution, (original_votes + redistribution) votes_after_redistribution
FROM (
  -- Create the first table with original information
  SELECT c.vote_candidate, c.original_votes, '0' ranked_choice, '0' redistribution
  FROM (
    SELECT o.vote_candidate, COUNT(*) original_votes
    FROM vote_orders o
    WHERE o.vote_order = 1
    GROUP BY o.vote_candidate
  ) c
  GROUP BY c.vote_candidate
  -- Union a second table containing the second ranked choice of an eliminated candidate and the redistribution.
  -- This is done in two steps. In the first step we find out the ranking. In the second step we union the ranked
  -- candidate and its' redistribution with each other
  UNION
  SELECT vote_candidate, original_votes, ranked_choice, redistribution
  FROM ((
    SELECT vote_candidate, IFNULL(d.original_votes, 0) original_votes, IFNULL(COUNT(*), 0) ranked_choice, (0 - IFNULL(d.original_votes, 0)) redistribution
    FROM vote_orders a
    -- Get the second favored vote from each eliminated candidates ballots
    INNER JOIN (
      SELECT vote_id, c, MIN(minimum_vote)
      FROM (
        SELECT vote_id, vote_candidate c, COUNT(*) minimum_vote
        FROM vote_orders
        WHERE vote_order = 1
        GROUP BY vote_candidate
      ) t1
      WHERE minimum_vote = (
        SELECT MIN(minimum_vote)
        FROM (
          SELECT COUNT(*) minimum_vote
          FROM vote_orders
          WHERE vote_order = 1
          GROUP BY vote_candidate
        ) t2
      )
      GROUP BY c
    ) b
    ON a.vote_id = b.vote_id
    -- Get the eliminated candidates votes at the beginning of this round
    LEFT OUTER JOIN
    (
      SELECT vote_candidate o, COUNT(*) original_votes
      FROM vote_orders
      WHERE vote_order = 1
      GROUP BY vote_candidate
    ) d
    ON a.vote_candidate = d.o
    GROUP BY vote_candidate
    ORDER BY redistribution DESC
    LIMIT 1
    -- Union the candidates redistribution
    UNION
    (
      SELECT vote_candidate, d.original_votes, '0' ranked_choice, (CASE
                                                                   WHEN IFNULL(d.original_votes, 0) = 0
                                                                   THEN (0 - IFNULL(d.original_votes, 0))
                                                                   ELSE (
                                                                     SELECT MIN(minimum_vote)
                                                                     FROM (
                                                                       SELECT vote_candidate c, COUNT(*) minimum_vote
                                                                       FROM vote_orders
                                                                       WHERE vote_order = 1
                                                                       GROUP BY vote_candidate
                                                                     ) t1
                                                                     WHERE minimum_vote = (
                                                                       SELECT MIN(minimum_vote)
                                                                       FROM (
                                                                         SELECT COUNT(*) minimum_vote
                                                                         FROM vote_orders
                                                                         WHERE vote_order = 1
                                                                         GROUP BY vote_candidate
                                                                       ) t2
                                                                     )
                                                                     GROUP BY c
                                                                   )
                                                                   END) redistribution
      FROM vote_orders a
      INNER JOIN (
        SELECT vote_id, MIN(minimum_vote)
        FROM (
          SELECT vote_id, COUNT(*) minimum_vote
          FROM vote_orders
          WHERE vote_order = 1
          GROUP BY vote_candidate
        ) t1
        WHERE minimum_vote = (
          SELECT MIN(minimum_vote)
          FROM (
            SELECT COUNT(*) minimum_vote
            FROM vote_orders
            WHERE vote_order = 1
            GROUP BY vote_candidate
          ) t2
        )
      ) b
      ON a.vote_id = b.vote_id
      LEFT OUTER JOIN
      (
        SELECT vote_candidate o, COUNT(*) original_votes
        FROM vote_orders
        WHERE vote_order = 1
        GROUP BY vote_candidate
      ) d
      ON a.vote_candidate = d.o
      -- Determine which candidate to add the redistribution to
      WHERE vote_candidate = (
        SELECT IFNULL(COUNT(*), 0) ranked_choice
        FROM vote_orders a
        INNER JOIN (
          SELECT vote_id, c, MIN(minimum_vote)
          FROM (
            SELECT vote_id, vote_candidate c, COUNT(*) minimum_vote
            FROM vote_orders
            WHERE vote_order = 1
            GROUP BY vote_candidate
          ) t1
          WHERE minimum_vote = (
            SELECT MIN(minimum_vote)
            FROM (
              SELECT COUNT(*) minimum_vote
              FROM vote_orders
              WHERE vote_order = 1
              GROUP BY vote_candidate
            ) t2
          )
          GROUP BY c
        ) b
        ON a.vote_id = b.vote_id
      )
      GROUP BY vote_candidate
      ORDER BY redistribution DESC
      LIMIT 1
    )
  )) y    
) z
-- This is where the grouping fails on me
-- GROUP BY vote_candidate
ORDER BY vote_candidate ASC;

This is the schema:

CREATE TABLE votes
(
                             vote_id INT NOT NULL AUTO_INCREMENT,
                             vote_candidate_a INT,
                             vote_candidate_b INT,
                             vote_candidate_c INT,
                             vote_candidate_d INT,
                             vote_candidate_e INT,
                             vote_candidate_f INT,
                             PRIMARY KEY vote_id(vote_id)
);

INSERT INTO votes
VALUES
(NULL, 1, 3, 2, 5, 4, 6),
(NULL, 1, 2, 4, 6, 3, 5),
(NULL, 5, 3, 2, 1, 4, 6),
(NULL, 6, 1, 5, 3, 4, 2),
(NULL, 2, 3, 5, 6, 1, 4),
(NULL, 4, 1, 6, 3, 2, 5),
(NULL, 3, 2, 6, 1, 5, 4),
(NULL, 4, 3, 1, 6, 2, 5),
(NULL, 1, 2, 4, 3, 6, 5),
(NULL, 1, 5, 3, 2, 4, 6),
(NULL, 4, 5, 6, 2, 3, 1),
(NULL, 1, 4, 2, 3, 5, 6),
(NULL, 1, 2, 3, 4, 5, 6),
(NULL, 3, 6, 5, 1, 4, 2),
(NULL, 1, 2, 3, 4, 5, 6),
(NULL, 6, 5, 4, 3, 2, 1),
(NULL, 4, 3, 1, 5, 6, 2),
(NULL, 6, 3, 1, 2, 5, 4),
(NULL, 1, 4, 6, 3, 2, 5),
(NULL, 5, 3, 6, 4, 2, 1);


CREATE TABLE vote_orders
(
                             id INT NOT NULL AUTO_INCREMENT,
                             vote_id INT,
                             vote_order INT,
                             vote_candidate INT,
                             PRIMARY KEY id(id)
);

INSERT INTO vote_orders (id, vote_id, vote_order, vote_candidate)
SELECT NULL, vote_id, 1, vote_candidate_a FROM votes
UNION
SELECT NULL, vote_id, 2, vote_candidate_b FROM votes
UNION
SELECT NULL, vote_id, 3, vote_candidate_c FROM votes
UNION
SELECT NULL, vote_id, 4, vote_candidate_d FROM votes
UNION
SELECT NULL, vote_id, 5, vote_candidate_e FROM votes
UNION
SELECT NULL, vote_id, 6, vote_candidate_f FROM votes;

Upvotes: 1

Views: 70

Answers (1)

Jenn
Jenn

Reputation: 795

In reality, you already know what the vote_candidate and the number of votes is right off the bat. That is a simple query,

Select vote_candidate, count(*)
From vote_orders
Where vote_order = 1
Group by vote_candidate

This is the foundation of the whole query since the only things missing in the select are the ranked_choice, the redistribution, and the votes_after_redistribution (although this is pretty negligible since it is a calculation from two other columns). So, I would recommend using all of the work you have done to find the two missing columns. Essentially, you should change your query to be a sub-select to find only the missing columns.

I changed your sub-select to only find the ranked_choice and redistribution values if they are not 0. Then, I am grabbing these values (using the left outer join). If there is nothing in the sub-select, then we will default the value to 0.

Your previous query started by defaulting all the values to 0 and then returning the information for the rows that aren't 0. Let's skip all the extra work. Note, I still recommend cleaning up the sub-select, especially since I removed the count of original votes. Some of the joins may be extra since you no longer need to find that information. fiddle: http://sqlfiddle.com/#!2/1b0cb/51

-- The resulting table that's shown on the screen
SELECT v.vote_candidate candidate,
        count(*) votes,
        IfNull(z.ranked_choice, 0) ranked_choice,
        IfNull(z.redistribution, 0) redistribution,
        (count(*) + IfNull(z.redistribution, 0)) votes_after_redistribution
FROM vote_orders v left outer join

(

  -- Union a second table containing the second ranked choice of an eliminated candidate and the redistribution.
  -- This is done in two steps. In the first step we find out the ranking. In the second step we union the ranked
  -- candidate and its' redistribution with each other
  SELECT vote_candidate, ranked_choice, redistribution
  FROM ((
    SELECT  vote_candidate,
            IFNULL(COUNT(*), 0) ranked_choice,
            (0 - IFNULL(d.original_votes, 0)) redistribution
    FROM vote_orders a
    -- Get the second favored vote from each eliminated candidates ballots
    INNER JOIN (
        SELECT vote_id, vote_candidate c, MIN(minimum_vote)
        FROM (
          SELECT vote_id, vote_candidate, COUNT(*) minimum_vote
          FROM vote_orders
          WHERE vote_order = 1
          GROUP BY vote_candidate
        ) t2
    ) b
    ON a.vote_id = b.vote_id
    -- Get the eliminated candidates votes at the beginning of this round
    LEFT OUTER JOIN
    (
      SELECT vote_candidate o, COUNT(*) original_votes
      FROM vote_orders
      WHERE vote_order = 1
      GROUP BY vote_candidate
    ) d
    ON a.vote_candidate = d.o
    GROUP BY vote_candidate
    ORDER BY redistribution DESC
    LIMIT 1
    -- Union the candidates redistribution
    UNION
    (
      SELECT  vote_candidate,
              0 ranked_choice,
              (CASE
                   WHEN IFNULL(d.original_votes, 0) = 0
                   THEN (0 - IFNULL(d.original_votes, 0))
                   ELSE (
                          SELECT MIN(minimum_vote)
                                FROM (
                                      SELECT COUNT(*) minimum_vote
                                      FROM vote_orders
                                      WHERE vote_order = 1
                                      GROUP BY vote_candidate
                                      ) t2
                    )
                END) redistribution
      FROM vote_orders a
      INNER JOIN (
        SELECT vote_id, MIN(minimum_vote)
        FROM (
          SELECT vote_id, COUNT(*) minimum_vote
          FROM vote_orders
          WHERE vote_order = 1
          GROUP BY vote_candidate
        ) t1
        WHERE minimum_vote = (
          SELECT MIN(minimum_vote)
          FROM (
            SELECT COUNT(*) minimum_vote
            FROM vote_orders
            WHERE vote_order = 1
            GROUP BY vote_candidate
          ) t2
        )
      ) b
      ON a.vote_id = b.vote_id
      LEFT OUTER JOIN
      (
        SELECT vote_candidate o, COUNT(*) original_votes
        FROM vote_orders
        WHERE vote_order = 1
        GROUP BY vote_candidate
      ) d
      ON a.vote_candidate = d.o
      -- Determine which candidate to add the redistribution to
      WHERE vote_candidate = (
        SELECT IFNULL(COUNT(*), 0) ranked_choice
        FROM vote_orders a
        INNER JOIN (
          SELECT vote_id, c, MIN(minimum_vote)
          FROM (
            SELECT vote_id, vote_candidate c, COUNT(*) minimum_vote
            FROM vote_orders
            WHERE vote_order = 1
            GROUP BY vote_candidate
          ) t1
          WHERE minimum_vote = (
            SELECT MIN(minimum_vote)
            FROM (
              SELECT COUNT(*) minimum_vote
              FROM vote_orders
              WHERE vote_order = 1
              GROUP BY vote_candidate
            ) t2
          )
          GROUP BY c
        ) b
        ON a.vote_id = b.vote_id
      )
      GROUP BY vote_candidate
      ORDER BY redistribution DESC
      LIMIT 1
    )
  )) y    
) z
on v.vote_candidate = z.vote_candidate
Where v.vote_order = 1
GROUP BY v.vote_candidate
ORDER BY v.vote_candidate ASC;

Upvotes: 1

Related Questions