Reputation: 502
(SELECT
id,
SUM(hits / ab) AS HAB
FROM batting
GROUP BY id
) b
SELECT id, bmonth, bstate FROM master a
WHERE bmonth >= 0 AND bstate is NOT NULL
GROUP By bmonth,bstate
So far i have this jibberish, but I am getting lost on how to form joins and then proceed. I am not sure where to start as far getting things. Should we join or use subquery? Please assist
Look below for schema:
CREATE EXTERNAL TABLE IF NOT EXISTS batting
(id STRING, year INT, team STRING,
league STRING, games INT, ab INT, runs INT, hits INT, doubles INT, triples INT,
homeruns INT, rbi INT, sb INT, cs INT, walks INT, strikeouts INT, ibb INT,
hbp INT, sh INT, sf INT, gidp INT)
ROW FORMAT DELIMITED FIELDS
TERMINATED BY ',' LOCATION '/home/hduser/hivetest/batting';
CREATE EXTERNAL TABLE IF NOT EXISTS master
(id STRING, byear INT, bmonth INT, bday INT, bcountry STRING, bstate STRING,
bcity STRING, dyear INT, dmonth INT, dday INT, dcountry STRING, dstate STRING,
dcity STRING, fname STRING, lname STRING, name STRING, weight INT, height INT,
bats STRING, throws STRING, debut STRING, finalgame STRING, retro STRING,
bbref STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LOCATION '/home/hduser/hivetest/master';
Upvotes: 1
Views: 224
Reputation: 11090
First ensure atleast 3 players come from the same state and same month.You will have to get the set from the master table.Count the ids for each state/month and filter the result where count(id) >=3
select bstate,bmonth from master
group by bstate,bmonth
having count(id) >=3
You will have to then join the batting table with the above set,group by month,state and order by sum(hits)/sum(bats) and get the first row.
select a.bmonth,a.bstate,SUM(c.hits)/SUM(b.bats) hb
from (select bmonth,bstate from master
group by bmonth,bstate
having count(id) >=3) a
join master b on a.bstate=b.state and a.month = b.month
join batting c on b.id = c.id
group by a.bmonth,a.bstate
order by hb
limit 1;
Upvotes: 1
Reputation: 844
Here is the query
select id, sum(hits)/sum(ab) as output from (select m.id, b.ab, b.hits from master m, batting b where m.id = b.id and m.bmonth >= 0 AND m.bstate is NOT NULL) group by id
Upvotes: 0