Reputation: 11
I was building a query but i recently realiced that i have more than 1 value but i need only the last one
it is the result of 2 tables, i have 2 times 2200339 with 2 catcode's, the old and the new. how can i filter it?
"id" "typecode" "catcode" "sampledatetime" "somevalue"
"2200338" 6 "3101" "2020-03-02 07:17:54.047" 0.02000
"2200338" 7 "3101" "2020-03-02 07:17:54.067" 0.02000
"2200338" 8 "3101" "2020-03-02 07:17:54.087" 0.02500
"2200338" 9 "3101" "2020-03-02 07:17:55.007" 0.06000
"2200338" 10 "3101" "2020-03-02 07:17:55.021" 0.06000
"2200339" 6 "9603" "2020-03-02 07:57:11.068" 0.01500
"2200339" 7 "9603" "2020-03-02 07:57:11.091" 0.01500
"2200339" 8 "9603" "2020-03-02 07:57:12.009" 0.02500
"2200339" 9 "9603" "2020-03-02 07:57:12.028" 0.06000
"2200339" 10 "9603" "2020-03-02 07:57:12.039" 0.06000
"2200339" 6 "4712" "2020-03-04 15:09:43.061" 0.02000
"2200339" 7 "4712" "2020-03-04 15:09:43.064" 0.02000
"2200339" 8 "4712" "2020-03-04 15:09:43.067" 0.02500
"2200339" 9 "4712" "2020-03-04 15:09:43.072" 0.06000
"2200339" 10 "4712" "2020-03-04 15:09:43.075" 0.06000
but i only need this:
"id" "typecode" "catcode" "sampledatetime" "somevalue"
"2200338" 6 "3101" "2020-03-02 07:17:54.047" 0.02000
"2200338" 7 "3101" "2020-03-02 07:17:54.067" 0.02000
"2200338" 8 "3101" "2020-03-02 07:17:54.087" 0.02500
"2200338" 9 "3101" "2020-03-02 07:17:55.007" 0.06000
"2200338" 10 "3101" "2020-03-02 07:17:55.021" 0.06000
"2200339" 6 "4712" "2020-03-04 15:09:43.061" 0.02000
"2200339" 7 "4712" "2020-03-04 15:09:43.064" 0.02000
"2200339" 8 "4712" "2020-03-04 15:09:43.067" 0.02500
"2200339" 9 "4712" "2020-03-04 15:09:43.072" 0.06000
"2200339" 10 "4712" "2020-03-04 15:09:43.075" 0.06000
QUERY:
SELECT ranalysis.id,
ranalysis.typecode,
ranalysis.catcode,
ranalysis.sampledatetime
rchemistry.somevalue1, rchemistry.somevalue2, rchemistry.somevalue3,.......
FROM ranalysis
JOIN rchemistry ON ranalysis.oid = rchemistry.oid AND ranalysis.typecode >= 6 AND ranalysis.typecode <= 10
ORDER BY ranalysis.oid;
Thanks in advance
Upvotes: 0
Views: 59
Reputation: 6721
WITH
-- your input
input(id,typecode,catcode,sampledatetime,somevalue) AS (
SELECT 2200338, 6,3101,TIMESTAMP '2020-03-02 07:17:54.047',0.02000
UNION ALL SELECT 2200338, 7,3101,TIMESTAMP '2020-03-02 07:17:54.067',0.02000
UNION ALL SELECT 2200338, 8,3101,TIMESTAMP '2020-03-02 07:17:54.087',0.02500
UNION ALL SELECT 2200338, 9,3101,TIMESTAMP '2020-03-02 07:17:55.007',0.06000
UNION ALL SELECT 2200338,10,3101,TIMESTAMP '2020-03-02 07:17:55.021',0.06000
UNION ALL SELECT 2200339, 6,9603,TIMESTAMP '2020-03-02 07:57:11.068',0.01500
UNION ALL SELECT 2200339, 7,9603,TIMESTAMP '2020-03-02 07:57:11.091',0.01500
UNION ALL SELECT 2200339, 8,9603,TIMESTAMP '2020-03-02 07:57:12.009',0.02500
UNION ALL SELECT 2200339, 9,9603,TIMESTAMP '2020-03-02 07:57:12.028',0.06000
UNION ALL SELECT 2200339,10,9603,TIMESTAMP '2020-03-02 07:57:12.039',0.06000
UNION ALL SELECT 2200339, 6,4712,TIMESTAMP '2020-03-04 15:09:43.061',0.02000
UNION ALL SELECT 2200339, 7,4712,TIMESTAMP '2020-03-04 15:09:43.064',0.02000
UNION ALL SELECT 2200339, 8,4712,TIMESTAMP '2020-03-04 15:09:43.067',0.02500
UNION ALL SELECT 2200339, 9,4712,TIMESTAMP '2020-03-04 15:09:43.072',0.06000
UNION ALL SELECT 2200339,10,4712,TIMESTAMP '2020-03-04 15:09:43.075',0.06000
)
,
-- timestamps between catcodes not overlapping
-- so min timestamp per id/catcode combination
-- is good for choosing
relevant_groups AS (
SELECT
id
, catcode
, MIN(sampledatetime) AS sampledatetime
FROM input
GROUP BY
id
, catcode
)
,
-- now, need newest sampledatetime per id/catcode
-- combination. OLAP func with ROW_NUMBER() descending
with_filter_val AS (
SELECT
id
, catcode
, ROW_NUMBER() OVER(PARTITION BY id ORDER BY sampledatetime DESC) AS rn
FROM relevant_groups
)
-- finally , join the last Common Table Expression back to input
-- filtering by the obtained row number = 1 - and re-ordering
SELECT
i.*
FROM with_filter_val
JOIN input i USING(id,catcode)
WHERE rn=1
ORDER BY id,sampledatetime;
-- out id | typecode | catcode | sampledatetime | somevalue
-- out ---------+----------+---------+-------------------------+-----------
-- out 2200338 | 6 | 3101 | 2020-03-02 07:17:54.047 | 0.02000
-- out 2200338 | 7 | 3101 | 2020-03-02 07:17:54.067 | 0.02000
-- out 2200338 | 8 | 3101 | 2020-03-02 07:17:54.087 | 0.02500
-- out 2200338 | 9 | 3101 | 2020-03-02 07:17:55.007 | 0.06000
-- out 2200338 | 10 | 3101 | 2020-03-02 07:17:55.021 | 0.06000
-- out 2200339 | 6 | 4712 | 2020-03-04 15:09:43.061 | 0.02000
-- out 2200339 | 7 | 4712 | 2020-03-04 15:09:43.064 | 0.02000
-- out 2200339 | 8 | 4712 | 2020-03-04 15:09:43.067 | 0.02500
-- out 2200339 | 9 | 4712 | 2020-03-04 15:09:43.072 | 0.06000
-- out 2200339 | 10 | 4712 | 2020-03-04 15:09:43.075 | 0.06000
Upvotes: 1
Reputation: 827
One way would be to ORDER BY ranalysis.oid DESC
(to reverse the order of your query) and LIMIT 1
.
Note that this most likely isn't the fastest approach.. but it does solve the problem.
Upvotes: 0
Reputation: 1269463
Use distinct on
:
SELECT DISTINCT ON (ra.id, ra.typecode) ra.*,
rc.somevalue1, rc.somevalue2, . . .
FROM ranalysis ra JOIN
rchemistry rc
ON ra.oid = rc.oid AND ra.typecode >= 6 AND ra.typecode <= 10
ORDER BY ra.id, ra.typecode, ra.sampledatetime DESC;
I simplified the SELECT
to focus on the important parts of the query. DISTINCT ON
is a (convenient) Postgres extension that returns one value per "group"; the group is defined by the DISTINCT ON
. Which row is determined by the keys following the grouping columns in the ORDER BY
.
Upvotes: 1