Reputation: 1192
I have a SQL table as follows
id |date_accessed
----------+------------
1 | 16/10/2014
1 | 28/10/2014
1 | 25/11/2014
1 | 16/12/2014
2 | 30/09/2014
2 | 03/10/2014
2 | 17/10/2014
2 | 03/01/2015
I need to group the data by the month and year, but I also want to know how many months since the user accessed the system for the 1st time
id | month | year | length_in_month
----------+------------+------------+-------------------
1 | 10 | 2014 | 1
1 | 11 | 2014 | 2
1 | 12 | 2014 | 3
2 | 09 | 2014 | 1
2 | 10 | 2014 | 2
2 | 01 | 2015 | 5
My query is as follows
select
id,
Extract(MONTH from "date_accessed") as month,
Extract(year from "date_accessed") as year
from
exampleTable
group by
1, 2, 3
order by
1, 3, 2
But I do not have access to the min(date_accessed)
when I do group by, to obtain the length of the length_in_month
column.
Is there a solution for this?
Upvotes: 5
Views: 123
Reputation: 39393
Shortest query if Postgres has built-in DATEDIFF.
Query can be made more idiomatic by using DISTINCT ON.
DISTINCT ON is specific to Postgres. It discards duplicate rows and keeps one only, and it sort rows based on the parameter(s) passed to it.
-- http://www.sqlines.com/postgresql/how-to/datediff
create or replace function month_diff (start_month date, end_month date)
returns int as $$
begin
return (date_part('year', end_month) - date_part('year', start_month))*12 +
date_part('month', end_month) - date_part('month', start_month);
end;
$$ language 'plpgsql' immutable;
select
distinct on (id, date_trunc('month', date_accessed))
id,
date_part('year', date_accessed) as year,
date_part('month', date_accessed) as month,
month_diff( min(date_accessed) over(partition by id), date_accessed ) + 1
as length_in_month
from t;
Output:
Upvotes: 0
Reputation: 39393
Use the following
Live test: http://sqlfiddle.com/#!17/7c833/6
-- drop table t;
/*
create table t as
select id, date_accessed::date
from (values
(1, '2014-10-16'),
(1, '2014-10-28'),
(1, '2014-11-25'),
(1, '2014-12-16'),
(2, '2014-09-30'),
(2, '2014-10-03'),
(2, '2014-10-17'),
(2, '2015-01-03')
) as x(id, date_accessed)
*/
with unique_months as
(
select
id,
date_trunc('month', date_accessed) as monthify
from t
group by id, monthify
)
, compute_length as
(
select
id, monthify,
(
(
extract(year from monthify) - extract(year from min(monthify) over(partition by id))
) * 12
)
+
(
extract(month from monthify) - extract(month from min(monthify) over(partition by id))
)
+
1 as length_in_month
from unique_months
)
select id,
extract(year from monthify) "year",
extract(month from monthify) "month",
length_in_month
from compute_length
order by id, monthify
Results:
| id | year | month | length_in_month |
|----|------|-------|-----------------|
| 1 | 2014 | 10 | 1 |
| 1 | 2014 | 11 | 2 |
| 1 | 2014 | 12 | 3 |
| 2 | 2014 | 9 | 1 |
| 2 | 2014 | 10 | 2 |
| 2 | 2015 | 1 | 5 |
Upvotes: 0
Reputation: 39393
Another approach
Live test: http://sqlfiddle.com/#!17/7c833/2
-- drop table t;
/*
create table t as
select id, date_accessed::date
from (values
(1, '2014-10-16'),
(1, '2014-10-28'),
(1, '2014-11-25'),
(1, '2014-12-16'),
(2, '2014-09-30'),
(2, '2014-10-03'),
(2, '2014-10-17'),
(2, '2015-01-03')
) as x(id, date_accessed)
*/
with unique_months as
(
select
id,
extract(year from date_accessed) "year",
extract(month from date_accessed) "month",
min(date_accessed) as month_representative
from t
group by id, year, month
)
, compute_length as
(
select
id, year, month,
(
(
extract(year from month_representative) - extract(year from min(month_representative) over(partition by id))
) * 12
)
+
(
extract(month from month_representative) - extract(month from min(month_representative) over(partition by id))
)
+
1 as length_in_month
from unique_months
)
select *
from compute_length
order by id, year, month
Results:
| id | year | month | length_in_month |
|----|------|-------|-----------------|
| 1 | 2014 | 10 | 1 |
| 1 | 2014 | 11 | 2 |
| 1 | 2014 | 12 | 3 |
| 2 | 2014 | 9 | 1 |
| 2 | 2014 | 10 | 2 |
| 2 | 2015 | 1 | 5 |
Upvotes: 0
Reputation: 170
Use a subquery, as shown below:
SELECT
exampleTable.id,
EXTRACT(month FROM "date_accessed") AS month,
EXTRACT(year FROM "date_accessed") AS year,
/* Calculate # months since the user accessed the system for the 1st time */
(EXTRACT(year from "date_accessed") - EXTRACT(year from firstTimeAccessDatesTable.firstAccessDate)) * 12
+ (EXTRACT(month from "date_accessed") - EXTRACT(month from firstTimeAccessDatesTable.firstAccessDate)) + 1 AS length_in_month
FROM
/* Join exampleTable with firstTimeAccessDatesTable by id */
exampleTable
INNER JOIN(
/* Perform subquery to obtain the date a given user accessed the system for the first time */
SELECT
id,
MIN("date_accessed") AS firstAccessDate
FROM
exampleTable
GROUP BY
1
) AS firstTimeAccessDatesTable
ON exampleTable.id = firstTimeAccessDatesTable.id
GROUP BY
1, 2, 3, 4
ORDER BY
1, 3, 2
Upvotes: 1
Reputation: 31666
I have used AGE
function to determine the difference between start date of month of first accessed and end date of actual date accessed to give an interval that could be fairly considered as a month and then add 1 to it, as you mentioned. This gives expected result.
first_access
is calculated separately in a CTE as it is a single value for each id and not per id, month, year.
with m AS
(
select id, min(date_accessed)
as first_access from t
group by id
)
select t.id, Extract(MONTH from "date_accessed") as month,
Extract(year from "date_accessed") as year,
EXTRACT ( month from
MIN( AGE( date_trunc('month', date_accessed)
+ interval '1 month - 1 day', --last day of month
date_trunc('month', first_access) --first day of month
))
) + 1 as length_in_month
from t join m on t.id = m.id
group by t.id,month,year
order by 1,3,2;
Upvotes: 2
Reputation: 98
The query below gives you the exact duration in months. Keeping your above sample input in mind the query will give you length_in_months duration as 0 if the time difference is less than 30 days. The multiplication by -1 is to convert the negative duration to be displayed as positive values.
create table Test(id integer, date_accessed date);
insert into Test values(1, "2014-10-16");
insert into Test values(1, "2014-10-28");
insert into Test values(1, "2014-11-25");
insert into Test values(1, "2014-12-16");
insert into Test values(2, "2014-09-30");
insert into Test values(2, "2014-10-03");
insert into Test values(2, "2014-10-17");
insert into Test values(2, "2015-10-16");
select a.id, a.month, a.year, a.date_accessed, (timestampdiff(MONTH,
a.date_accessed, a.min_date)) * -1 as length_in_month from (
select id, EXTRACT(MONTH FROM date_accessed) as MONTH, EXTRACT(YEAR FROM
date_accessed) as YEAR, date_accessed, (select MIN(date_accessed) from Test) as
min_date from Test order by date_accessed) a order by a.id asc;
Output
1 10 2014 2014-10-16 0
1 10 2014 2014-10-28 0
1 11 2014 2014-11-25 1
1 12 2014 2014-12-16 2
2 9 2014 2014-09-30 0
2 10 2014 2014-10-03 0
2 10 2014 2014-10-17 0
2 10 2015 2015-10-16 12
Upvotes: 0
Reputation: 1298
I think that you need first to select Id and min(month) group by Id, so you will get the first date for each Id. Then another select like the one you did plus the select I suggest above.
Upvotes: 0