Jeremy R. Johnson
Jeremy R. Johnson

Reputation: 305

DPLYR Equivalent to n_distinct with multiple columns

I have a data frame with dates and locations, what I need is a sum of the number of Location-Days. While summarizing the number of units sold. I'm pulling this data from a SQL database so something that is compatible with DBPLYR is preferred but not required.

Looking for something that looks like this:

[![enter image description here][1]][1]

Reproducible DF

foo <- structure(list(Location_ID = c(5L, 5L, 5L, 5L, 5L, 5L, 38L, 9003L,9003L, 9003L), Date = structure(c(1624147200, 1624233600, 1624320000,1624406400, 1624492800, 1624579200, 1624147200, 1624406400, 1624492800,1624579200), tzone = "UTC", class = c("POSIXct", "POSIXt")),units = c(11, 4, 10, 13, 17, 13, 7, 13, 14, 10)), row.names = c(NA,-10L), groups = structure(list(Location_ID = c(5L, 38L, 9003L), .rows = structure(list(1:6, 7L, 8:10), ptype = integer(0), class = c("vctrs_list_of","vctrs_vctr", "list"))), row.names = c(NA, -3L), class = c("tbl_df","tbl", "data.frame"), .drop = TRUE), class = c("grouped_df","tbl_df", "tbl", "data.frame")) 

Original Code Block

foo <- item_sales_fact %>%
  left_join(product, by = "Product_Key") %>%
  left_join(calendar, by = "Calendar_Key") %>%
  left_join(organization, by = "Organization_Key") %>%
  filter(Product_Key == 32976, Date >= as.Date("2021-06-20"), Date <= as.Date("2021-06-25")) %>%
  select(Location_ID, Date, Qty_Sold) %>%
  collect()
SELECT "Location_ID", "Date", "Qty_Sold"
FROM (SELECT "Calendar_Key", "LHS"."Organization_Key" AS "Organization_Key", "Product_Key", "Promotion_Key", "Shift_Key", "ETL_source_system_key", "Pack_Size", "Qty_Sold", "Inv_Unit_Qty", "Extended_Cost", "Extended_Purchase_Rebate", "Extended_Sales_Rebate", "Extended_Sales", "Ent_Source_Hdr_Key", "Ent_Source_Dtl_Key", "UPC_Sell_Unit_Desc", "UPC", "Sell_Unit_Desc", "Sell_Unit_Qty", "Item_ID", "Item_Desc", "Sub_Category_ID", "Sub_Category_Desc", "Category_ID", "Category_Desc", "Department_ID", "Department_Desc", "Size_Desc", "Item_Type", "GrP_level_4_ID", "GrP_level_4_Desc", "Alt_GrP_level_1_ID", "Alt_GrP_level_1_Desc", "Alt_GrP_level_2_ID", "Alt_GrP_level_2_Desc", "Alt_GrP_level_3_ID", "Alt_GrP_level_3_Desc", "Create_Date", "Ent_Item_Number", "UPC_Discontinue_Date", "Scan_Modifier", "IsPurchasable", "Purchase_Discontinue_Date", "IsSellable", "Sales_Discontinue_Date", "Audit_Flag", "Date", "Day_Of_Week_ID", "Day_Of_Week", "Holiday", "Type_Of_Day", "Calendar_Month_No", "Calendar_Month_Name", "Calendar_Qtr_No", "Calendar_Qtr_Desc", "Calendar_Year", "Fiscal_Week", "Fiscal_Period_No", "Fiscal_Period_Desc", "Fiscal_Year", "Location_ID", "Location_Desc", "Last_Tank_Reading_Date", "First_Transaction_Date", "Last_Remodel_Date", "Closing_Date", "GPS_Latitude", "GPS_Longitude", "GPS_Address1", "GPS_Address2", "GPS_City", "GPS_State", "GPS_Zip", "Corp_site", "TimeZone_ID", "Site_ID_Formatted", "Business Entity_desc", "Business Entity_id", "PB OTP Pricing zones_desc", "PB OTP Pricing zones_id", "PB Entity ID_desc", "PB Entity ID_id", "PB Cigarette Pricing_desc", "PB Cigarette Pricing_id", "PB Ice Cream_desc", "PB Ice Cream_id", "PB Uses Pricebook_desc", "PB Uses Pricebook_id", "AD State_desc", "AD State_id", "PB Reward Programs_desc", "PB Reward Programs_id", "PB PKG BEV_desc", "PB PKG BEV_id", "PB Cooler DR P_desc", "PB Cooler DR P_id", "PB COOLER 3105_desc", "PB COOLER 3105_id", "PB Sugar Tax_desc", "PB Sugar Tax_id", "PB Tobacco Min Age_desc", "PB Tobacco Min Age_id", "PB Seasonal Promos Beach_desc", "PB Seasonal Promos Beach_id", "PB Region_desc", "PB Region_id", "PB WIC level_desc", "PB WIC level_id", "AU Paperwork Set_desc", "AU Paperwork Set_id", "AU Armored Car Pickups Per Week_desc", "AU Armored Car Pickups Per Week_id", "AU Trucker Cash_desc", "AU Trucker Cash_id", "IT Desktop Assignment_desc", "IT Desktop Assignment_id", "IT POS Type_desc", "IT POS Type_id", "AD C Store Brand_desc", "AD C Store Brand_id", "AD Site Status_desc", "AD Site Status_id", "AD Site Type_desc", "AD Site Type_id", "AD Duns Number 10 Digits_desc", "AD Duns Number 10 Digits_id", "AD Turnpikes TollRoads Airports_desc", "AD Turnpikes TollRoads Airports_id", "AD Duns Number 8 Digits_desc", "AD Duns Number 8 Digits_id", "RE Real Estate Type of Thing_desc", "RE Real Estate Type of Thing_id", "RE 3rd Party Lease Name_desc", "RE 3rd Party Lease Name_id", "MK Grocery Loyalty Martins_desc", "MK Grocery Loyalty Martins_id", "MK Plenti Loyalty_desc", "MK Plenti Loyalty_id", "MK APlus Rewards Loyalty_desc", "MK APlus Rewards Loyalty_id", "RE Acquisition Name_desc", "RE Acquisition Name_id", "RE Property Ownership_desc", "RE Property Ownership_id", "RE Property Owner_desc", "RE Property Owner_id", "RE Property Operator_desc", "RE Property Operator_id", "RE Lease Type_desc", "RE Lease Type_id", "RE Lease Company_desc", "RE Lease Company_id", "AD County_desc", "AD County_id", "CM Site Bank Name_desc", "CM Site Bank Name_id", "OP Market Sales Area_desc", "OP Market Sales Area_id", "AD Car Wash Type_desc", "AD Car Wash Type_id", "AD FF Subway Franchisee_desc", "AD FF Subway Franchisee_id", "FP Same Store_desc", "FP Same Store_id", "MK ATM_desc", "MK ATM_id", "MK Air Machine_desc", "MK Air Machine_id", "MK Vacuum_desc", "MK Vacuum_id", "CM Telecheck_desc", "CM Telecheck_id", "RM MACS Altria MLP_desc", "RM MACS Altria MLP_id", "AD Site Number_desc", "AD Site Number_id", "AD Cash Acceptor Fuel_desc", "AD Cash Acceptor Fuel_id", "AD Class of Trade_desc", "AD Class of Trade_id", "AD Tax Authority_desc", "AD Tax Authority_id", "FP Store Square Feet_desc", "FP Store Square Feet_id", "FP Store Open Time_desc", "FP Store Open Time_id", "FP Store Close Time_desc", "FP Store Close Time_id", "FP Acquisition Name_desc", "FP Acquisition Name_id", "MK Grocery Loyalty Redners_desc", "MK Grocery Loyalty Redners_id", "MK Grocery Loyalty Fuelperks_desc", "MK Grocery Loyalty Fuelperks_id", "MK Grocery Loyalty Price Chopper_desc", "MK Grocery Loyalty Price Chopper_id", "MK Grocery Loyalty SuperValu_desc", "MK Grocery Loyalty SuperValu_id", "MK Grocery Loyalty SuperValu WV_desc", "MK Grocery Loyalty SuperValu WV_id", "MK USC Loyalty_desc", "MK USC Loyalty_id", "AD Alcohol Sales Permit _desc", "AD Alcohol Sales Permit _id", "FP Daily Hours of Operation_desc", "FP Daily Hours of Operation_id", "PB Wholesale_desc", "PB Wholesale_id", "PB Linked Fee_desc", "PB Linked Fee_id", "AD Old Site Number_desc", "AD Old Site Number_id", "AD Lottery Vending Machine_desc", "AD Lottery Vending Machine_id", "AD FF Laredo Taco Company_desc", "AD FF Laredo Taco Company_id", "PB Flat Grill Ladson Legacy _desc", "PB Flat Grill Ladson Legacy _id", "RM SUN LOR RDA_desc", "RM SUN LOR RDA_id", "RM SUN States_desc", "RM SUN States_id", "AD Fiscal Systems_desc", "AD Fiscal Systems_id", "RM State with Entity Parcing _desc", "RM State with Entity Parcing _id", "PB City_desc", "PB City_id", "PB Beer Cooler Promo_desc", "PB Beer Cooler Promo_id", "RM Aplus Reverse Locs for Feb 2016 _desc", "RM Aplus Reverse Locs for Feb 2016 _id", "MK Fresh Made Smoothies_desc", "MK Fresh Made Smoothies_id", "MK Gourmet Latte Espresso_desc", "MK Gourmet Latte Espresso_id", "RM Mid Atlantic State Parcing UM_desc", "RM Mid Atlantic State Parcing UM_id", "AU On Line Lottery Income _desc", "AU On Line Lottery Income _id", "AU Lottery Paid Out Income _desc", "AU Lottery Paid Out Income _id", "PB LTC Kitchen_desc", "PB LTC Kitchen_id", "RM John Middleton Store Groupings_desc", "RM John Middleton Store Groupings_id", "PB McLane Fresh_desc", "PB McLane Fresh_id", "PB STRIPES WATER_desc", "PB STRIPES WATER_id", "PB TROLLI PROMOTION EXPIRES 4 4 2017_desc", "PB TROLLI PROMOTION EXPIRES 4 4 2017_id", "PB STRIPES DR PEPPER NO COKE OR PEPSI_desc", "PB STRIPES DR PEPPER NO COKE OR PEPSI_id", "PB STRIPES BIG RED_desc", "PB STRIPES BIG RED_id", "AD FF Godfather s Pizza Franchise_desc", "AD FF Godfather s Pizza Franchise_id", "AD CAT Scale_desc", "AD CAT Scale_id", "AD Foreign Currency Accepted_desc", "AD Foreign Currency Accepted_id", "PB Beer_desc", "PB Beer_id", "MK Grocery Loyalty Safeway_desc", "MK Grocery Loyalty Safeway_id", "CM Shadow Account Number_desc", "CM Shadow Account Number_id", "RM Altria Groups_desc", "RM Altria Groups_id", "RM ITG Rebates_desc", "RM ITG Rebates_id", "RM Cigar Grouping_desc", "RM Cigar Grouping_id", "RM USST CANS_desc", "RM USST CANS_id", "PB Acquisition_desc", "PB Acquisition_id", "MK Grocery Loyalty ACME_desc", "MK Grocery Loyalty ACME_id", "MK Grocery Loyalty Shaws_desc", "MK Grocery Loyalty Shaws_id", "MK Grill 44_desc", "MK Grill 44_id", "MK Desserts_desc", "MK Desserts_id", "MK Beer On Tap_desc", "MK Beer On Tap_id", "MK Oven Fried_desc", "MK Oven Fried_id", "RE Lot Size_desc", "RE Lot Size_id", "RE Construction Year_desc", "RE Construction Year_id", "RE Remodel Year_desc", "RE Remodel Year_id", "AD Subway Sales POS_desc", "AD Subway Sales POS_id", "AD FF Subway Number_desc", "AD FF Subway Number_id", "PB STRIPES PKG BEV_desc", "PB STRIPES PKG BEV_id", "PB FS Fresh On Site_desc", "PB FS Fresh On Site_id", "MK Roller Grill_desc", "MK Roller Grill_id", "AD Open Date_desc", "AD Open Date_id", "AD Close Date_desc", "AD Close Date_id", "MK US Food Div_desc", "MK US Food Div_id", "MK McLane DC_desc", "MK McLane DC_id", "RM Altria MST_desc", "RM Altria MST_id", "AD FF Chicken Deli_desc", "AD FF Chicken Deli_id", "IT Kalibrate Import_desc", "IT Kalibrate Import_id", "MK Grocery Loyalty AmBucks_desc", "MK Grocery Loyalty AmBucks_id", "AD DUNS 7 Eleven_desc", "AD DUNS 7 Eleven_id", "AD Co Op Divestment_desc", "AD Co Op Divestment_id", "AD RGIS _desc", "AD RGIS _id", "RM Altria Numark_desc", "RM Altria Numark_id", "Site Association_desc", "Site Association_id", "PB Restricted Cigar_desc", "PB Restricted Cigar_id", "Corporate_desc", "Corporate_id", "Director_desc", "Director_id", "Region Manager_desc", "Region Manager_id", "Area_desc", "Area_id", "Site_desc", "Site_id", "DNU PB Cooler Coke_id", "PB Grand Opening_id", "DNU PB Cooler Coke_desc", "DNU PB Disp Bev_id", "DNU PB Aloha Kitchen_desc", "DNU PB Highway Turnpike Optima Kiosk_id", "DNU PB Cooler 3110_id", "DNU PB Dairy_desc", "DNU PB Highway Turnpike Optima Kiosk_desc", "DNU PB Disp Bev_desc", "DNU PB Aloha Kitchen_id", "DNU PB Dairy_id", "PB Grand Opening_desc", "DNU PB Cooler 3110_desc", "T_Site_id", "T_Site_desc", "PB Beer NEW_id", "PB Beer NEW_desc", "FIN Rest Region_id", "FIN Rest Region_desc", "PB PROMO DAY 0 DAY 1_id", "PB PROMO DAY 0 DAY 1_desc", "PB Fees_id", "PB Fees_desc"
FROM (SELECT "LHS"."Calendar_Key" AS "Calendar_Key", "Organization_Key", "Product_Key", "Promotion_Key", "Shift_Key", "ETL_source_system_key", "Pack_Size", "Qty_Sold", "Inv_Unit_Qty", "Extended_Cost", "Extended_Purchase_Rebate", "Extended_Sales_Rebate", "Extended_Sales", "Ent_Source_Hdr_Key", "Ent_Source_Dtl_Key", "UPC_Sell_Unit_Desc", "UPC", "Sell_Unit_Desc", "Sell_Unit_Qty", "Item_ID", "Item_Desc", "Sub_Category_ID", "Sub_Category_Desc", "Category_ID", "Category_Desc", "Department_ID", "Department_Desc", "Size_Desc", "Item_Type", "GrP_level_4_ID", "GrP_level_4_Desc", "Alt_GrP_level_1_ID", "Alt_GrP_level_1_Desc", "Alt_GrP_level_2_ID", "Alt_GrP_level_2_Desc", "Alt_GrP_level_3_ID", "Alt_GrP_level_3_Desc", "Create_Date", "Ent_Item_Number", "UPC_Discontinue_Date", "Scan_Modifier", "IsPurchasable", "Purchase_Discontinue_Date", "IsSellable", "Sales_Discontinue_Date", "Audit_Flag", "Date", "Day_Of_Week_ID", "Day_Of_Week", "Holiday", "Type_Of_Day", "Calendar_Month_No", "Calendar_Month_Name", "Calendar_Qtr_No", "Calendar_Qtr_Desc", "Calendar_Year", "Fiscal_Week", "Fiscal_Period_No", "Fiscal_Period_Desc", "Fiscal_Year"
FROM (SELECT "Calendar_Key", "Organization_Key", "LHS"."Product_Key" AS "Product_Key", "Promotion_Key", "Shift_Key", "ETL_source_system_key", "Pack_Size", "Qty_Sold", "Inv_Unit_Qty", "Extended_Cost", "Extended_Purchase_Rebate", "Extended_Sales_Rebate", "Extended_Sales", "Ent_Source_Hdr_Key", "Ent_Source_Dtl_Key", "UPC_Sell_Unit_Desc", "UPC", "Sell_Unit_Desc", "Sell_Unit_Qty", "Item_ID", "Item_Desc", "Sub_Category_ID", "Sub_Category_Desc", "Category_ID", "Category_Desc", "Department_ID", "Department_Desc", "Size_Desc", "Item_Type", "GrP_level_4_ID", "GrP_level_4_Desc", "Alt_GrP_level_1_ID", "Alt_GrP_level_1_Desc", "Alt_GrP_level_2_ID", "Alt_GrP_level_2_Desc", "Alt_GrP_level_3_ID", "Alt_GrP_level_3_Desc", "Create_Date", "Ent_Item_Number", "UPC_Discontinue_Date", "Scan_Modifier", "IsPurchasable", "Purchase_Discontinue_Date", "IsSellable", "Sales_Discontinue_Date", "Audit_Flag"
FROM (SELECT "Calendar_key" AS "Calendar_Key", "Organization_key" AS "Organization_Key", "Product_Key", "Promotion_Key", "Shift_Key", "ETL_source_system_key", "Pack_Size", "Qty_Sold", "Inv_Unit_Qty", "Extended_Cost", "Extended_Purchase_Rebate", "Extended_Sales_Rebate", "Extended_Sales", "Ent_Source_Hdr_Key", "Ent_Source_Dtl_Key"
FROM "item_sales_fact") "LHS"
LEFT JOIN (SELECT "Product_Key", "UPC_Sell_Unit_Desc", "UPC", "Sell_Unit_Desc", "Sell_Unit_Qty", "Item_ID", "Item_Desc", "Sub_Category_ID", "Sub_Category_Desc", "Category_ID", "Category_Desc", "Department_ID", "Department_Desc", "Size_Desc", "Item_Type", "GrP_level_4_ID", "GrP_level_4_Desc", "Alt_GrP_level_1_ID", "Alt_GrP_level_1_Desc", "Alt_GrP_level_2_ID", "Alt_GrP_level_2_Desc", "Alt_GrP_level_3_ID", "Alt_GrP_level_3_Desc", "Create_Date", "Ent_Item_Number", "UPC_Discontinue_Date", "Scan_Modifier", "IsPurchasable", "Purchase_Discontinue_Date", "IsSellable", "Sales_Discontinue_Date", "Audit_Flag"
FROM "product") "RHS"
ON ("LHS"."Product_Key" = "RHS"."Product_Key")
) "LHS"
LEFT JOIN (SELECT "calendar_key" AS "Calendar_Key", "Day_Date" AS "Date", "Day_Of_Week_ID", "Day_Of_Week", "Holiday", "Type_Of_Day", "Calendar_Month_No", "Calendar_Month_Name", "Calendar_Qtr_No", "Calendar_Qtr_Desc", "Calendar_Year", "Fiscal_Week", "Fiscal_Period_No", "Fiscal_Period_Desc", "Fiscal_Year"
FROM "calendar") "RHS"
ON ("LHS"."Calendar_Key" = "RHS"."Calendar_Key")
) "LHS"
LEFT JOIN (SELECT "organization_key" AS "Organization_Key", "Location_ID", "Location_Desc", "Last_Tank_Reading_Date", "First_Transaction_Date", "Last_Remodel_Date", "Closing_Date", "GPS_Latitude", "GPS_Longitude", "GPS_Address1", "GPS_Address2", "GPS_City", "GPS_State", "GPS_Zip", "Corp_site", "TimeZone_ID", "Site_ID_Formatted", "Business Entity_desc", "Business Entity_id", "PB OTP Pricing zones_desc", "PB OTP Pricing zones_id", "PB Entity ID_desc", "PB Entity ID_id", "PB Cigarette Pricing_desc", "PB Cigarette Pricing_id", "PB Ice Cream_desc", "PB Ice Cream_id", "PB Uses Pricebook_desc", "PB Uses Pricebook_id", "AD State_desc", "AD State_id", "PB Reward Programs_desc", "PB Reward Programs_id", "PB PKG BEV_desc", "PB PKG BEV_id", "PB Cooler DR P_desc", "PB Cooler DR P_id", "PB COOLER 3105_desc", "PB COOLER 3105_id", "PB Sugar Tax_desc", "PB Sugar Tax_id", "PB Tobacco Min Age_desc", "PB Tobacco Min Age_id", "PB Seasonal Promos Beach_desc", "PB Seasonal Promos Beach_id", "PB Region_desc", "PB Region_id", "PB WIC level_desc", "PB WIC level_id", "AU Paperwork Set_desc", "AU Paperwork Set_id", "AU Armored Car Pickups Per Week_desc", "AU Armored Car Pickups Per Week_id", "AU Trucker Cash_desc", "AU Trucker Cash_id", "IT Desktop Assignment_desc", "IT Desktop Assignment_id", "IT POS Type_desc", "IT POS Type_id", "AD C Store Brand_desc", "AD C Store Brand_id", "AD Site Status_desc", "AD Site Status_id", "AD Site Type_desc", "AD Site Type_id", "AD Duns Number 10 Digits_desc", "AD Duns Number 10 Digits_id", "AD Turnpikes TollRoads Airports_desc", "AD Turnpikes TollRoads Airports_id", "AD Duns Number 8 Digits_desc", "AD Duns Number 8 Digits_id", "RE Real Estate Type of Thing_desc", "RE Real Estate Type of Thing_id", "RE 3rd Party Lease Name_desc", "RE 3rd Party Lease Name_id", "MK Grocery Loyalty Martins_desc", "MK Grocery Loyalty Martins_id", "MK Plenti Loyalty_desc", "MK Plenti Loyalty_id", "MK APlus Rewards Loyalty_desc", "MK APlus Rewards Loyalty_id", "RE Acquisition Name_desc", "RE Acquisition Name_id", "RE Property Ownership_desc", "RE Property Ownership_id", "RE Property Owner_desc", "RE Property Owner_id", "RE Property Operator_desc", "RE Property Operator_id", "RE Lease Type_desc", "RE Lease Type_id", "RE Lease Company_desc", "RE Lease Company_id", "AD County_desc", "AD County_id", "CM Site Bank Name_desc", "CM Site Bank Name_id", "OP Market Sales Area_desc", "OP Market Sales Area_id", "AD Car Wash Type_desc", "AD Car Wash Type_id", "AD FF Subway Franchisee_desc", "AD FF Subway Franchisee_id", "FP Same Store_desc", "FP Same Store_id", "MK ATM_desc", "MK ATM_id", "MK Air Machine_desc", "MK Air Machine_id", "MK Vacuum_desc", "MK Vacuum_id", "CM Telecheck_desc", "CM Telecheck_id", "RM MACS Altria MLP_desc", "RM MACS Altria MLP_id", "AD Site Number_desc", "AD Site Number_id", "AD Cash Acceptor Fuel_desc", "AD Cash Acceptor Fuel_id", "AD Class of Trade_desc", "AD Class of Trade_id", "AD Tax Authority_desc", "AD Tax Authority_id", "FP Store Square Feet_desc", "FP Store Square Feet_id", "FP Store Open Time_desc", "FP Store Open Time_id", "FP Store Close Time_desc", "FP Store Close Time_id", "FP Acquisition Name_desc", "FP Acquisition Name_id", "MK Grocery Loyalty Redners_desc", "MK Grocery Loyalty Redners_id", "MK Grocery Loyalty Fuelperks_desc", "MK Grocery Loyalty Fuelperks_id", "MK Grocery Loyalty Price Chopper_desc", "MK Grocery Loyalty Price Chopper_id", "MK Grocery Loyalty SuperValu_desc", "MK Grocery Loyalty SuperValu_id", "MK Grocery Loyalty SuperValu WV_desc", "MK Grocery Loyalty SuperValu WV_id", "MK USC Loyalty_desc", "MK USC Loyalty_id", "AD Alcohol Sales Permit _desc", "AD Alcohol Sales Permit _id", "FP Daily Hours of Operation_desc", "FP Daily Hours of Operation_id", "PB Wholesale_desc", "PB Wholesale_id", "PB Linked Fee_desc", "PB Linked Fee_id", "AD Old Site Number_desc", "AD Old Site Number_id", "AD Lottery Vending Machine_desc", "AD Lottery Vending Machine_id", "AD FF Laredo Taco Company_desc", "AD FF Laredo Taco Company_id", "PB Flat Grill Ladson Legacy _desc", "PB Flat Grill Ladson Legacy _id", "RM SUN LOR RDA_desc", "RM SUN LOR RDA_id", "RM SUN States_desc", "RM SUN States_id", "AD Fiscal Systems_desc", "AD Fiscal Systems_id", "RM State with Entity Parcing _desc", "RM State with Entity Parcing _id", "PB City_desc", "PB City_id", "PB Beer Cooler Promo_desc", "PB Beer Cooler Promo_id", "RM Aplus Reverse Locs for Feb 2016 _desc", "RM Aplus Reverse Locs for Feb 2016 _id", "MK Fresh Made Smoothies_desc", "MK Fresh Made Smoothies_id", "MK Gourmet Latte Espresso_desc", "MK Gourmet Latte Espresso_id", "RM Mid Atlantic State Parcing UM_desc", "RM Mid Atlantic State Parcing UM_id", "AU On Line Lottery Income _desc", "AU On Line Lottery Income _id", "AU Lottery Paid Out Income _desc", "AU Lottery Paid Out Income _id", "PB LTC Kitchen_desc", "PB LTC Kitchen_id", "RM John Middleton Store Groupings_desc", "RM John Middleton Store Groupings_id", "PB McLane Fresh_desc", "PB McLane Fresh_id", "PB STRIPES WATER_desc", "PB STRIPES WATER_id", "PB TROLLI PROMOTION EXPIRES 4 4 2017_desc", "PB TROLLI PROMOTION EXPIRES 4 4 2017_id", "PB STRIPES DR PEPPER NO COKE OR PEPSI_desc", "PB STRIPES DR PEPPER NO COKE OR PEPSI_id", "PB STRIPES BIG RED_desc", "PB STRIPES BIG RED_id", "AD FF Godfather s Pizza Franchise_desc", "AD FF Godfather s Pizza Franchise_id", "AD CAT Scale_desc", "AD CAT Scale_id", "AD Foreign Currency Accepted_desc", "AD Foreign Currency Accepted_id", "PB Beer_desc", "PB Beer_id", "MK Grocery Loyalty Safeway_desc", "MK Grocery Loyalty Safeway_id", "CM Shadow Account Number_desc", "CM Shadow Account Number_id", "RM Altria Groups_desc", "RM Altria Groups_id", "RM ITG Rebates_desc", "RM ITG Rebates_id", "RM Cigar Grouping_desc", "RM Cigar Grouping_id", "RM USST CANS_desc", "RM USST CANS_id", "PB Acquisition_desc", "PB Acquisition_id", "MK Grocery Loyalty ACME_desc", "MK Grocery Loyalty ACME_id", "MK Grocery Loyalty Shaws_desc", "MK Grocery Loyalty Shaws_id", "MK Grill 44_desc", "MK Grill 44_id", "MK Desserts_desc", "MK Desserts_id", "MK Beer On Tap_desc", "MK Beer On Tap_id", "MK Oven Fried_desc", "MK Oven Fried_id", "RE Lot Size_desc", "RE Lot Size_id", "RE Construction Year_desc", "RE Construction Year_id", "RE Remodel Year_desc", "RE Remodel Year_id", "AD Subway Sales POS_desc", "AD Subway Sales POS_id", "AD FF Subway Number_desc", "AD FF Subway Number_id", "PB STRIPES PKG BEV_desc", "PB STRIPES PKG BEV_id", "PB FS Fresh On Site_desc", "PB FS Fresh On Site_id", "MK Roller Grill_desc", "MK Roller Grill_id", "AD Open Date_desc", "AD Open Date_id", "AD Close Date_desc", "AD Close Date_id", "MK US Food Div_desc", "MK US Food Div_id", "MK McLane DC_desc", "MK McLane DC_id", "RM Altria MST_desc", "RM Altria MST_id", "AD FF Chicken Deli_desc", "AD FF Chicken Deli_id", "IT Kalibrate Import_desc", "IT Kalibrate Import_id", "MK Grocery Loyalty AmBucks_desc", "MK Grocery Loyalty AmBucks_id", "AD DUNS 7 Eleven_desc", "AD DUNS 7 Eleven_id", "AD Co Op Divestment_desc", "AD Co Op Divestment_id", "AD RGIS _desc", "AD RGIS _id", "RM Altria Numark_desc", "RM Altria Numark_id", "Site Association_desc", "Site Association_id", "PB Restricted Cigar_desc", "PB Restricted Cigar_id", "Corporate_desc", "Corporate_id", "Director_desc", "Director_id", "Region Manager_desc", "Region Manager_id", "Area_desc", "Area_id", "Site_desc", "Site_id", "DNU PB Cooler Coke_id", "PB Grand Opening_id", "DNU PB Cooler Coke_desc", "DNU PB Disp Bev_id", "DNU PB Aloha Kitchen_desc", "DNU PB Highway Turnpike Optima Kiosk_id", "DNU PB Cooler 3110_id", "DNU PB Dairy_desc", "DNU PB Highway Turnpike Optima Kiosk_desc", "DNU PB Disp Bev_desc", "DNU PB Aloha Kitchen_id", "DNU PB Dairy_id", "PB Grand Opening_desc", "DNU PB Cooler 3110_desc", "T_Site_id", "T_Site_desc", "PB Beer NEW_id", "PB Beer NEW_desc", "FIN Rest Region_id", "FIN Rest Region_desc", "PB PROMO DAY 0 DAY 1_id", "PB PROMO DAY 0 DAY 1_desc", "PB Fees_id", "PB Fees_desc"
FROM "organization") "RHS"
ON ("LHS"."Organization_Key" = "RHS"."Organization_Key")
) "q01"
WHERE (("Product_Key" = 32976.0) AND ("Date" >= TRY_CAST('2021-06-20' AS DATE)) AND ("Date" <= TRY_CAST('2021-06-25' AS DATE)))

Upvotes: 3

Views: 1251

Answers (2)

Simon.S.A.
Simon.S.A.

Reputation: 6931

Doing this directly with dbplyr requires that the database counts distinct values over multiple columns. I have not seen this feature in any SQL database I have worked with (SELECT COUNT(DISTINCT col1, col2) AS num_dist FROM table gives errors).

The work-around I would suggest is concatenating your columns together into a single column, something like this:

remote_table %>%
  mutate(conc = paste0(Date, "_", units)) %>%
  group_by(Location_ID) %>%
  summarise(num = n_distinct(conc))

edit, full test code with show_query

library(dplyr)
library(dbplyr)


df <- structure(list(Location_ID = c(5L, 5L, 5L, 5L, 5L, 5L, 38L, 9003L,9003L, 9003L), Date = structure(c(1624147200, 1624233600, 1624320000,1624406400, 1624492800, 1624579200, 1624147200, 1624406400, 1624492800,1624579200), tzone = "UTC", class = c("POSIXct", "POSIXt")),units = c(11, 4, 10, 13, 17, 13, 7, 13, 14, 10)), row.names = c(NA,-10L), groups = structure(list(Location_ID = c(5L, 38L, 9003L), .rows = structure(list(1:6, 7L, 8:10), ptype = integer(0), class = c("vctrs_list_of","vctrs_vctr", "list"))), row.names = c(NA, -3L), class = c("tbl_df","tbl", "data.frame"), .drop = TRUE), class = c("grouped_df","tbl_df", "tbl", "data.frame")) 
remote_table = tbl_lazy(df, con = simulate_mssql())

answer = remote_table %>%
  mutate(conc = paste0(Date, "_", units)) %>%
  group_by(Location_ID) %>%
  summarise(num = n_distinct(conc))

show_query(answer)

Gives SQL:

SELECT `Location_ID`, COUNT(DISTINCT `conc`) AS `num`
FROM (SELECT `Location_ID`, `Date`, `units`, `Date` + '_' + `units` AS `conc`
FROM `df`) `q01`
GROUP BY `Location_ID`

Upvotes: 3

Daniel
Daniel

Reputation: 1134

n_distinct() supports multiple columns.

foo %>%
    dplyr::group_by(
        Location_ID
    ) %>% 
    dplyr::summarise(
        count = dplyr::n_distinct(Date, units, na.rm = TRUE)
    )

The example data that you provide generates the following df

> foo
# A tibble: 10 x 3
# Groups:   Location_ID [3]
   Location_ID Date                units
         <int> <dttm>              <dbl>
 1           5 2021-06-20 00:00:00    11
 2           5 2021-06-21 00:00:00     4
 3           5 2021-06-22 00:00:00    10
 4           5 2021-06-23 00:00:00    13
 5           5 2021-06-24 00:00:00    17
 6           5 2021-06-25 00:00:00    13
 7          38 2021-06-20 00:00:00     7
 8        9003 2021-06-23 00:00:00    13
 9        9003 2021-06-24 00:00:00    14
10        9003 2021-06-25 00:00:00    10

The code I provide generates the following counts

# A tibble: 3 x 2
  Location_ID count
        <int> <int>
1           5     6
2          38     1
3        9003     3

Upvotes: 0

Related Questions