How to find all rows that have certain columns duplicated in Sequelize?

This question is exactly the same as Show all rows that have certain columns duplicated but for Sequelize.

Here's a minimal "fill in the missing query reproducer" with the same data as that question:

main.js

#!/usr/bin/env node
const assert = require('assert')
const path = require('path')
const { DataTypes, Sequelize, Op } = require('sequelize')
let sequelize
if (process.argv[2] === 'p') {
  sequelize = new Sequelize('tmp', undefined, undefined, {
    dialect: 'postgres',
    host: '/var/run/postgresql',
  })
} else {
  sequelize = new Sequelize({
    dialect: 'sqlite',
    storage: 'tmp.sqlite',
  })
}
;(async () => {
const User = sequelize.define('User', {
  firstName: {
    type: DataTypes.STRING,
  },
  lastName: {
    type: DataTypes.STRING,
  },
  active: {
    type: DataTypes.BOOLEAN,
  },
}, {})
await User.sync({force: true})
await User.bulkCreate([
  { id: 1, firstName: 'test0', lastName: 'test0', active: false, },
  { id: 2, firstName: 'test0', lastName: 'test0', active: true, },
  { id: 3, firstName: 'test1', lastName: 'test1', active: true, },
  { id: 4, firstName: 'test2', lastName: 'test2', active: false, },
  { id: 5, firstName: 'test2', lastName: 'test2', active: false, },
  { id: 6, firstName: 'test3', lastName: 'test3', active: true, },
])
const rows = await User.findAll({ TODO FILL YOUR QUERY HERE })
assert.strictEqual(rows[0].id, 1)
assert.strictEqual(rows[1].id, 2)
assert.strictEqual(rows[2].id, 4)
assert.strictEqual(rows[3].id, 5)
assert.strictEqual(rows.length, 4)
})().finally(() => { return sequelize.close() })

package.json

{
  "name": "tmp",
  "private": true,
  "version": "1.0.0",
  "dependencies": {
    "pg": "8.5.1",
    "pg-hstore": "2.3.3",
    "sequelize": "6.14.0",
    "sql-formatter": "4.0.2",
    "sqlite3": "5.0.2"
  }
}

Upvotes: 2

Views: 3551

Answers (1)

Because Sequelize doesn't support subqueries without literal string queries, which defeats the point of the ORM, I tried to stay away from GROUP BY COUNT(*) >= 2 solutions, and went for the JOIN solution from: Show all rows that have certain columns duplicated instead:

const rows = await User.findAll({
  include: {
    model: User,
    as: 'duplicate',
    on: {
      '$User.firstName$': { [Op.col]: 'duplicate.firstName' },
      '$User.lastName$': { [Op.col]: 'duplicate.lastName' },
      '$User.id$': { [Op.ne]: { [Op.col]: 'duplicate.id' } },
    },
    required: true,
    orderBy: [['id', 'ASC']],
  }
})

Unfortunately that also requires setting up a dummy association, even though we are using a custom on:, otherwise sequelize complains:

User.hasMany(User, { as: 'duplicate', foreignKey: 'firstName', sourceKey: 'firstName', constraints: false });

Full working example:

main.js

#!/usr/bin/env node
const assert = require('assert')
const path = require('path')
const { DataTypes, Sequelize, Op } = require('sequelize')
let sequelize
if (process.argv[2] === 'p') {
  sequelize = new Sequelize('tmp', undefined, undefined, {
    dialect: 'postgres',
    host: '/var/run/postgresql',
  })
} else {
  sequelize = new Sequelize({
    dialect: 'sqlite',
    storage: 'tmp.sqlite',
  })
}
;(async () => {
const User = sequelize.define('User', {
  firstName: {
    type: DataTypes.STRING,
  },
  lastName: {
    type: DataTypes.STRING,
  },
  active: {
    type: DataTypes.BOOLEAN,
  },
}, {})
// We need this dummy association here.
User.hasMany(User, { as: 'duplicate', foreignKey: 'firstName', sourceKey: 'firstName', constraints: false });
await User.sync({force: true})
await User.bulkCreate([
  { id: 1, firstName: 'test0', lastName: 'test0', active: false, },
  { id: 2, firstName: 'test0', lastName: 'test0', active: true, },
  { id: 3, firstName: 'test1', lastName: 'test1', active: true, },
  { id: 4, firstName: 'test2', lastName: 'test2', active: false, },
  { id: 5, firstName: 'test2', lastName: 'test2', active: false, },
  { id: 6, firstName: 'test3', lastName: 'test3', active: true, },
])
// Since Sequelize can't handle subqueries, we can do the JOIN approach from:
// https://stackoverflow.com/questions/10324107/show-all-rows-that-have-certain-columns-duplicated/10324160#10324160
const rows = await User.findAll({
  include: {
    model: User,
    as: 'duplicate',
    on: {
      '$User.firstName$': { [Op.col]: 'duplicate.firstName' },
      '$User.lastName$': { [Op.col]: 'duplicate.lastName' },
      '$User.id$': { [Op.ne]: { [Op.col]: 'duplicate.id' } },
    },
    required: true,
    orderBy: [['id', 'ASC']],
  }
})
assert.strictEqual(rows[0].id, 1)
assert.strictEqual(rows[1].id, 2)
assert.strictEqual(rows[2].id, 4)
assert.strictEqual(rows[3].id, 5)
assert.strictEqual(rows.length, 4)
})().finally(() => { return sequelize.close() })

GitHub upstream.

Queries generated:

Executing (default): DROP TABLE IF EXISTS `Users`;
Executing (default): CREATE TABLE IF NOT EXISTS `Users` (`id` INTEGER PRIMARY KEY AUTOINCREMENT, `firstName` VARCHAR(255), `lastName` VARCHAR(255), `active` TINYINT(1), `createdAt` DATETIME NOT NULL, `updatedAt` DATETIME NOT NULL);
Executing (default): PRAGMA INDEX_LIST(`Users`)
Executing (default): INSERT INTO `Users` (`id`,`firstName`,`lastName`,`active`,`createdAt`,`updatedAt`) VALUES (1,'test0','test0',0,'2022-02-23 10:51:31.665 +00:00','2022-02-23 10:51:31.665 +00:00'),(2,'test0','test0',1,'2022-02-23 10:51:31.665 +00:00','2022-02-23 10:51:31.665 +00:00'),(3,'test1','test1',1,'2022-02-23 10:51:31.665 +00:00','2022-02-23 10:51:31.665 +00:00'),(4,'test2','test2',0,'2022-02-23 10:51:31.665 +00:00','2022-02-23 10:51:31.665 +00:00'),(5,'test2','test2',0,'2022-02-23 10:51:31.665 +00:00','2022-02-23 10:51:31.665 +00:00'),(6,'test3','test3',1,'2022-02-23 10:51:31.665 +00:00','2022-02-23 10:51:31.665 +00:00');
Executing (default): SELECT `User`.`id`, `User`.`firstName`, `User`.`lastName`, `User`.`active`, `User`.`createdAt`, `User`.`updatedAt`, `duplicate`.`id` AS `duplicate.id`, `duplicate`.`firstName` AS `duplicate.firstName`, `duplicate`.`lastName` AS `duplicate.lastName`, `duplicate`.`active` AS `duplicate.active`, `duplicate`.`createdAt` AS `duplicate.createdAt`, `duplicate`.`updatedAt` AS `duplicate.updatedAt` FROM `Users` AS `User` INNER JOIN `Users` AS `duplicate` ON `User`.`firstName` = `duplicate`.`firstName` AND `User`.`lastName` = `duplicate`.`lastName` AND `User`.`id` != `duplicate`.`id`;

Tested on Ubuntu 21.10.

Upvotes: 1

Related Questions