Reputation: 382762
This question is exactly the same as Show all rows that have certain columns duplicated but for Sequelize.
Here's a minimal "fill in the missing query reproducer" with the same data as that question:
main.js
#!/usr/bin/env node
const assert = require('assert')
const path = require('path')
const { DataTypes, Sequelize, Op } = require('sequelize')
let sequelize
if (process.argv[2] === 'p') {
sequelize = new Sequelize('tmp', undefined, undefined, {
dialect: 'postgres',
host: '/var/run/postgresql',
})
} else {
sequelize = new Sequelize({
dialect: 'sqlite',
storage: 'tmp.sqlite',
})
}
;(async () => {
const User = sequelize.define('User', {
firstName: {
type: DataTypes.STRING,
},
lastName: {
type: DataTypes.STRING,
},
active: {
type: DataTypes.BOOLEAN,
},
}, {})
await User.sync({force: true})
await User.bulkCreate([
{ id: 1, firstName: 'test0', lastName: 'test0', active: false, },
{ id: 2, firstName: 'test0', lastName: 'test0', active: true, },
{ id: 3, firstName: 'test1', lastName: 'test1', active: true, },
{ id: 4, firstName: 'test2', lastName: 'test2', active: false, },
{ id: 5, firstName: 'test2', lastName: 'test2', active: false, },
{ id: 6, firstName: 'test3', lastName: 'test3', active: true, },
])
const rows = await User.findAll({ TODO FILL YOUR QUERY HERE })
assert.strictEqual(rows[0].id, 1)
assert.strictEqual(rows[1].id, 2)
assert.strictEqual(rows[2].id, 4)
assert.strictEqual(rows[3].id, 5)
assert.strictEqual(rows.length, 4)
})().finally(() => { return sequelize.close() })
package.json
{
"name": "tmp",
"private": true,
"version": "1.0.0",
"dependencies": {
"pg": "8.5.1",
"pg-hstore": "2.3.3",
"sequelize": "6.14.0",
"sql-formatter": "4.0.2",
"sqlite3": "5.0.2"
}
}
Upvotes: 2
Views: 3551
Reputation: 382762
Because Sequelize doesn't support subqueries without literal string queries, which defeats the point of the ORM, I tried to stay away from GROUP BY COUNT(*) >= 2
solutions, and went for the JOIN solution from: Show all rows that have certain columns duplicated instead:
const rows = await User.findAll({
include: {
model: User,
as: 'duplicate',
on: {
'$User.firstName$': { [Op.col]: 'duplicate.firstName' },
'$User.lastName$': { [Op.col]: 'duplicate.lastName' },
'$User.id$': { [Op.ne]: { [Op.col]: 'duplicate.id' } },
},
required: true,
orderBy: [['id', 'ASC']],
}
})
Unfortunately that also requires setting up a dummy association, even though we are using a custom on:
, otherwise sequelize complains:
User.hasMany(User, { as: 'duplicate', foreignKey: 'firstName', sourceKey: 'firstName', constraints: false });
Full working example:
main.js
#!/usr/bin/env node
const assert = require('assert')
const path = require('path')
const { DataTypes, Sequelize, Op } = require('sequelize')
let sequelize
if (process.argv[2] === 'p') {
sequelize = new Sequelize('tmp', undefined, undefined, {
dialect: 'postgres',
host: '/var/run/postgresql',
})
} else {
sequelize = new Sequelize({
dialect: 'sqlite',
storage: 'tmp.sqlite',
})
}
;(async () => {
const User = sequelize.define('User', {
firstName: {
type: DataTypes.STRING,
},
lastName: {
type: DataTypes.STRING,
},
active: {
type: DataTypes.BOOLEAN,
},
}, {})
// We need this dummy association here.
User.hasMany(User, { as: 'duplicate', foreignKey: 'firstName', sourceKey: 'firstName', constraints: false });
await User.sync({force: true})
await User.bulkCreate([
{ id: 1, firstName: 'test0', lastName: 'test0', active: false, },
{ id: 2, firstName: 'test0', lastName: 'test0', active: true, },
{ id: 3, firstName: 'test1', lastName: 'test1', active: true, },
{ id: 4, firstName: 'test2', lastName: 'test2', active: false, },
{ id: 5, firstName: 'test2', lastName: 'test2', active: false, },
{ id: 6, firstName: 'test3', lastName: 'test3', active: true, },
])
// Since Sequelize can't handle subqueries, we can do the JOIN approach from:
// https://stackoverflow.com/questions/10324107/show-all-rows-that-have-certain-columns-duplicated/10324160#10324160
const rows = await User.findAll({
include: {
model: User,
as: 'duplicate',
on: {
'$User.firstName$': { [Op.col]: 'duplicate.firstName' },
'$User.lastName$': { [Op.col]: 'duplicate.lastName' },
'$User.id$': { [Op.ne]: { [Op.col]: 'duplicate.id' } },
},
required: true,
orderBy: [['id', 'ASC']],
}
})
assert.strictEqual(rows[0].id, 1)
assert.strictEqual(rows[1].id, 2)
assert.strictEqual(rows[2].id, 4)
assert.strictEqual(rows[3].id, 5)
assert.strictEqual(rows.length, 4)
})().finally(() => { return sequelize.close() })
Queries generated:
Executing (default): DROP TABLE IF EXISTS `Users`;
Executing (default): CREATE TABLE IF NOT EXISTS `Users` (`id` INTEGER PRIMARY KEY AUTOINCREMENT, `firstName` VARCHAR(255), `lastName` VARCHAR(255), `active` TINYINT(1), `createdAt` DATETIME NOT NULL, `updatedAt` DATETIME NOT NULL);
Executing (default): PRAGMA INDEX_LIST(`Users`)
Executing (default): INSERT INTO `Users` (`id`,`firstName`,`lastName`,`active`,`createdAt`,`updatedAt`) VALUES (1,'test0','test0',0,'2022-02-23 10:51:31.665 +00:00','2022-02-23 10:51:31.665 +00:00'),(2,'test0','test0',1,'2022-02-23 10:51:31.665 +00:00','2022-02-23 10:51:31.665 +00:00'),(3,'test1','test1',1,'2022-02-23 10:51:31.665 +00:00','2022-02-23 10:51:31.665 +00:00'),(4,'test2','test2',0,'2022-02-23 10:51:31.665 +00:00','2022-02-23 10:51:31.665 +00:00'),(5,'test2','test2',0,'2022-02-23 10:51:31.665 +00:00','2022-02-23 10:51:31.665 +00:00'),(6,'test3','test3',1,'2022-02-23 10:51:31.665 +00:00','2022-02-23 10:51:31.665 +00:00');
Executing (default): SELECT `User`.`id`, `User`.`firstName`, `User`.`lastName`, `User`.`active`, `User`.`createdAt`, `User`.`updatedAt`, `duplicate`.`id` AS `duplicate.id`, `duplicate`.`firstName` AS `duplicate.firstName`, `duplicate`.`lastName` AS `duplicate.lastName`, `duplicate`.`active` AS `duplicate.active`, `duplicate`.`createdAt` AS `duplicate.createdAt`, `duplicate`.`updatedAt` AS `duplicate.updatedAt` FROM `Users` AS `User` INNER JOIN `Users` AS `duplicate` ON `User`.`firstName` = `duplicate`.`firstName` AND `User`.`lastName` = `duplicate`.`lastName` AND `User`.`id` != `duplicate`.`id`;
Tested on Ubuntu 21.10.
Upvotes: 1