Reputation: 8745
Yes, there is already the question Bulk Insert Optimization in .NET / EF Core. But this question is different.
I ran into a scenario where I have to create a DB and populate it with processed/calculated/generated data. The amount of records is between 100 millions and 200 millions of records.
Since seeding the data takes ages I can not do it on startup (of an web api). So I created a separated console application migrating the database and seeding it.
It still takes ages (I even haven't finished a single run of seeding yet).
Anything else what I can optimize in the main method (except removing console logs which are only there for debugging at the moment).
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Metadata;
using Microsoft.EntityFrameworkCore.Metadata.Builders;
using Microsoft.EntityFrameworkCore.Migrations;
using System;
using System.Diagnostics;
namespace EFCoreBulkInsert
{
class EntityA
{
public int Id { get; set; }
public int ColA { get; set; }
public string ColB { get; set; }
public EntityB EntityB { get; set; }
public EntityC EntityC { get; set; }
}
class EntityB
{
public int Id { get; set; }
public string Col { get; set; }
public EntityA EntityA { get; set; }
}
class EntityC
{
public int Id { get; set; }
public int Col { get; set; }
public EntityA EntityA { get; set; }
}
class MyDbContext : DbContext
{
public DbSet EntityA { get; set; }
public DbSet EntityB { get; set; }
public DbSet EntityC { get; set; }
public MyDbContext(DbContextOptions options) : base(options) { }
protected override void OnModelCreating(ModelBuilder builder)
{
builder.Entity(BuildEntityA);
builder.Entity(BuildEntityB);
builder.Entity(BuildEntityC);
}
protected virtual void BuildEntityA(EntityTypeBuilder entity)
{
entity
.ToTable("EntityA")
.HasKey(e => e.Id);
entity.Property(e => e.Id)
.HasColumnName("Id")
.UseIdentityColumn(1, 1)
.Metadata.SetBeforeSaveBehavior(PropertySaveBehavior.Ignore);
entity.Property(e => e.ColA)
.HasColumnName("ColA")
.IsRequired();
entity.HasIndex(e => e.ColA)
.HasName("IX_EntityA_ColA");
entity.Property(e => e.ColB)
.HasColumnName("ColB")
.IsRequired();
entity.HasIndex(e => e.ColB)
.HasName("IX_EntityA_ColB");
entity.HasOne(e => e.EntityB)
.WithOne(e => e.EntityA);
entity.HasOne(e => e.EntityC)
.WithOne(e => e.EntityA);
}
protected virtual void BuildEntityB(EntityTypeBuilder entity)
{
entity
.ToTable("EntityB")
.HasKey(e => e.Id);
entity.Property(e => e.Id)
.HasColumnName("Id")
//.UseIdentityColumn(1, 1)
//.Metadata.SetBeforeSaveBehavior(PropertySaveBehavior.Ignore)
;
entity.Property(e => e.Col)
.HasColumnName("Col")
.IsRequired();
entity.HasIndex(e => e.Col)
.HasName("IX_EntityB_Col");
entity.HasOne(e => e.EntityA)
.WithOne(e => e.EntityB)
.HasForeignKey(e => e.Id)
.HasConstraintName("FK_EntityB-Id_EntityA-Id")
.OnDelete(DeleteBehavior.Cascade);
}
protected virtual void BuildEntityC(EntityTypeBuilder entity)
{
entity
.ToTable("EntityC")
.HasKey(e => e.Id);
entity.Property(e => e.Id)
.HasColumnName("Id")
//.UseIdentityColumn(1, 1)
//.Metadata.SetBeforeSaveBehavior(PropertySaveBehavior.Ignore)
;
entity.Property(e => e.Col)
.HasColumnName("Col")
.IsRequired();
entity.HasIndex(e => e.Col)
.HasName("IX_EntityC_Col");
entity.HasOne(e => e.EntityA)
.WithOne(e => e.EntityC)
.HasForeignKey(e => e.Id)
.HasConstraintName("FK_EntityC-Id_EntityA-Id")
.OnDelete(DeleteBehavior.Cascade);
}
}
[DbContext(typeof(MyDbContext))]
partial class MyDbContextModelSnapshot : ModelSnapshot
{
protected override void BuildModel(ModelBuilder modelBuilder)
{
#pragma warning disable 612, 618
modelBuilder
.HasAnnotation("ProductVersion", "3.1.0")
.HasAnnotation("Relational:MaxIdentifierLength", 128)
.HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);
modelBuilder.Entity("EFCoreBulkInsert.EntityA", b =>
{
b.Property("Id")
.ValueGeneratedOnAdd()
.HasColumnName("Id")
.HasColumnType("int")
.HasAnnotation("SqlServer:IdentityIncrement", 1)
.HasAnnotation("SqlServer:IdentitySeed", 1)
.HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);
b.Property("ColA")
.HasColumnName("ColA")
.HasColumnType("int");
b.Property("ColB")
.IsRequired()
.HasColumnName("ColB")
.HasColumnType("nvarchar(450)");
b.HasKey("Id");
b.HasIndex("ColA")
.HasName("IX_EntityA_ColA");
b.HasIndex("ColB")
.HasName("IX_EntityA_ColB");
b.ToTable("EntityA");
});
modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
{
b.Property("Id")
.HasColumnName("Id")
.HasColumnType("int");
b.Property("Col")
.IsRequired()
.HasColumnName("Col")
.HasColumnType("nvarchar(450)");
b.HasKey("Id");
b.HasIndex("Col")
.HasName("IX_EntityB_Col");
b.ToTable("EntityB");
});
modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
{
b.Property("Id")
.HasColumnName("Id")
.HasColumnType("int");
b.Property("Col")
.HasColumnName("Col")
.HasColumnType("int");
b.HasKey("Id");
b.HasIndex("Col")
.HasName("IX_EntityC_Col");
b.ToTable("EntityC");
});
modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
{
b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
.WithOne("EntityB")
.HasForeignKey("EFCoreBulkInsert.EntityB", "Id")
.HasConstraintName("FK_EntityB-Id_EntityA-Id")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
});
modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
{
b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
.WithOne("EntityC")
.HasForeignKey("EFCoreBulkInsert.EntityC", "Id")
.HasConstraintName("FK_EntityC-Id_EntityA-Id")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
});
#pragma warning restore 612, 618
}
}
public partial class Initial : Migration
{
protected override void Up(MigrationBuilder migrationBuilder)
{
migrationBuilder.CreateTable(
name: "EntityA",
columns: table => new
{
Id = table.Column(nullable: false)
.Annotation("SqlServer:Identity", "1, 1"),
ColA = table.Column(nullable: false),
ColB = table.Column(nullable: false)
},
constraints: table =>
{
table.PrimaryKey("PK_EntityA", x => x.Id);
});
migrationBuilder.CreateTable(
name: "EntityB",
columns: table => new
{
Id = table.Column(nullable: false),
Col = table.Column(nullable: false)
},
constraints: table =>
{
table.PrimaryKey("PK_EntityB", x => x.Id);
table.ForeignKey(
name: "FK_EntityB-Id_EntityA-Id",
column: x => x.Id,
principalTable: "EntityA",
principalColumn: "Id",
onDelete: ReferentialAction.Cascade);
});
migrationBuilder.CreateTable(
name: "EntityC",
columns: table => new
{
Id = table.Column(nullable: false),
Col = table.Column(nullable: false)
},
constraints: table =>
{
table.PrimaryKey("PK_EntityC", x => x.Id);
table.ForeignKey(
name: "FK_EntityC-Id_EntityA-Id",
column: x => x.Id,
principalTable: "EntityA",
principalColumn: "Id",
onDelete: ReferentialAction.Cascade);
});
migrationBuilder.CreateIndex(
name: "IX_EntityA_ColA",
table: "EntityA",
column: "ColA");
migrationBuilder.CreateIndex(
name: "IX_EntityA_ColB",
table: "EntityA",
column: "ColB");
migrationBuilder.CreateIndex(
name: "IX_EntityB_Col",
table: "EntityB",
column: "Col");
migrationBuilder.CreateIndex(
name: "IX_EntityC_Col",
table: "EntityC",
column: "Col");
}
protected override void Down(MigrationBuilder migrationBuilder)
{
migrationBuilder.DropTable(
name: "EntityB");
migrationBuilder.DropTable(
name: "EntityC");
migrationBuilder.DropTable(
name: "EntityA");
}
}
[DbContext(typeof(MyDbContext))]
[Migration("20200103182223_Initial")]
partial class Initial
{
protected override void BuildTargetModel(ModelBuilder modelBuilder)
{
#pragma warning disable 612, 618
modelBuilder
.HasAnnotation("ProductVersion", "3.1.0")
.HasAnnotation("Relational:MaxIdentifierLength", 128)
.HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);
modelBuilder.Entity("EFCoreBulkInsert.EntityA", b =>
{
b.Property("Id")
.ValueGeneratedOnAdd()
.HasColumnName("Id")
.HasColumnType("int")
.HasAnnotation("SqlServer:IdentityIncrement", 1)
.HasAnnotation("SqlServer:IdentitySeed", 1)
.HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);
b.Property("ColA")
.HasColumnName("ColA")
.HasColumnType("int");
b.Property("ColB")
.IsRequired()
.HasColumnName("ColB")
.HasColumnType("nvarchar(450)");
b.HasKey("Id");
b.HasIndex("ColA")
.HasName("IX_EntityA_ColA");
b.HasIndex("ColB")
.HasName("IX_EntityA_ColB");
b.ToTable("EntityA");
});
modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
{
b.Property("Id")
.HasColumnName("Id")
.HasColumnType("int");
b.Property("Col")
.IsRequired()
.HasColumnName("Col")
.HasColumnType("nvarchar(450)");
b.HasKey("Id");
b.HasIndex("Col")
.HasName("IX_EntityB_Col");
b.ToTable("EntityB");
});
modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
{
b.Property("Id")
.HasColumnName("Id")
.HasColumnType("int");
b.Property("Col")
.HasColumnName("Col")
.HasColumnType("int");
b.HasKey("Id");
b.HasIndex("Col")
.HasName("IX_EntityC_Col");
b.ToTable("EntityC");
});
modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
{
b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
.WithOne("EntityB")
.HasForeignKey("EFCoreBulkInsert.EntityB", "Id")
.HasConstraintName("FK_EntityB-Id_EntityA-Id")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
});
modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
{
b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
.WithOne("EntityC")
.HasForeignKey("EFCoreBulkInsert.EntityC", "Id")
.HasConstraintName("FK_EntityC-Id_EntityA-Id")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
});
#pragma warning restore 612, 618
}
}
class Program
{
static int EntityCount = 0;
const int EntityLimit = 200_000_000;
///
/// Just a fake factory method mimicking the real procedural generation of the db records
///
/// The next enity to store or null if no more entities can be calculated.
static EntityA CalculateNextEntity()
{
if (EntityCount >= EntityLimit) return null;
EntityCount++;
return new EntityA
{
// id is generated by the db
ColA = EntityCount,
ColB = EntityCount.ToString(),
EntityB = new EntityB
{
// id is generated by the db
Col = EntityCount.ToString()
},
EntityC = new EntityC
{
// id is generated by the db
Col = EntityCount
}
};
}
static void Main(string[] args)
{
string connectionString = "Server=(localdb)\\mssqllocaldb;Database=TestDB;Trusted_Connection=True;MultipleActiveResultSets=true";
DbContextOptionsBuilder builder = new DbContextOptionsBuilder();
builder.UseSqlServer(connectionString);
using (MyDbContext dbContext = new MyDbContext(builder.Options))
{
Console.Write("migrating ... ");
dbContext.Database.Migrate();
Console.WriteLine(" done");
dbContext.ChangeTracker.AutoDetectChangesEnabled = false;
EntityA entity;
int count = 0;
int batchSize = 1000;
Stopwatch watch = new Stopwatch();
watch.Start();
while (true)
{
entity = CalculateNextEntity();
if (entity == null) break;
count++;
dbContext.Add(entity);
if (count == batchSize)
{
count = 0;
dbContext.SaveChanges();
}
Console.WriteLine($"{entity.Id} {entity.ColA} {entity.ColB} {entity.EntityB.Col} {entity.EntityC.Col}");
}
dbContext.SaveChanges();
Console.WriteLine("---");
int entityCount = dbContext.EntityA.CountAsync().Result;
Console.WriteLine($"inserted {entityCount} within {watch.ElapsedMilliseconds} milliseconds");
}
}
}
}
Note this is the full working code. Copy and paste into a console app Program.cs and it compiles and runs. Required nugets:
Upvotes: 2
Views: 6359
Reputation: 38094
In my view it is better to avoid EF to insert such amount of rows. It is better to give a try to use bulk insert using DataTable:
using (SqlConnection connection = new SqlConnection(connString))
{
SqlBulkCopy bulkCopy = new SqlBulkCopy(
connection,
SqlBulkCopyOptions.TableLock |
SqlBulkCopyOptions.FireTriggers |
SqlBulkCopyOptions.UseInternalTransaction,
null
);
bulkCopy.DestinationTableName = tableName;
connection.Open();
bulkCopy.WriteToServer(dataTable);
connection.Close();
}
Then just reuse the above code for all entities.
It has some advantages:
BuildEntityB
Upvotes: 4