Skip to content

Commit

Permalink
#11 - Add support for bulk records creation
Browse files Browse the repository at this point in the history
  • Loading branch information
ellmetha committed Jan 31, 2024
1 parent 0da1330 commit 18bc7f9
Show file tree
Hide file tree
Showing 15 changed files with 582 additions and 44 deletions.
51 changes: 51 additions & 0 deletions docs/docs/models-and-databases/reference/query-set.md
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,57 @@ The value passed to `#using` must be a valid database alias that was used to con

Query sets also provide a set of methods that will usually result in specific SQL queries to be executed in order to return values that don't correspond to new query sets.

### `bulk_create`

Bulk inserts the passed model instances into the database.

This method makes it possible to insert multiple model instances into the database in a single query. This can be useful when dealing with large amounts of data that need to be inserted into the database. For example:

```crystal
query_set = Post.all
query_set.bulk_create(
[
Post.new(title: "First post"),
Post.new(title: "Second post"),
Post.new(title: "Third post"),
]
)
```

An optional `batch_size` argument can be passed to this method in order to specify the number of records that should be inserted in a single query. By default, all records are inserted in a single query (except for SQLite databases where the limit of variables in a single query is 999). For example:

```crystal
query_set = Post.all
query_set.bulk_create(
[
Post.new(title: "First post"),
Post.new(title: "Second post"),
Post.new(title: "Third post"),
],
batch_size: 2
)
```

:::tip
The `#bulk_create` method can also be called directly on model classes:

```crystal
Post.bulk_create(
[
Post.new(title: "First post"),
Post.new(title: "Second post"),
Post.new(title: "Third post"),
]
)
```
:::

It is worth mentioning that this method has a few caveats:

* The specified records are assumed to be valid and no [callbacks](../callbacks.md) will be called on them.
* Bulk-creating records making use of multi-table inheritance is not supported.
* If the model's primary key field is auto-incremented at the database level, the newly inserted primary keys will only be assigned to records on certain databases that support retrieving bulk-inserted rows (namely PostgreSQL and SQLite).

### `count`

Returns the number of records that are targeted by the current query set.
Expand Down
7 changes: 7 additions & 0 deletions spec/marten/db/connection/mysql_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@ require "./spec_helper"

for_mysql do
describe Marten::DB::Connection::MySQL do
describe "#bulk_batch_size" do
  # On this backend the batch size is expected to equal the records count that was passed in.
  it "returns the specified records count" do
    batch_size = Marten::DB::Connection.default.bulk_batch_size(
      records_count: 1000,
      values_count: 10
    )

    batch_size.should eq 1000
  end
end

describe "#distinct_clause_for" do
it "returns the expected distinct clause if no column names are specified" do
conn = Marten::DB::Connection.default
Expand Down
7 changes: 7 additions & 0 deletions spec/marten/db/connection/postgresq_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@ require "./spec_helper"

for_postgresql do
describe Marten::DB::Connection::PostgreSQL do
describe "#bulk_batch_size" do
  # On this backend the batch size is expected to equal the records count that was passed in.
  it "returns the specified records count" do
    batch_size = Marten::DB::Connection.default.bulk_batch_size(
      records_count: 1000,
      values_count: 10
    )

    batch_size.should eq 1000
  end
end

describe "#distinct_clause_for" do
it "returns the expected distinct clause if no column names are specified" do
conn = Marten::DB::Connection.default
Expand Down
17 changes: 17 additions & 0 deletions spec/marten/db/connection/sqlite_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,23 @@ require "./spec_helper"

for_sqlite do
describe Marten::DB::Connection::PostgreSQL do
describe "#bulk_batch_size" do
  # A single value per record is expected to yield a fixed batch size of 500.
  it "returns 500 if the values count is 1" do
    batch_size = Marten::DB::Connection.default.bulk_batch_size(
      records_count: 1000,
      values_count: 1
    )

    batch_size.should eq 500
  end

  # With several values per record, the expected batch size is 999 integer-divided by the values count.
  it "returns the expected value if the values count is greater than 1" do
    batch_size = Marten::DB::Connection.default.bulk_batch_size(
      records_count: 1000,
      values_count: 89
    )

    batch_size.should eq(999 // 89)
  end

  # Without any value to insert, the records count is expected to be returned as is.
  it "returns the records count if no values will be inserted" do
    batch_size = Marten::DB::Connection.default.bulk_batch_size(
      records_count: 1000,
      values_count: 0
    )

    batch_size.should eq 1000
  end
end

describe "#distinct_clause_for" do
it "returns the expected distinct clause if no column names are specified" do
conn = Marten::DB::Connection.default
Expand Down
24 changes: 24 additions & 0 deletions spec/marten/db/model/querying_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,30 @@ describe Marten::DB::Model::Querying do
end
end

describe "::bulk_create" do
  # Class-level shortcut: all the records are handed over without any explicit batch size.
  it "allows to insert an array of records without specifying a batch size" do
    tags = (1..100).map { |i| Tag.new(name: "tag #{i}", is_active: true) }

    created_tags = Tag.bulk_create(tags)

    expected_count = tags.size
    created_tags.size.should eq expected_count
    Tag.filter(name__in: tags.map(&.name)).count.should eq expected_count
  end

  # Same scenario, but the 100 records are inserted with an explicit batch size of 10.
  it "allows to insert a small array of records while specifying a batch size" do
    tags = (1..100).map { |i| Tag.new(name: "tag #{i}", is_active: true) }

    created_tags = Tag.bulk_create(tags, batch_size: 10)

    expected_count = tags.size
    created_tags.size.should eq expected_count
    Tag.filter(name__in: tags.map(&.name)).count.should eq expected_count
  end
end

describe "::count" do
it "returns the expected number of records when no field is specified" do
Tag.create!(name: "ruby", is_active: true)
Expand Down
153 changes: 153 additions & 0 deletions spec/marten/db/query/set_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,159 @@ describe Marten::DB::Query::Set do
end
end

# Specs for `Marten::DB::Query::Set#bulk_create`: plain insertion with and without an explicit batch size,
# handling of pre-assigned and database-generated primary keys, null values, and the error cases.
describe "#bulk_create" do
  it "allows to insert a small array of records without specifying a batch size" do
    objects = (1..100).map do |i|
      Tag.new(name: "tag #{i}", is_active: true)
    end

    inserted_objects = Marten::DB::Query::Set(Tag).new.bulk_create(objects)

    inserted_objects.size.should eq objects.size
    Tag.filter(name__in: objects.map(&.name)).count.should eq objects.size
  end

  it "allows to insert a large array of records without specifying a batch size" do
    objects = (1..5_000).map do |i|
      Tag.new(name: "tag #{i}", is_active: true)
    end

    inserted_objects = Marten::DB::Query::Set(Tag).new.bulk_create(objects)

    inserted_objects.size.should eq objects.size
    Tag.filter(name__in: objects.map(&.name)).count.should eq objects.size
  end

  it "allows to insert a small array of records while specifying a batch size" do
    objects = (1..100).map do |i|
      Tag.new(name: "tag #{i}", is_active: true)
    end

    inserted_objects = Marten::DB::Query::Set(Tag).new.bulk_create(objects, batch_size: 10)

    inserted_objects.size.should eq objects.size
    Tag.filter(name__in: objects.map(&.name)).count.should eq objects.size
  end

  it "allows to insert a large array of records while specifying a batch size" do
    objects = (1..5_000).map do |i|
      Tag.new(name: "tag #{i}", is_active: true)
    end

    inserted_objects = Marten::DB::Query::Set(Tag).new.bulk_create(objects, batch_size: 500)

    inserted_objects.size.should eq objects.size
    Tag.filter(name__in: objects.map(&.name)).count.should eq objects.size
  end

  it "properly calls the fields' before_save logic to ensure they can set default values on records" do
    objects = (1..10).map do |i|
      TestUser.new(username: "jd#{i}", email: "jd#{i}@example.com", first_name: "John", last_name: "Doe")
    end

    inserted_objects = Marten::DB::Query::Set(TestUser).new.bulk_create(objects)

    inserted_objects.size.should eq objects.size
    TestUser.filter(username__in: objects.map(&.username)).count.should eq objects.size
    # `created_at` is never set explicitly above, so a non-nil value means it was populated during bulk creation.
    inserted_objects.all? { |o| !o.created_at.nil? }.should be_true
  end

  it "properly marks created objects as persisted" do
    objects = (1..10).map do |i|
      TestUser.new(username: "jd#{i}", email: "jd#{i}@example.com", first_name: "John", last_name: "Doe")
    end

    inserted_objects = Marten::DB::Query::Set(TestUser).new.bulk_create(objects)

    inserted_objects.size.should eq objects.size
    TestUser.filter(username__in: objects.map(&.username)).count.should eq objects.size
    inserted_objects.all?(&.persisted?).should be_true
  end

  it "inserts records with already assigned pks when no batch size is specified" do
    objects = (1..100).map do |i|
      Marten::DB::Query::SetSpec::TagWithUUID.new(label: "tag #{i}")
    end

    inserted_objects = Marten::DB::Query::Set(Marten::DB::Query::SetSpec::TagWithUUID).new.bulk_create(objects)

    inserted_objects.size.should eq objects.size
    Marten::DB::Query::SetSpec::TagWithUUID.filter(label__in: objects.map(&.label)).count.should eq objects.size
    inserted_objects.all?(&.persisted?).should be_true
    inserted_objects.all?(&.pk?).should be_true
  end

  it "inserts records with already assigned pks when a batch size is specified" do
    objects = (1..100).map do |i|
      Marten::DB::Query::SetSpec::TagWithUUID.new(label: "tag #{i}")
    end

    inserted_objects = Marten::DB::Query::Set(Marten::DB::Query::SetSpec::TagWithUUID).new.bulk_create(
      objects,
      batch_size: 10
    )

    inserted_objects.size.should eq objects.size
    Marten::DB::Query::SetSpec::TagWithUUID.filter(label__in: objects.map(&.label)).count.should eq objects.size
    inserted_objects.all?(&.persisted?).should be_true
    inserted_objects.all?(&.pk?).should be_true
  end

  it "inserts records that have null values" do
    objects = (1..10).map do |i|
      user = TestUser.create!(username: "jd#{i}", email: "jd#{i}@example.com", first_name: "John", last_name: "Doe")
      # Every other profile gets a nil bio so that null and non-null values are mixed in the same insert.
      TestUserProfile.new(user: user, bio: i % 2 == 0 ? "Bio #{i}" : nil)
    end

    inserted_objects = Marten::DB::Query::Set(TestUserProfile).new.bulk_create(objects)

    inserted_objects.size.should eq objects.size
    TestUserProfile.filter(user_id__in: objects.map(&.user_id)).count.should eq objects.size
    inserted_objects.all?(&.persisted?).should be_true
  end

  # Only exercised on the backends listed here, which are the ones this spec expects to assign pks back.
  for_db_backends :postgresql, :sqlite do
    it "properly assigns the returned objects' pks when they don't have one already" do
      objects = (1..10).map do |i|
        TestUser.new(username: "jd#{i}", email: "jd#{i}@example.com", first_name: "John", last_name: "Doe")
      end

      inserted_objects = Marten::DB::Query::Set(TestUser).new.bulk_create(objects)

      inserted_objects.size.should eq objects.size
      TestUser.filter(username__in: objects.map(&.username)).count.should eq objects.size
      inserted_objects.all?(&.pk?).should be_true
    end
  end

  # NOTE(review): the example is about batch sizes "less than 1" while the expected message reads
  # "greater than 1" - confirm the intended wording against the implementation.
  it "raises an ArgumentError if the specified batch size is less than 1" do
    expect_raises(ArgumentError, "Batch size must be greater than 1") do
      Marten::DB::Query::Set(Tag).new.bulk_create([] of Tag, batch_size: 0)
    end

    expect_raises(ArgumentError, "Batch size must be greater than 1") do
      Marten::DB::Query::Set(Tag).new.bulk_create([] of Tag, batch_size: -1)
    end
  end

  it "raises the expected exception if the targeted model inherits from concrete models" do
    address = Marten::DB::Query::SetSpec::Address.create!(street: "Street 1")
    # NOTE(review): placeholder address - the original literal was not recoverable; any well-formed e-mail
    # should do since the example only checks the error raised by #bulk_create.
    student = Marten::DB::Query::SetSpec::Student.create!(
      name: "Student 1",
      email: "student1@example.com",
      address: address,
      grade: "10"
    )

    expect_raises(
      Marten::DB::Errors::UnmetQuerySetCondition,
      "Bulk creation is not supported for multi table inherited model records"
    ) do
      Marten::DB::Query::Set(Marten::DB::Query::SetSpec::Student).new.bulk_create([student])
    end
  end
end

describe "#count" do
it "returns the expected number of record for an unfiltered query set" do
Tag.create!(name: "ruby", is_active: true)
Expand Down
12 changes: 12 additions & 0 deletions spec/marten/db/query/set_spec/models/tag_with_uuid.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
module Marten::DB::Query::SetSpec
  # Spec model with a UUID primary key that is assigned on the application side, so that records
  # already hold an identifier before they are inserted.
  class TagWithUUID < Marten::Model
    field :id, :uuid, primary_key: true
    field :label, :string, max_size: 128

    after_initialize :initialize_id

    # Registered as an `after_initialize` callback: keeps the current identifier when there is one and
    # falls back to a random UUID otherwise.
    def initialize_id
      @id = @id || UUID.random
    end
  end
end
15 changes: 15 additions & 0 deletions src/marten/db/connection/base.cr
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,21 @@ module Marten
@url = build_url
end

# Returns the batch size to use when inserting multiple rows in a specific table.
#
# `records_count` is the number of records to insert. `values_count` is presumably the number of values
# inserted for each record (TODO confirm against the callers). Each connection implementation decides how
# these two numbers translate into a batch size.
abstract def bulk_batch_size(records_count : Int32, values_count : Int32) : Int32

# Inserts multiple rows in a specific table and returns the primary key values for the inserted rows.
#
# This method allows inserting the individual rows defined in `values` in the `table_name` table. Each
# element of `values` is a hash associating column names with the values of one row. When
# `pk_column_to_fetch` is specified, the primary key values for the inserted rows will be returned. Note that
# this method can return `nil` if the underlying database does not support returning primary key values for bulk
# inserts.
abstract def bulk_insert(
table_name : String,
values : Array(Hash(String, ::DB::Any)),
pk_column_to_fetch : String? = nil
) : Array(::DB::Any)?

# Returns a distinct clause to remove duplicates from a query's results.
#
# If column names are specified, only these specific columns will be checked to identify duplicates.
Expand Down
29 changes: 29 additions & 0 deletions src/marten/db/connection/mysql.cr
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,35 @@ module Marten
module DB
module Connection
class MySQL < Base
# Returns the batch size to use for bulk inserts: `records_count` is returned unchanged and `values_count`
# is not taken into account by this implementation.
def bulk_batch_size(records_count : Int32, values_count : Int32) : Int32
records_count
end

# Inserts the rows defined in `values` into the `table_name` table with a single multi-row INSERT statement.
#
# Each hash in `values` associates column names with the values of one row. The column list of the
# statement is taken from the first row, so every row is expected to expose the same columns in the same
# order. Placeholders are numbered continuously across rows and the values are passed as query arguments.
#
# This implementation always returns `nil`: `pk_column_to_fetch` is accepted for interface compatibility
# but is not used, and no primary key value is fetched back.
def bulk_insert(
  table_name : String,
  values : Array(Hash(String, ::DB::Any)),
  pk_column_to_fetch : String? = nil
) : Array(::DB::Any)?
  # Nothing to insert: return early instead of raising on `values[0]` below.
  return nil if values.empty?

  column_names = values[0].keys.join(", ") { |column_name| quote(column_name) }

  # The index is shared by all the rows so that each placeholder gets a unique, increasing position.
  index = 0
  row_placeholders = values.join(", ") do |raw_values|
    placeholders = raw_values.keys.join(", ") do |_column_name|
      index += 1
      parameter_id_for_ordered_argument(index)
    end

    "(#{placeholders})"
  end

  statement = "INSERT INTO #{quote(table_name)} (#{column_names}) VALUES #{row_placeholders}"

  open do |db|
    # Arguments are flattened row after row, in the same order as the generated placeholders.
    db.exec(statement, args: values.flat_map(&.values))
  end

  nil
end

def distinct_clause_for(columns : Array(String)) : String
return DISTINCT_CLAUSE if columns.empty?
raise NotImplementedError.new("DISTINCT ON columns is not supported by this connection implementation")
Expand Down
Loading

0 comments on commit 18bc7f9

Please sign in to comment.