-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgenerate_dataset_captcha.rb
54 lines (45 loc) · 1.75 KB
/
generate_dataset_captcha.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
require 'parallel'
require 'rmagick'
require 'securerandom'
puts 'Generating captchas...'
# Generate the directories.
# Every directory is checked on its own, so a dataset root that already exists
# but is missing one of its split sub-directories (for example after an
# interrupted run) still ends up complete.
dir_path = "./captcha_dataset"
split_dirs = %w[train validation test].map { |split| "#{dir_path}/#{split}" }
[dir_path, *split_dirs].each do |path|
  Dir.mkdir(path) unless Dir.exist?(path)
end
# Renders +text+ as a distorted 160x80 JPEG captcha and saves it as
# "<base_dir>/<dataset>/<text>.jpg".
#
# @param text [String] characters drawn on the image; also used as the file name
# @param dataset [String] sub-directory of +base_dir+ to write into
# @param base_dir [String] root directory of the dataset
# @return [Integer] number of bytes written
def save_random_captcha(text, dataset, base_dir: './captcha_dataset')
  image = Magick::Image.new(160, 80)
  image.format = "jpg"
  image.gravity = Magick::CenterGravity
  image.background_color = 'white'

  # Configure the annotation through the block argument rather than `self`:
  # the `self.attr = ...` block form was deprecated in RMagick 4.1 and removed
  # in 5.0, while the argument form works on old and new versions alike.
  Magick::Draw.new.annotate(image, image.columns, image.rows, 0, 0, text) do |opts|
    opts.gravity = Magick::CenterGravity
    opts.pointsize = 28
    opts.fill = 'darkblue'
    opts.stroke = 'transparent'
    opts.rotation = rand(-20..20)
  end

  # Distortion pipeline; each call returns a new image.
  # NOTE(review): rand(1) always returns 0, so implode is currently a no-op and
  # the wave amplitude is always 2. Kept as-is to preserve the generated
  # distribution -- confirm whether a random range was intended.
  image = image.gaussian_blur(0.0, rand)
  image = image.implode(rand(1) / 10.0)
  image = image.swirl(rand(15))
  image = image.radial_blur(rand(5))
  image = image.add_noise(Magick::ImpulseNoise)
  image = image.wave(2 + rand(1), 20 + rand(10))
  # Crop back to the original 160x80 canvas after the distortions.
  image = image.crop(0, 0, 160, 80)

  # The blob is binary JPEG data, so write it in binary mode.
  File.binwrite(File.join(base_dir, dataset, "#{text}.jpg"), image.to_blob)
end
# Start saving the captcha_dataset.
# Labels are drawn until each split holds exactly the requested number of
# distinct texts, and a label handed to one split is never handed to another,
# so no training sample can also show up in validation or test.
used_texts = {}
unique_texts = lambda do |count|
  batch = []
  while batch.size < count
    candidate = SecureRandom.alphanumeric(6).upcase
    next if used_texts.key?(candidate)

    used_texts[candidate] = true
    batch << candidate
  end
  batch
end

{ 'train' => 200_000, 'validation' => 5_000, 'test' => 5_000 }.each do |dataset, count|
  Parallel.each(unique_texts.call(count), in_processes: 30, progress: "Creating #{dataset} captchas") do |text|
    save_random_captcha(text, dataset)
  end
end