Skip to content

Commit

Permalink
Add Innodb::MysqlCollation for MySQL character sets/collations
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremycole committed Nov 26, 2024
1 parent bc203da commit 7ad09f9
Show file tree
Hide file tree
Showing 5 changed files with 512 additions and 0 deletions.
96 changes: 96 additions & 0 deletions bin/innodb_ruby_generate_mysql_collations
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# To update lib/innodb/mysql_collations.rb, run this with a path to the MySQL source directory
# containing CHARSET_INFO collation definitions, e.g.:

# bundle exec bin/innodb_ruby_generate_mysql_collations ~/git/mysql-server > lib/innodb/mysql_collations.rb

# Positional record holding the fields of one CHARSET_INFO initializer as parsed
# by this script. The parser fills it by splatting the comma-separated values of
# an initializer, so the member order below must match the order in which the
# values appear in the parsed source. Members not supplied are left as nil.
MysqlCharsetInfo = Struct.new(
  *%i[
    number
    primary_number
    binary_number
    state
    csname
    m_coll_name
    comment
    tailoring
    coll_param
    ctype
    to_lower
    to_upper
    sort_order
    uca
    tab_to_uni
    tab_from_uni
    caseinfo
    state_maps
    ident_map
    strxfrm_multiply
    caseup_multiply
    casedn_multiply
    mbminlen
    mbmaxlen
    mbmaxlenlen
    min_sort_char
    max_sort_char
    pad_char
    escape_with_backslash_is_dangerous
    levels_for_compare
    cset
    coll
    pad_attribute
  ]
)

raise "First argument must be the path to a modern MySQL source tree" unless (ARGV.size == 1) && Dir.exist?(ARGV[0])

# Collect one MysqlCharsetInfo per CHARSET_INFO initializer found in the
# strings/ctype-*.cc files of the given source tree.
charset_infos = Dir.glob(File.join(ARGV[0], "strings/ctype-**.cc")).flat_map do |filename|
  content = File.read(filename)
  warn "Parsing #{filename}..."

  # Global individual constants e.g. CHARSET_INFO my_charset_utf8mb4_general_ci = { ... }
  definitions = content.scan(/^CHARSET_INFO \w+ = ({.*?})/m).flatten

  # Global array of constants e.g. CHARSET_INFO compiled_charsets[] = { { ... }, { ... } };
  # String#[] returns the matched text (or nil when the file has no such array); unlike
  # MatchData#match(0) it does not require Ruby 3.1+.
  array_initializer = content[/CHARSET_INFO \w+\[\] = {\s*(?:{.*?}\s*,\s*)+/m]
  if array_initializer
    # Drop the array header so only the inner "{ ... }" element initializers remain.
    elements = array_initializer.gsub(/CHARSET_INFO \w+\[\] = {/, "").scan(/{.*?}/m)
    definitions.concat(elements)
  end

  definitions.map do |raw_definition|
    # Strip C comments, collapse all whitespace (including newlines) to single spaces,
    # and remove quote characters so every field is a bare token.
    flattened = raw_definition
                .gsub(%r{/\*.*?\*/}, "")
                .gsub(%r{//.*?$}, "")
                .gsub(/\s+/, " ")
                .gsub(/["']/, "")

    fields = flattened.match(/{(?<definition>.*?)}/)[:definition].split(",").map(&:strip)

    # Purely numeric fields become Integers; everything else (identifiers, macros,
    # names) is kept as a String.
    values = fields.map { |field| field.match?(/\A[0-9]+\z/) ? field.to_i : field }

    MysqlCharsetInfo.new(*values)
  end
end

# Nothing parsed almost certainly means the path was not a MySQL source tree;
# report on stderr and exit non-zero so a redirected output file is not trusted.
collation_count = charset_infos.size
if collation_count.zero?
  warn "No MySQL collations found... bad path provided?"
  exit 1
end

warn "Found #{collation_count} collations, generating output."

# File header of the generated Ruby source (written to stdout).
puts "# frozen_string_literal: true",
     "",
     "# Generated at #{Time.now.utc} using innodb_ruby_generate_mysql_collations. Do not edit!",
     "",
     "# rubocop:disable all"

# One registration call per collation, ordered by collation number.
add_call_template =
  "Innodb::MysqlCollation.add(id: %d, name: %s, character_set_name: %s, mbminlen: %i, mbmaxlen: %i)"

charset_infos.sort_by(&:number).each do |info|
  puts format(add_call_template,
              info.number,
              info.m_coll_name.inspect,
              info.csname.inspect,
              info.mbminlen,
              info.mbmaxlen)
end
2 changes: 2 additions & 0 deletions lib/innodb.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ def self.debug=(value)
require "innodb/version"
require "innodb/stats"
require "innodb/checksum"
require "innodb/mysql_collation"
require "innodb/mysql_collations"
require "innodb/mysql_type"
require "innodb/record_describer"
require "innodb/sys_data_dictionary"
Expand Down
62 changes: 62 additions & 0 deletions lib/innodb/mysql_collation.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# frozen_string_literal: true

module Innodb
  # A MySQL collation (id, name, character set and per-character byte widths),
  # plus a class-level registry of all collations added via {.add}, indexed by
  # id and by name.
  class MysqlCollation
    # Raised by {.add} when a collation with the same id is already registered.
    class DuplicateIdError < StandardError; end

    # Raised by {.add} when a collation with the same name is already registered.
    class DuplicateNameError < StandardError; end

    @collations = []
    @collations_by_id = {}
    @collations_by_name = {}

    class << self
      # @return [Array<MysqlCollation>] all registered collations, in registration order
      attr_reader :collations
    end

    # Build a collation from the given attributes and register it.
    #
    # @param kwargs [Hash] the keyword arguments accepted by {#initialize}
    #   (:id, :name, :character_set_name, :mbminlen, :mbmaxlen)
    # @return [MysqlCollation] the newly registered collation
    # @raise [DuplicateIdError] if the id is already registered
    # @raise [DuplicateNameError] if the name is already registered
    def self.add(kwargs)
      id = kwargs[:id]
      name = kwargs[:name]

      if @collations_by_id.key?(id)
        raise DuplicateIdError, "collation id #{id.inspect} is already registered"
      end

      if @collations_by_name.key?(name)
        raise DuplicateNameError, "collation name #{name.inspect} is already registered"
      end

      collation = new(**kwargs)
      @collations.push(collation)
      @collations_by_id[collation.id] = collation
      @collations_by_name[collation.name] = collation
      # Invalidate the memoized list so all_fixed_ids reflects the new entry.
      @all_fixed_ids = nil
      collation
    end

    # @param id [Integer] a collation id
    # @return [MysqlCollation, nil] the collation registered under that id, if any
    def self.by_id(id)
      @collations_by_id[id]
    end

    # @param name [String] a collation name
    # @return [MysqlCollation, nil] the collation registered under that name, if any
    def self.by_name(name)
      @collations_by_name[name]
    end

    # @return [Array<Integer>] sorted ids of every registered collation whose
    #   characters have a fixed byte width (memoized until the next {.add})
    def self.all_fixed_ids
      @all_fixed_ids ||= Innodb::MysqlCollation.collations.select(&:fixed?).map(&:id).sort
    end

    # @return [Integer] the collation id
    attr_reader :id

    # @return [String] the collation name
    attr_reader :name

    # @return [String] the name of the character set this collation belongs to
    attr_reader :character_set_name

    # @return [Integer] minimum number of bytes per character
    attr_reader :mbminlen

    # @return [Integer] maximum number of bytes per character
    attr_reader :mbmaxlen

    def initialize(id:, name:, character_set_name:, mbminlen:, mbmaxlen:)
      @id = id
      @name = name
      @character_set_name = character_set_name
      @mbminlen = mbminlen
      @mbmaxlen = mbmaxlen
    end

    # @return [Boolean] true when every character occupies the same number of bytes
    def fixed?
      mbminlen == mbmaxlen
    end

    # @return [Boolean] true when the number of bytes per character can vary
    def variable?
      !fixed?
    end
  end
end
Loading

0 comments on commit 7ad09f9

Please sign in to comment.