add logic to handle Unicode beta period file names

In downloader.rb, add logic to handle file names of the form
UnicodeData-12.0.0d6.txt, which are used during the Unicode beta period.
To find the right file, we first download the index of the directory and
then download each file under the name found in that index. During the
beta period, files are always checked for changes, because changes might
be frequent.
We also check whether any index.html files are left over when we are not
in the beta period. A leftover index.html indicates that we might have
stale data from the beta period rather than the actual release data.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@66737 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
duerst 2019-01-06 07:50:18 +00:00
parent dd3ed41cd0
commit b991ccc210

View File

@ -70,8 +70,34 @@ class Downloader
Gems = RubyGems
class Unicode < self
def self.download(name, *rest)
super("http://www.unicode.org/Public/#{name}", name, *rest)
INDEX = {} # cache index file information across files in the same directory
UNICODE_PUBLIC = "http://www.unicode.org/Public/"
# Downloads the Unicode data file +name+ (a path relative to
# UNICODE_PUBLIC, e.g. "12.0.0/ucd/UnicodeData.txt") by delegating to the
# superclass +download+.
#
# name::    path of the file below UNICODE_PUBLIC; also used as the local name.
# dir::     passed through to the superclass and to +under+.
# since::   passed through to the superclass in the non-beta case; in the
#           beta case +true+ is always passed instead.
# options:: option hash; :unicode_beta is consumed here, everything else is
#           passed on. The caller's hash is not modified.
#
# When options[:unicode_beta] is 'YES', the directory's index is fetched
# (once per directory, cached in INDEX) and searched for the beta file name
# (e.g. UnicodeData-12.0.0d6.txt), which is then downloaded under the
# regular +name+.
#
# Raises RuntimeError if the beta index lists no matching file, or if an
# index.html is found locally while not in beta.
def self.download(name, dir = nil, since = true, options = {})
  options = options.dup
  unicode_beta = options.delete(:unicode_beta)
  name_dir_part = name.sub(/[^\/]+$/, '')
  if unicode_beta == 'YES'
    # Cache the index per directory: an index fetched for one directory
    # must not be used to look up files that live in another one.
    index = INDEX[name_dir_part]
    unless index
      index_options = options.dup
      index_options[:cache_save] = false # TODO: make sure caching really doesn't work for index file
      index_file = super(UNICODE_PUBLIC+name_dir_part, "#{name_dir_part}index.html", dir, true, index_options)
      index = INDEX[name_dir_part] = IO.read(index_file)
    end
    file_base = File.basename(name, '.txt')
    # Escape the base name so that characters such as '.' match literally.
    beta_name = index[/#{Regexp.escape(file_base)}(-[0-9.]+d\d+)?\.txt/]
    unless beta_name
      raise "Cannot find a file matching #{file_base}.txt " +
            "in the index of #{UNICODE_PUBLIC}#{name_dir_part}"
    end
    # make sure we always check for new versions of files,
    # because they can easily change in the beta period
    super(UNICODE_PUBLIC+name_dir_part+beta_name, name, dir, true, options)
  else
    index_file = Pathname.new(under(dir, name_dir_part+'index.html'))
    if index_file.exist?
      raise "Although Unicode is not in beta, file #{index_file} exists. " +
            "Remove all files in this directory and in .downloaded-cache/ " +
            "because they may be leftovers from the beta period."
    end
    super(UNICODE_PUBLIC+name, name, dir, since, options)
  end
end
end
@ -124,10 +150,7 @@ class Downloader
options = options.dup
url = URI(url)
dryrun = options.delete(:dryrun)
# remove from options (future use, see r66448), see L166
unicode_beta = options.delete(:unicode_beta)
puts "never" if unicode_beta == 'assigned but unused variable...'
options.delete(:unicode_beta) # just to be on the safe side for gems and gcc
if name
file = Pathname.new(under(dir, name))
@ -318,12 +341,6 @@ if $0 == __FILE__
when '--unicode-beta'
options[:unicode_beta] = ARGV[1]
ARGV.shift
# TODO: Move this code further down
if options[:unicode_beta]=='YES'
raise "Not yet able to deal with Unicode Data beta versions."
else
# TODO: deal with the case that we just switched from beta to 'regular'
end
when /\A--cache-dir=(.*)/m
options[:cache_dir] = $1
when /\A-/