Ten Interesting Ruby Standard Libraries


Author: Doug Beaver

Date: March 30, 2004


Simple libraries for simple tasks


1. Zlib

  • Read and write compressed files or data
  • Supports the popular zlib-based compression formats (gzip, deflate, etc.)
  • Save disk space at the expense of decompressing when you want to read your data
require 'zlib'
# Count how often each requested URI appears in an Apache access log.
# The log may be plain text or gzip-compressed (chosen by a ".gz" suffix).
log = ARGV.shift
# Test for a missing argument first: FileTest.file?(nil) raises TypeError
# rather than returning false, so the usage text would never be printed.
unless log && FileTest.file?(log)
  STDERR.puts "usage: #{$0} <apache log>"
  exit 1
end

# GzipReader and File both respond to #each and #close, so the counting
# loop below does not need to know which one it was handed.
log_fd = if log =~ /\.gz$/
  Zlib::GzipReader.open log
else
  File.open log
end

url_matches = Hash.new(0)
begin
  log_fd.each do |line|
    # String#[] with a capture index returns that capture, or nil when
    # the line holds no quoted GET request.
    get_string = line[/"GET ([^"]*)"/, 1]
    url_matches[get_string] += 1 if get_string
  end
ensure
  # Release the handle even when reading fails part-way through.
  log_fd.close
end

vals = url_matches.values.sort
if vals.empty?
  # Without this guard printf would be handed nil for the max/min counts.
  puts "found no GET requests in #{log}"
else
  printf "found %d unique URIs with a max count of %d " +
       "and a min count of %d\n",
       vals.size, vals.last, vals.first
end

2. Digest::MD5

  • A common way to create fingerprints (message digests) of data, often used as a building block for digital signatures
  • Hash digest is always the same size no matter what size of input
require 'digest/md5'

# Group the words of a dictionary file by the letters they are made of
# (anagrams share a group) and report how many groups exist.
dictionary = ARGV.shift
# Test for a missing argument first: FileTest.readable?(nil) raises
# TypeError rather than returning false.
unless dictionary && FileTest.readable?(dictionary)
  STDERR.puts "usage: #{$0} <unix word dictionary>"
  exit 1
end

word_frequencies = Hash.new(0)
# File.foreach closes the file once the block finishes.
File.foreach(dictionary) do |line|
  # Sorting the letters gives every anagram of a word the same string...
  normalized = line.chomp.chars.sort.join
  # ...and the digest turns that string into a fixed-size hash key.
  signature = Digest::MD5.digest normalized
  word_frequencies[signature] += 1
end

vals = word_frequencies.values.sort
if vals.empty?
  # Without this guard printf would be handed nil for the max/min counts.
  puts "found no words in #{dictionary}"
else
  printf "found %d unique letter combos with a max count " +
         "of %d and a min count of %d\n",
         vals.size, vals.last, vals.first
end

3. StringIO

  • Perform file i/o operations on a string
  • Lets you use same code path for manipulating strings and files
# Sends a quote to a client, reading it either from a file on disk or,
# when $DEBUG is set, from an in-memory string.
class QuoteServer
  require 'stringio' 

  # Write the quote to +responseSocket+ one line at a time, blanking out
  # the text of any line that starts with "#".
  #
  # responseSocket:: any object that responds to +write+
  # fileID::         passed to fileIDToPath to locate the quote file;
  #                  not consulted when $DEBUG is set
  #
  # hardcodedServerResponse and fileIDToPath are not defined in this
  # snippet and are expected to be provided elsewhere in the class.
  #
  # Returns nil. An error raised while writing propagates to the caller
  # after the quote source has been closed.
  def sendResponse(responseSocket, fileID = nil)
    # StringIO answers the same #each / #close calls as File, so both
    # sources share the code path below.
    file = if $DEBUG
      StringIO.new(hardcodedServerResponse)
    else
      File.open(fileIDToPath(fileID), 'r')
    end

    begin
      # remove comments and send the quote back to the client
      file.each do |line|
        responseSocket.write(line.gsub(/^#.*/, ''))
      end
    ensure
      # Close the source even if the client socket fails mid-transfer.
      file.close
    end

    nil
  end
end

4. Tempfile

  • Create a private temporary file with a guaranteed unique path
  • The temp file is deleted automatically for you when the Tempfile object is garbage-collected
# Lets the user edit the application's configuration in their own editor.
class FooAppConfig
  require 'shellwords'
  require 'tempfile'

  # Write the current configuration to a private temp file, open it in the
  # user's editor, then load the edited file back in.
  #
  # renderConfigAsText and updateConfigFromFile are not defined in this
  # snippet and are expected to be provided elsewhere in the class.
  #
  # Returns whatever updateConfigFromFile returns. The temp file is closed
  # and deleted before this method returns.
  def editConfig
    configText = renderConfigAsText()
    tmpfile = Tempfile.new "FooApp.config"
    begin
      # Put the text on disk and close the handle so the editor sees the
      # complete file.
      tmpfile.write(configText)
      tmpfile.close

      # $EDITOR may carry its own flags (e.g. "code -w"); split it into
      # words and pass the path as a separate argument so nothing is
      # re-interpreted by a shell. Fall back to vi when it is unset.
      editor = Shellwords.split(ENV['EDITOR'].to_s)
      editor = ['vi'] if editor.empty?
      system(*editor, tmpfile.path)

      # Reopen by path: some editors save by replacing the file, which a
      # handle opened before the edit would not see.
      tmpfile.open
      updateConfigFromFile(tmpfile)
    ensure
      # Delete the file now instead of waiting for garbage collection.
      tmpfile.close!
    end
  end
end

5. Readline

  • Wrapper around GNU readline library
  • Input prompt that supports history and inline editing
  • Nicer way to grab input than just reading from stdin
require 'readline'

# Ask the user a question, repeating it until a non-empty line is typed.
#
# prompt:: text shown before the cursor (default "> "); a single space is
#          appended when the text does not already end in whitespace
#
# Lines are read with Readline's history flag turned on.
#
# Returns the entered line, or nil when input ends (for example Ctrl-D)
# before a non-empty line was entered.
def prompt(prompt="> ")
  prompt += " " unless prompt =~ /\s$/
  loop do
    input = Readline.readline(prompt, true)
    # readline hands back nil at end of input; calling #length on that
    # would raise NoMethodError, so stop asking and return the nil.
    return input if input.nil? || !input.empty?
  end
end

# Ask three questions so there is something in the history to browse.
apples = prompt("how many apples do you have?")
pears = prompt("how many pears do you have?")
nonsense = prompt("try my input history (up/down arrow)")

# HISTORY is a list-like object, not a number: %d needs its entry count.
printf "there are %d items in our input history\n",
       Readline::HISTORY.length

6. Find

  • Walk a directory on disk and look for files that meet given criteria
  • You prune entries from the tree that you're not interested in (nice metaphor)
require 'find'

# Walk +dir+ recursively and total up the regular files whose name ends in
# ".html".
#
# Returns a two-element array: [number of files, combined size in bytes].
def html_file_stats(dir)
  count = 0
  bytes = 0
  Find.find(dir) do |path|
    # Find.prune is for cutting a whole directory out of the walk; for a
    # single uninteresting entry a plain `next` says the same thing.
    # File.file? is false for directories and for dangling symlinks, which
    # would otherwise make the size lookup raise.
    next unless path.end_with?('.html') && File.file?(path)
    count += 1
    bytes += File.size(path)
  end
  [count, bytes]
end

dir = ARGV.shift || '.'
unless FileTest.directory? dir
  STDERR.puts "usage: #{$0} <dir>"
  exit 1
end

num_files, total_size = html_file_stats(dir)
if num_files.zero?
  # Avoid 0.0 / 0.0, which would print an average of "NaNKB".
  puts "found 0 html files"
else
  printf "found %d html files with an average size of %.1fKB\n",
         num_files, total_size.to_f/num_files.to_f/1024
end

7. YAML

  • YAML is a rich language for native representation of data structures
  • It also happens to be human readable and editable
  • Its dumper functionality helps with debugging a lot!
  • Creating your own YAML documents is outside the scope of this talk
require 'yaml'

# A nested structure mixing integers, a hash and arrays.
complex_fella = [
  1, 2, 3, { 'foo' => [4,5,6] }, [7, [8,9]]
]

# The y() shortcut is only defined inside irb, so a plain script has to
# ask for the YAML text itself. Both calls below produce the same document.
puts complex_fella.to_yaml # dumps it to stdout similar to p()
STDERR.puts YAML.dump(complex_fella)

8. dbm

  • Simple disk-based key/value binary database, quick lookup time
  • Not to be confused with bdb modules (Berkeley DB)
  • Use this if you want to easily store a table of data to disk and you're not concerned with keeping it human-readable
require 'dbm'

# Store every word of the system dictionary as a key in an on-disk DBM file.
# The block form of DBM.open closes the database when the block exits, even
# if an error is raised; File.foreach likewise closes the word list.
DBM.open("/var/tmp/dict.dbm") do |db|
  File.foreach('/usr/share/dict/words') do |line|
    # Only the key matters for later lookups; 1 is a placeholder value.
    db[line.chomp] = 1
  end
end

# and in another program...

require 'dbm'
# Look up the word given on the command line in the on-disk dictionary.
word = ARGV.shift
# A DBM lookup with a nil key raises TypeError, so insist on an argument.
unless word
  STDERR.puts "usage: #{$0} <word>"
  exit 1
end

# The block form closes the database once the lookup is done.
DBM.open("/var/tmp/dict.dbm") do |db|
  if db.key?(word)
    puts "#{word} exists in the dictionary"
  else
    puts "#{word} DOES NOT exist in the dictionary"
  end
end

9. Pathname

  • Uber-class that bundles up all logic relating to file paths
  • Wraps methods from Dir, Find, IO, File, FileUtils, FileTest classes
  • Resolve symlinks, generate relative paths, safely concatenate path components
require 'pathname'

# Create "<file>.sym" as a symlink to <file>, then compare the two paths.
file = ARGV.shift
# Pathname.new(nil) raises TypeError, and realpath below needs the file to
# exist, so check both up front.
unless file && File.exist?(file)
  STDERR.puts "usage: #{$0} <file>"
  exit 1
end

path = Pathname.new file
symlink = Pathname.new "#{file}.sym"
# A relative link target is resolved from the link's own directory, not
# from the current directory, so "dir/file" would produce a dangling link.
# Pointing the link at the absolute path works from anywhere.
symlink.make_symlink path.realpath

# Pathname#== compares the path text, so the link and its target differ...
puts "file and symlink are the same" if symlink == path
# ...but resolving both shows they end up at the same file.
if symlink.realpath == path.realpath
  puts "symlink points back to #{path}!"
end

10. OptionParser

  • Add argument parsing to your application
  • Supports both short and long arguments (-h vs --help)
require 'optparse'

# Parsed settings, pre-filled with the defaults used when a flag is absent.
args = { 'num_days' => 1, 'timeout' => 2.0 }
argParser = OptionParser.new
argParser.banner = "usage: grab-host-statistics [flags]"

# boolean flag, no value required
argParser.on("-h", "--help", "Print usage message") do
  puts argParser
  # Asking for help should not fall through into the program proper.
  exit
end

# argument is optional (square brackets), note the casting of the arg;
# a bare flag yields nil, in which case the default is kept
argParser.on("--time-window [DAYS]", Integer,
             "Number of days of statistics to grab") do |arg|
  args['num_days'] = arg if arg
end
argParser.on("--timeout [TIMEOUT]", Float,
             "Timeout for requests") do |arg|
  args['timeout'] = arg if arg
end

# argument is required (no square brackets around the placeholder)
argParser.on("--hostname HOST",
             "Host to grab statistics from") do |arg|
  args['hostname'] = arg
end

# print usage if they called us with no args at all
if ARGV.empty?
  puts argParser
  exit
end

# empty and parse ARGV; report bad flags or values as a usage error
# instead of an uncaught exception
begin
  argParser.parse!
rescue OptionParser::ParseError => e
  STDERR.puts e.message
  STDERR.puts argParser
  exit 1
end