Stream from the venti server instead of reading into memory.
[webvac] / lib / webvac.rb
1 %w(
2         redic
3         magic
4         json
5         time
6         cgi
7 ).each &method(:require)
8
9 # The namespace for WebVac.  See the README.
10 module WebVac
# Config object, intended to be used as a singleton.
#
# Exposes one read/write accessor per key of Defaults.  Build it with
# Config.load (first readable JSON file in ConfigPaths) or Config.new(hash).
class Config
  # The default config options.  See the README.
  Defaults = {
    redis_url: "redis://localhost:6379/0",

    server_path_strip: "/media",
    server_path_prepend: "/media/block/fse",

    venti_server: 'localhost',

    plan9bin: '/opt/plan9/bin',

    mime_substitutions: {
      'text/html' => 'text/plain',
    },
  }
  attr_accessor(*Defaults.keys)

  # The sorted list of places where we will look for config files
  # to load.  The WEBVAC_CONFIG entry is dropped when the variable is unset.
  ConfigPaths = [
    ENV['WEBVAC_CONFIG'],
    "./config/webvac.json",
    "#{ENV['HOME']}/.webvac.json",
    "/etc/webvac.json",
  ].compact

  # Reads/parses the first readable file in ConfigPaths and instantiates an
  # object from it.  Falls back to the bare defaults when none is readable.
  def self.load
    path = ConfigPaths.find { |candidate| File.readable?(candidate) }
    new(path ? JSON.parse(File.read(path)) : {})
  end

  # Takes a config (anything responding to #each with key/value pairs) and
  # overlays it on the defaults.
  # Will throw exceptions if you give it a bad config (an unknown key raises
  # NoMethodError), you should probably just call Config.load.
  def initialize(cfg)
    Defaults.each do |key, value|
      # Deep-copy so that mutating this instance's values (for example
      # adding to mime_substitutions) never leaks into Defaults or into
      # other Config instances.
      public_send("#{key}=", Marshal.load(Marshal.dump(value)))
    end
    cfg.each { |key, value| public_send("#{key}=", value) }
  end

  # Replaces a leading server_path_strip in +path+ with server_path_prepend.
  # Returns a new String; paths that do not start with the prefix come back
  # unchanged.
  def path_fixup(path)
    # The pattern is rebuilt on every call so a later change to
    # server_path_strip is honoured.  \A anchors to the start of the string
    # only, and the block form inserts server_path_prepend literally (no
    # backslash back-reference expansion).
    path.sub(/\A#{Regexp.escape(server_path_strip)}/) { server_path_prepend }
  end
end
67
# Stateless-ish client for venti.
# I completely punted on implementing a venti client, so it just calls
# the vac/unvac binaries found in config.plan9bin, passing the server
# address in the `venti` environment variable.  Does the job!
class Vac
  attr_reader :config

  # Takes an instance of Config.
  def initialize(cfg)
    @config = cfg
  end

  # Stores the file +fn+ by running `vac -i <basename>` with the file as
  # its standard input.  Returns vac's output with the trailing newline and
  # a leading "vac:" removed.
  #
  # NOTE(review): the exit status of vac is not checked, so a failed run
  # returns whatever it printed (possibly an empty string).
  def save!(fn)
    output = File.open(fn, 'rb') do |src|
      # The child reads straight from the file, so the contents are never
      # buffered in this process.  The block form of popen closes the pipe
      # and reaps the child before returning.
      IO.popen(
        {'venti' => config.venti_server},
        ["#{config.plan9bin}/vac", '-i', File.basename(fn)],
        in: src
      ) { |io| io.read }
    end
    output.chomp.sub(/\Avac:/, '')
  end

  # Starts `unvac -c <vac>` and returns the IO connected to its standard
  # output so the caller can stream from it.  The caller owns the IO and
  # should close it when done.
  #
  # Raises ArgumentError unless +vac+ is exactly "vac:" followed by 40
  # lowercase hex digits.
  def load_io(vac)
    # \A and \z anchor to the whole string; ^ and $ would accept any
    # multi-line string that merely contains a score on one of its lines.
    unless /\Avac:[a-f0-9]{40}\z/.match(vac)
      raise ArgumentError, "#{vac.inspect} not a vac score?"
    end
    IO.popen(
      {'venti' => config.venti_server},
      ["#{config.plan9bin}/unvac", '-c', vac]
    ).tap { |io| Process.detach(io.pid) } # reap the child in the background
  end

  # Reads the whole object for score +vac+ into memory and returns it.
  # Raises ArgumentError for a malformed score (see load_io).
  def load!(vac)
    io = load_io(vac)
    begin
      io.read
    ensure
      io.close
    end
  end
end
106
# Sits in front of Redis (just Redis right now), and handles the mapping
# of vac hashes to pathnames, as well as the metadata (in JSON and in the
# form of HTTP headers, which allows HEAD requests to be cheap).  Also does
# some of the bookkeeping necessary for that, like the interaction with
# libmagic.
#
# Relatively threadsafe, but maintains one Redis connection per active
# thread (created on demand).
#
# Redis layout, as used below: hash `score2md` maps score => metadata JSON,
# hash `path2score` maps path => score.
class Table
  attr_reader :config

  # Takes an instance of Config.
  def initialize(cfg)
    @config = cfg
  end

  # Takes a filename, returns the filename's metadata as a Hash of HTTP
  # header name => String value.  Stateless-ish.
  #
  # Errors from File.stat propagate.  Anything that goes wrong while
  # building the headers (e.g. libmagic failing) yields nil instead.
  def fn2md(f)
    stat = File.stat(f)
    begin
      {
        'Content-Type' => Magic.guess_file_mime_type(f),
        'Content-Length' => stat.size.to_s,
        # HTTP requires the GMT-based HTTP-date format; rfc822 would emit
        # a numeric zone offset, which is not a valid Last-Modified value.
        'Last-Modified' => stat.mtime.httpdate,
      }
    rescue StandardError
      nil
    end
  end

  # Computes the metadata of file +fn+ and stores it as JSON under score
  # +sc+.  Does nothing when the metadata could not be computed.
  def meta_save!(fn, sc)
    md = fn2md(fn)
    return unless md
    redis.call 'HSET', 'score2md', sc, md.to_json
  end

  # Returns the stored metadata Hash for +score+ (an optional leading
  # "vac:" is ignored), or nil when it is missing or unreadable.
  def metadata(score)
    # Overall, doesn't really matter if this fails.
    JSON.parse(redis.call('HGET', 'score2md', score.sub(/\Avac:/, '')))
  rescue StandardError
    nil
  end

  # Records that path +fn+ is stored under score +sc+.
  def rec_score!(fn, sc)
    redis.call 'HSET', 'path2score', fn, sc
  end

  # The Redis connection for the current thread, created on first use from
  # config.redis_url and cached in Thread.current[:webvac_redis].
  def redis
    Thread.current[:webvac_redis] ||= Redic.new(config.redis_url)
  end

  # Returns "vac:<score>" for path +p+, or nil when the path is unknown.
  def path2score(p)
    score = redis.call 'HGET', 'path2score', p
    score ? "vac:#{score}" : nil
  end

  # Guesses the MIME type of the String +contents+ via libmagic.
  def guess_mime(contents)
    Magic.guess_string_mime_type(contents)
  end
end
163 end