art with code

2008-12-05

Parsing du output with C#

Tossing this out for you to shake your head at. du doesn't report file counts easily, so I'm not parsing those yet. It's frickin' fast, though. It also integrates rather badly with my current way of doing traversal updates. How about I just rewrite everything in OCaml and cry?

/// <summary>
/// Runs <c>du</c> on <paramref name="dirname"/> and feeds every NUL-terminated
/// record it prints to <see cref="ApplyDuString"/>.
/// </summary>
/// <remarks>
/// Does nothing when the traversal is already cancelled or when
/// <c>StartTraversal</c> refuses the entry. If cancellation is observed while
/// reading, the child process is killed and the method returns immediately.
/// The process object and its stdout reader are always disposed.
/// </remarks>
/// <param name="dirname">Directory whose sizes should be collected.</param>
static void TraverseDir (string dirname)
{
    if (TraversalCancelled) return;
    FSEntry d = Get (dirname);
    if (!StartTraversal (d)) return;

    ProcessStartInfo psi = new ProcessStartInfo ();
    psi.FileName = "du";
    // -0: NUL-terminate records, -P: don't follow symlinks, -b: sizes in bytes.
    psi.Arguments = "-0 -P -b --apparent-size "+Helpers.EscapePath(dirname);
    psi.UseShellExecute = false;
    psi.RedirectStandardOutput = true;

    // Dispose the Process as well as the reader so the OS handle and pipe are
    // released on every exit path (normal end, cancellation, exception).
    using (Process p = Process.Start (psi))
    using (BinaryReader b = new BinaryReader(p.StandardOutput.BaseStream)) {
        while (true) {
            string l = ReadNullTerminatedLine(b);
            // An empty record means the output stream is exhausted.
            if (l.Length == 0) break;
            ApplyDuString (l);
            if (TraversalCancelled) {
                try {
                    p.Kill ();
                } catch (InvalidOperationException) {
                    // du already exited on its own; nothing left to kill.
                }
                return;
            }
        }
        p.WaitForExit ();
    }
}

/// <summary>
/// Reads bytes from <paramref name="s"/> up to (and consuming) the next NUL
/// byte, or up to the end of the stream, and returns them decoded as UTF-8.
/// </summary>
/// <param name="s">Reader positioned at the start of a record.</param>
/// <returns>
/// The record text without its NUL terminator. An empty string is returned
/// when the stream is already exhausted (or the record itself is empty).
/// Byte sequences that are not valid UTF-8 decode to U+FFFD.
/// </returns>
static string ReadNullTerminatedLine(BinaryReader s)
{
    byte[] buf = new byte[4096];
    int i = 0;
    try {
        byte j;
        while ((j = s.ReadByte()) > 0) {
            // Grow BEFORE writing at index i; i == buf.Length is already out
            // of range, so the comparison must be >=, not >.
            if (i >= buf.Length) Array.Resize(ref buf, buf.Length*2);
            buf[i] = j;
            ++i;
        }
    } catch (IOException) {
        // EndOfStreamException (an IOException) marks the normal end of the
        // output; other pipe errors are likewise treated as end of input.
        // Return whatever was read so far.
    }
    // Path names are byte strings, almost always UTF-8; widening each byte to
    // a char individually would garble every non-ASCII name.
    return System.Text.Encoding.UTF8.GetString(buf, 0, i);
}

/// <summary>
/// Parses one <c>du</c> record of the form <c>SIZE&lt;TAB&gt;PATH</c> and stores
/// the result in the traversal cache; if the path is also present in
/// <c>Cache</c>, its count and size are updated via <c>SetCountAndSize</c>.
/// </summary>
/// <param name="l">A single record, without its terminator.</param>
static void ApplyDuString (string l)
{
    // Split on the first tab only, so tabs inside the path are preserved.
    string[] fields = l.Split(new char[] {'\t'}, 2);
    long byteCount = Int64.Parse(fields[0]);
    string entryPath = fields[1];

    lock (TraversalCache) {
        // The middle argument is passed as 0 here (presumably the file
        // count, which du does not report; confirm against TraversalInfo).
        TraversalCache[entryPath] = new TraversalInfo(byteCount, 0, DateTime.Now);
    }

    lock (Cache) {
        if (!Cache.ContainsKey(entryPath)) return;
        SetCountAndSize(entryPath, 0, byteCount);
    }
}

For the heck of it, here's an OCaml version of the input parsing part:

open Prelude

(* Hrm, I guess I do need something like gets *)
(* Read characters from [ic] up to the next NUL character or the end of the
   channel and return them as a string (terminator excluded).
   Raises [End_of_file] when nothing at all was read. *)
let input_nt_line ic =
  let acc = Buffer.create 256 in
  let rec fill () =
    match optEOF input_char ic with
    | None | Some '\000' -> ()
    | Some c -> Buffer.add_char acc c; fill () in
  fill ();
  if Buffer.length acc = 0
  then raise End_of_file
  else Buffer.contents acc


(* Table filled by traverse_dir: maps each path reported by du to a
   (size, 0, timestamp) triple. Created with an initial size hint of 1000. *)
let traversal_cache = HHash.create 1000

(* Run du on [path] and add one traversal_cache entry per NUL-terminated
   record it prints. Each record is expected to look like "SIZE\tPATH";
   a record that does not split into exactly two fields raises
   Match_failure. *)
let traverse_dir path =
  let record_entry line =
    let [bytes; entry_path] = nsplit "\t" 2 line in
    let info = (parseInt bytes, 0, timeNow ()) in
    HHash.add traversal_cache entry_path info in
  let du_command = ["du"; "-0Pb"; "--apparent-size"; path] in
  withCmdStdout du_command (tokenizeIter input_nt_line record_entry)

(* Entry point: traverse the current directory, then print how many
   entries ended up in the cache. *)
let () =
  traverse_dir ".";
  let entry_count = HHash.length traversal_cache in
  puts (showInt entry_count)

And a Ruby version:

# Collect the apparent size of everything under my_path by parsing the
# NUL-terminated "SIZE\tPATH" records printed by du, then print how many
# entries were seen.
my_path = "."
traversal_cache = {}
# Pass the command as an argv array so no shell is involved: the path needs
# no quoting at all, and names containing quotes or other shell
# metacharacters can neither break the command nor inject into it.
IO.popen(["du", "-0Pb", "--apparent-size", my_path], "r") { |f|
  f.each_line("\0") do |record|
    # Split on the first tab only, so tabs inside the path are preserved.
    sz, path = record.chomp("\0").split("\t", 2)
    traversal_cache[path] = [sz.to_i, 0, Time.now]
  end
}
puts traversal_cache.size

No comments:

Blog Archive