Brainfart Saturday

I need to switch coffee brands…

Yesterday, for no apparent reason, I thought it may be a good idea to create a file transfer app that will asynchronously send files in slices(chunks) and still ensure the receiving party’s checksum will mach the sender’s. I was planning on adding public key security to the whole thing, but I can’t seem to get past step 1 without issues.

I tried testing splitting a file into slices and merging them immediately after and it seems to work just fine for small files.

Blob blob = FileUtils.GetBlob("C:\\Users\\Portable\\Downloads\\smalldoc.pdf");

FileUtils.SplitSlices(ref blob);

// Change the filename
blob.Path = "C:\\Users\\Portable\\Downloads\\smalldocCopy.pdf";
// Merge the slices back into one under the new filename
FileUtils.MergeSlices(ref blob);

The head-scratching starts when splitting and merging a large-ish file (50Mb+). The “Size on disk” identical to the original, but the “Size” is smaller than the original Size. Meaning it’s taking up the same disk allocation, but some bits got lost along the way. The funny thing is that if I then split the merged copy and merge it again into another copy, then this third copy is identical to the second. So original is still the odd one out.

I can’t seem to find the reason for this other than I’m missing something really obvious or this is a platform issue. I hope it’s the former because cursing at the latter feels… weird.

Here’s the “Slice” class where data would be stored and sent/received async.

public class Slice
{
	// Slice Id (Checksum / Currently not used)
	public string Id { get; set; }
	
	// File(Blob) Id (Checksum)
	public string SourceId { get; set; }

	// Blob location index
	public int Index { get; set; }

	// Slice byte length
	public int Size { get; set; }

	// Slice data
	public string Data { get; set; }

	public bool Complete { get; set; }

	public Slice()
	{
		Complete = false;
	}
}

And the “Blob” class that use the above slice(s)

public class Blob
{
	// File Id (Checksum)
	public string Id { get; set; }

	// Slice collection
	public SortedDictionary<int, Slice> Slices { get; set; }

	// Save path
	public string Path { get; set; }

	// File size
	public int Size { get; set; }

	// Assembled file size
	public int CompletedSize { get; set; }

	public Blob()
	{
		Slices = new SortedDictionary<int, Slice>();
		Size = 0;
		CompletedSize = 0;
	}
}

And of course, the uglier-than-sin FileUtils class (those with weak hearts, avert your eyes).

public static class FileUtils
{
	private static int _blockSize = 65536;

	public static void SplitSlices(ref Blob blob)
	{
		FileInfo info = new FileInfo(blob.Path);
		string source = info.FullName;
		string dir = info.DirectoryName;

		using (FileStream fs = new FileStream(source, FileMode.Open, FileAccess.Read))
		{
			foreach (KeyValuePair<int, Slice> kv in blob.Slices)
			{
				Slice slice = kv.Value;
				byte[] data = new byte[slice.Size];
				int read = 0;

				fs.Seek(slice.Index, SeekOrigin.Begin);
				if ((read = fs.Read(data, 0, slice.Size)) > 0)
				{
					WriteSlice(ref slice, data, dir);
				}
			}
		}
	}

	public static void WriteSlice(ref Slice slice, byte[] data, string dir)
	{
		string slicePath = SourceFromSlice(slice, dir);
		using (FileStream ofs =
			new FileStream(slicePath, FileMode.OpenOrCreate, FileAccess.ReadWrite))
		{
			ofs.Write(data, 0, slice.Size);
			slice.Complete = true;
		}
	}

	public static void MergeSlices(ref Blob blob)
	{
		FileInfo blobInfo = new FileInfo(blob.Path);
		string dir = blobInfo.DirectoryName;

		using (FileStream outfs =
			new FileStream(blobInfo.FullName, FileMode.OpenOrCreate, FileAccess.ReadWrite))
		{
			foreach (KeyValuePair<int, Slice> kv in blob.Slices)
			{
				Slice slice = kv.Value;
				if (slice.Complete)
				{
					byte[] bytes = ReadSlice(ref slice, dir, true);
					outfs.Seek(slice.Index, SeekOrigin.Begin);
					outfs.Write(bytes, 0, slice.Size);

					// Update the completed count
					blob.CompletedSize += slice.Size;
				}
			}
		}
	}

	public static byte[] ReadSlice(ref Slice slice, string dir, bool delAfterReading)
	{
		int read = 0;
		byte[] data = new byte[slice.Size];
		string slicePath = SourceFromSlice(slice, dir);

		using (FileStream ifs = new FileStream(slicePath, FileMode.Open, FileAccess.Read))
		{
			read = ifs.Read(data, 0, slice.Size);
		}

		if (delAfterReading)
			File.Delete(slicePath);

		return data;
	}

	public static void InitBlob(ref Blob blob)
	{
		int sliceCount = 0;
		int sliceSize;

		// Catch remaining byte length after splitting
		int remainder = (blob.Size > _blockSize)? (blob.Size % _blockSize) : 0;

		// If this is a big file that can be split...
		if (blob.Size > _blockSize)
		{
			sliceCount = blob.Size / _blockSize;
			sliceSize = blob.Size / sliceCount;
		}
		else // Slice size same as blob size and only one slice needed
		{
			sliceCount = 1;
			sliceSize = blob.Size;
		}

		for (int i = 0; i < sliceCount; i++)
		{
			Slice slice = new Slice();
			slice.SourceId = blob.Id;
			slice.Size = (i == 0) ? sliceSize + remainder : sliceSize;
			slice.Index = i * slice.Size;

			blob.Slices.Add(slice.Index, slice);
		}
	}

	public static Blob GetBlob(string source)
	{
		Blob blob = new Blob();
		FileInfo info = new FileInfo(source);

		blob.Id = FileId(source);
		blob.Size = LengthToInt(info.Length);
		blob.Path = info.FullName;
		blob.CompletedSize = LengthToInt(info.Length);

		InitBlob(ref blob);
		return blob;
	}

	public static string GetChecksum(string source, string mode = "md5", bool isFile = false)
	{
		byte[] bytes = { };
		Stream fs;

		if (isFile)
			fs = new BufferedStream(File.OpenRead(source), 120000);
		else
			fs = new MemoryStream(Encoding.UTF8.GetBytes(source));

		switch (mode.ToLower())
		{
			case "sha1":
				using (SHA1CryptoServiceProvider sha1 = new SHA1CryptoServiceProvider())
					bytes = sha1.ComputeHash(fs);
				break;

			case "sha256":
				using (SHA256CryptoServiceProvider sha256 = new SHA256CryptoServiceProvider())
					bytes = sha256.ComputeHash(fs);
				break;

			case "sha512":
				using (SHA512CryptoServiceProvider sha512 = new SHA512CryptoServiceProvider())
					bytes = sha512.ComputeHash(fs);
				break;

			case "md5":
			default:
				using (MD5CryptoServiceProvider md5 = new MD5CryptoServiceProvider())
					bytes = md5.ComputeHash(fs);
				break;
		}

		// Cleanup
		fs.Close();
		fs = null;

		return BitConverter
			.ToString(bytes)
			.Replace("-", "")
			.ToLower();
	}

	private static int LengthToInt(long length)
	{
		return (int)Math.Ceiling((double)length);
	}

	private static string FileId(string source)
	{
		return GetChecksum(new FileInfo(source).FullName, "sha256", true);
	}

	private static string SourceFromSlice(Slice slice, string dir)
	{
		return dir + "\\" + slice.SourceId + "_" + slice.Index + ".slice";
	}
}

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s