I need to switch coffee brands…
Yesterday, for no apparent reason, I thought it may be a good idea to create a file transfer app that will asynchronously send files in slices(chunks) and still ensure the receiving party’s checksum will mach the sender’s. I was planning on adding public key security to the whole thing, but I can’t seem to get past step 1 without issues.
I tried testing splitting a file into slices and merging them immediately after and it seems to work just fine for small files.
Blob blob = FileUtils.GetBlob("C:\\Users\\Portable\\Downloads\\smalldoc.pdf"); FileUtils.SplitSlices(ref blob); // Change the filename blob.Path = "C:\\Users\\Portable\\Downloads\\smalldocCopy.pdf"; // Merge the slices back into one under the new filename FileUtils.MergeSlices(ref blob);
The head-scratching starts when splitting and merging a large-ish file (50Mb+). The “Size on disk” identical to the original, but the “Size” is smaller than the original Size. Meaning it’s taking up the same disk allocation, but some bits got lost along the way. The funny thing is that if I then split the merged copy and merge it again into another copy, then this third copy is identical to the second. So original is still the odd one out.
I can’t seem to find the reason for this other than I’m missing something really obvious or this is a platform issue. I hope it’s the former because cursing at the latter feels… weird.
Here’s the “Slice” class where data would be stored and sent/received async.
public class Slice { // Slice Id (Checksum / Currently not used) public string Id { get; set; } // File(Blob) Id (Checksum) public string SourceId { get; set; } // Blob location index public int Index { get; set; } // Slice byte length public int Size { get; set; } // Slice data public string Data { get; set; } public bool Complete { get; set; } public Slice() { Complete = false; } }
And the “Blob” class that use the above slice(s)
public class Blob { // File Id (Checksum) public string Id { get; set; } // Slice collection public SortedDictionary<int, Slice> Slices { get; set; } // Save path public string Path { get; set; } // File size public int Size { get; set; } // Assembled file size public int CompletedSize { get; set; } public Blob() { Slices = new SortedDictionary<int, Slice>(); Size = 0; CompletedSize = 0; } }
And of course, the uglier-than-sin FileUtils class (those with weak hearts, avert your eyes).
public static class FileUtils { private static int _blockSize = 65536; public static void SplitSlices(ref Blob blob) { FileInfo info = new FileInfo(blob.Path); string source = info.FullName; string dir = info.DirectoryName; using (FileStream fs = new FileStream(source, FileMode.Open, FileAccess.Read)) { foreach (KeyValuePair<int, Slice> kv in blob.Slices) { Slice slice = kv.Value; byte[] data = new byte[slice.Size]; int read = 0; fs.Seek(slice.Index, SeekOrigin.Begin); if ((read = fs.Read(data, 0, slice.Size)) > 0) { WriteSlice(ref slice, data, dir); } } } } public static void WriteSlice(ref Slice slice, byte[] data, string dir) { string slicePath = SourceFromSlice(slice, dir); using (FileStream ofs = new FileStream(slicePath, FileMode.OpenOrCreate, FileAccess.ReadWrite)) { ofs.Write(data, 0, slice.Size); slice.Complete = true; } } public static void MergeSlices(ref Blob blob) { FileInfo blobInfo = new FileInfo(blob.Path); string dir = blobInfo.DirectoryName; using (FileStream outfs = new FileStream(blobInfo.FullName, FileMode.OpenOrCreate, FileAccess.ReadWrite)) { foreach (KeyValuePair<int, Slice> kv in blob.Slices) { Slice slice = kv.Value; if (slice.Complete) { byte[] bytes = ReadSlice(ref slice, dir, true); outfs.Seek(slice.Index, SeekOrigin.Begin); outfs.Write(bytes, 0, slice.Size); // Update the completed count blob.CompletedSize += slice.Size; } } } } public static byte[] ReadSlice(ref Slice slice, string dir, bool delAfterReading) { int read = 0; byte[] data = new byte[slice.Size]; string slicePath = SourceFromSlice(slice, dir); using (FileStream ifs = new FileStream(slicePath, FileMode.Open, FileAccess.Read)) { read = ifs.Read(data, 0, slice.Size); } if (delAfterReading) File.Delete(slicePath); return data; } public static void InitBlob(ref Blob blob) { int sliceCount = 0; int sliceSize; // Catch remaining byte length after splitting int remainder = (blob.Size > _blockSize)? (blob.Size % _blockSize) : 0; // If this is a big file that can be split... if (blob.Size > _blockSize) { sliceCount = blob.Size / _blockSize; sliceSize = blob.Size / sliceCount; } else // Slice size same as blob size and only one slice needed { sliceCount = 1; sliceSize = blob.Size; } for (int i = 0; i < sliceCount; i++) { Slice slice = new Slice(); slice.SourceId = blob.Id; slice.Size = (i == 0) ? sliceSize + remainder : sliceSize; slice.Index = i * slice.Size; blob.Slices.Add(slice.Index, slice); } } public static Blob GetBlob(string source) { Blob blob = new Blob(); FileInfo info = new FileInfo(source); blob.Id = FileId(source); blob.Size = LengthToInt(info.Length); blob.Path = info.FullName; blob.CompletedSize = LengthToInt(info.Length); InitBlob(ref blob); return blob; } public static string GetChecksum(string source, string mode = "md5", bool isFile = false) { byte[] bytes = { }; Stream fs; if (isFile) fs = new BufferedStream(File.OpenRead(source), 120000); else fs = new MemoryStream(Encoding.UTF8.GetBytes(source)); switch (mode.ToLower()) { case "sha1": using (SHA1CryptoServiceProvider sha1 = new SHA1CryptoServiceProvider()) bytes = sha1.ComputeHash(fs); break; case "sha256": using (SHA256CryptoServiceProvider sha256 = new SHA256CryptoServiceProvider()) bytes = sha256.ComputeHash(fs); break; case "sha512": using (SHA512CryptoServiceProvider sha512 = new SHA512CryptoServiceProvider()) bytes = sha512.ComputeHash(fs); break; case "md5": default: using (MD5CryptoServiceProvider md5 = new MD5CryptoServiceProvider()) bytes = md5.ComputeHash(fs); break; } // Cleanup fs.Close(); fs = null; return BitConverter .ToString(bytes) .Replace("-", "") .ToLower(); } private static int LengthToInt(long length) { return (int)Math.Ceiling((double)length); } private static string FileId(string source) { return GetChecksum(new FileInfo(source).FullName, "sha256", true); } private static string SourceFromSlice(Slice slice, string dir) { return dir + "\\" + slice.SourceId + "_" + slice.Index + ".slice"; } }