1 module lz4; 2 /** 3 * CTFEable LZ4 decompressor 4 * Copyright © 2016 Stefan Koch 5 * All rights reserved 6 */ 7 enum Endianess 8 { 9 BigEndian, 10 LittleEndian 11 } 12 /// JUST TO DEMONSTRATE THAT IT IS UNLIKELY :) 13 auto unlikely(T)(T expressionValue) 14 { 15 return expressionValue; 16 } 17 18 T fromBytes(T, Endianess endianess = Endianess.LittleEndian)(const ubyte[] _data) 19 { 20 static assert(is(T : long)); // poor man's isIntegral 21 T result; 22 23 foreach (i; 0 .. T.sizeof) 24 { 25 static if (endianess == Endianess.LittleEndian) 26 { 27 result |= (_data[i] << i * 8); 28 } 29 else 30 { 31 result |= (_data[i] << (T.sizeof - 1 - i) * 8); 32 } 33 } 34 return result; 35 } 36 37 struct LZ4Header 38 { 39 //TODO: finish this! ("parsing" LZ4 Frame format header) 40 int end = 7; 41 ubyte flags; 42 43 bool hasBlockIndependence; 44 bool hasBlockChecksum; 45 bool hasContentChecksum; 46 47 ulong contentSize; 48 49 this(const ubyte[] data) pure 50 { 51 assert(((data[0] >> 6) & 0b11) == 0b01, "Format can not be read"); 52 53 hasBlockIndependence = ((data[0] >> 5) & 0b1); 54 hasBlockChecksum = ((data[0] >> 4) & 0b1); 55 56 bool hasContentSize = ((data[0] >> 3) & 0b1); 57 58 hasContentChecksum = ((data[0] >> 2) & 0b1); 59 60 if (hasContentSize) 61 { 62 contentSize = fromBytes!ulong(data[2 .. 2 + ulong.sizeof]); 63 assert(contentSize); 64 end = end + cast(uint)ulong.sizeof; 65 } 66 } 67 } 68 69 ubyte[] decodeLZ4File(const ubyte[] data) pure in { 70 assert(data.length > 11, "Any valid LZ4 File has ti be longer then 11 bytes"); 71 } body { 72 ubyte[] result; 73 assert(data[0 .. 4] == [0x04, 0x22, 0x4d, 0x18], "not a valid LZ4 file"); 74 auto lz4Header = LZ4Header(data[5 .. $]); 75 size_t decodedBytes = lz4Header.end; 76 77 78 while(true) { 79 uint length = fromBytes!uint(data[decodedBytes .. decodedBytes + uint.sizeof]); 80 if (length == 0) { 81 return result; 82 } 83 result ~= decodeLZ4Block(data[decodedBytes + uint.sizeof .. $], length); 84 decodedBytes += length + uint.sizeof; 85 } 86 assert(0); // "We can never get here" 87 } 88 89 ubyte[] decodeLZ4Block(const ubyte[] input, uint blockLength) pure in { 90 assert(input.length > 5, "empty or too short input passed to decodeLZ4Block"); 91 } body { 92 uint coffset; 93 ubyte[] output; 94 95 while (true) 96 { 97 auto bitfield = input[coffset++]; 98 auto highBits = (bitfield >> 4); 99 auto lowBits = bitfield & 0xF; 100 101 if (highBits) 102 { 103 uint literalsLength = 0xF; 104 105 if (highBits != 0xF) 106 { 107 literalsLength = highBits; 108 } 109 else 110 { 111 while (input[coffset++] == 0xFF) 112 { 113 literalsLength += 0xFF; 114 } 115 literalsLength += input[coffset - 1]; 116 } 117 118 output ~= input[coffset .. coffset + literalsLength]; 119 coffset += literalsLength; 120 } 121 122 if (coffset >= blockLength) 123 return output; 124 125 uint matchLength = 0xF + 4; 126 ushort offset = (input[coffset++] | (input[coffset++] << 8)); 127 128 if (lowBits != 0xF) 129 { 130 matchLength = lowBits + 4; 131 } 132 else 133 { 134 while (input[coffset++] == 0xFF) 135 { 136 matchLength += 0xFF; 137 } 138 matchLength += input[coffset - 1]; 139 } 140 141 if (unlikely(offset < matchLength)) 142 { 143 uint startMatch = cast(uint) output.length - offset; 144 145 // this works for now. Maybe it's even more complicated... 146 // e.g. lz4 widens the offset as the match gets longer 147 // but the docs seem to suggest that the following code is indeed correct 148 149 while (unlikely(offset < matchLength)) 150 { // TODO: IS IT REALLY _unlikely_ or could be _likely_ ? 151 output ~= output[startMatch .. startMatch + offset]; 152 matchLength -= offset; 153 } 154 155 output ~= output[startMatch .. startMatch + matchLength]; 156 } 157 else 158 { 159 output ~= output[$ - offset .. ($ - offset) + matchLength]; 160 } 161 } 162 }