lz4 source code

1 module lz4;
2 /**
3  * CTFEable LZ4 decompressor
4  * Copyright © 2016 Stefan Koch
5  * All rights reserved
6  */
/// Byte order to assume when assembling a multi-byte integer from raw bytes.
enum Endianess : int
{
	/// The first byte of the input is the most significant one.
	BigEndian = 0,
	/// The first byte of the input is the least significant one.
	LittleEndian = 1,
}
/**
 * Marker for conditions the author considers unlikely to hold.
 *
 * Purely documentary: the argument is handed back unchanged.
 *
 * Params:
 *     expressionValue = value of the condition being annotated
 * Returns: `expressionValue`, untouched.
 */
T unlikely(T)(T expressionValue)
{
	return expressionValue;
}
17 
/**
 * Assembles an integral value of type `T` from the first `T.sizeof` bytes of
 * `_data`.
 *
 * Params:
 *     T         = integral result type (anything implicitly convertible to long)
 *     endianess = byte order of the input; little endian by default
 *     _data     = source bytes; must hold at least `T.sizeof` elements
 * Returns: the decoded value.
 */
T fromBytes(T, Endianess endianess = Endianess.LittleEndian)(const ubyte[] _data)
{
	static assert(is(T : long)); // poor man's isIntegral

	// Accumulate in 64 bits. Shifting the raw ubyte would promote it to a
	// 32-bit int first, which breaks shifts of 32 bits or more (needed for
	// ulong) and sign-extends a byte whose top bit lands in bit 31.
	ulong accumulated = 0;

	foreach (i; 0 .. T.sizeof)
	{
		static if (endianess == Endianess.LittleEndian)
		{
			immutable size_t shift = i * 8;
		}
		else
		{
			immutable size_t shift = (T.sizeof - 1 - i) * 8;
		}
		accumulated |= (cast(ulong) _data[i]) << shift;
	}
	return cast(T) accumulated;
}
36 
/**
 * Minimal parser for the frame descriptor of an LZ4 frame.
 *
 * The constructor expects a slice whose first byte is the FLG byte, i.e. the
 * byte that directly follows the 4-byte magic number.
 */
struct LZ4Header
{
	//TODO: finish this! ("parsing" LZ4 Frame format header)

	/// Size of the frame header counted from the start of the magic number:
	/// magic (4) + FLG (1) + BD (1) + header checksum (1) = 7, plus the
	/// optional fields added by the constructor.
	int end = 7;
	/// Not populated by the constructor at present.
	ubyte flags;

	bool hasBlockIndependence;
	bool hasBlockChecksum;
	bool hasContentChecksum;

	/// Uncompressed content size; only set when the frame declares one.
	ulong contentSize;

	/**
	 * Decodes the flag byte and the optional content size.
	 *
	 * Params:
	 *     data = frame bytes starting at the FLG byte
	 */
	this(const ubyte[] data) pure
	{
		immutable ubyte flg = data[0];

		// Bits 7-6 carry the format version, which must be 01.
		assert(((flg >> 6) & 0b11) == 0b01, "Format can not be read");

		hasBlockIndependence = ((flg >> 5) & 0b1) != 0;
		hasBlockChecksum = ((flg >> 4) & 0b1) != 0;

		immutable bool hasContentSize = ((flg >> 3) & 0b1) != 0;

		hasContentChecksum = ((flg >> 2) & 0b1) != 0;

		immutable bool hasDictionaryId = (flg & 0b1) != 0;

		if (hasContentSize)
		{
			// data[1] is the BD byte; the 8-byte size field follows it.
			contentSize = fromBytes!ulong(data[2 .. 2 + ulong.sizeof]);
			assert(contentSize);
			end += cast(int) ulong.sizeof;
		}

		if (hasDictionaryId)
		{
			// A 4-byte dictionary ID sits between the content size and the
			// header checksum; skip it so `end` still marks the header's end.
			end += cast(int) uint.sizeof;
		}
	}
}
68 
/**
 * Decompresses a complete LZ4 frame ("file").
 *
 * Checks the magic number, parses the frame header and then decodes one data
 * block after another until the zero-sized end mark is reached. Checksums are
 * skipped, not verified.
 *
 * Params:
 *     data = the whole frame, starting at the magic number
 * Returns: the concatenated decoded content of all blocks.
 */
ubyte[] decodeLZ4File(const ubyte[] data) pure in {
	assert(data.length > 11, "Any valid LZ4 File has ti be longer then 11 bytes");
} body {
	// Highest bit of the block-size field: set when the block is stored raw.
	enum uint uncompressedFlag = 0x8000_0000;
	enum size_t blockChecksumSize = 4;

	ubyte[] result;
	assert(data[0 .. 4] == [0x04, 0x22, 0x4d, 0x18], "not a valid LZ4 file");

	// The FLG byte comes right after the 4-byte magic number, at index 4.
	auto lz4Header = LZ4Header(data[4 .. $]);
	size_t decodedBytes = lz4Header.end;

	while (true)
	{
		immutable uint sizeField = fromBytes!uint(data[decodedBytes .. decodedBytes + uint.sizeof]);
		if (sizeField == 0)
		{
			// End mark: no further blocks.
			return result;
		}

		immutable uint length = sizeField & ~uncompressedFlag;
		immutable size_t blockStart = decodedBytes + uint.sizeof;

		if (sizeField & uncompressedFlag)
		{
			// Stored block: the payload is the content itself.
			result ~= data[blockStart .. blockStart + length];
		}
		else
		{
			result ~= decodeLZ4Block(data[blockStart .. $], length);
		}

		decodedBytes = blockStart + length;
		if (lz4Header.hasBlockChecksum)
		{
			// Each block is followed by a 4-byte checksum when the flag is set.
			decodedBytes += blockChecksumSize;
		}
	}
}
88 
/**
 * Decodes a single LZ4-compressed block.
 *
 * A block is a series of sequences. Each sequence starts with a token whose
 * high nibble is the literal count and whose low nibble is the match length
 * minus 4; a nibble of 15 is extended by following bytes that are summed up
 * until one differs from 0xFF. The literals are followed by a 2-byte
 * little-endian match offset, except in the last sequence of the block.
 *
 * Params:
 *     input       = bytes starting at the first token of the block; may
 *                   extend past the end of the block
 *     blockLength = number of bytes of `input` that belong to the block
 * Returns: the decoded bytes.
 * Throws: Exception if a match offset is zero or points before the start of
 *         the output produced so far.
 */
ubyte[] decodeLZ4Block(const ubyte[] input, uint blockLength) pure in {
	assert(input.length > 0 && input.length >= blockLength,
		"empty or too short input passed to decodeLZ4Block");
} body {
	uint coffset;
	ubyte[] output;

	while (true)
	{
		immutable ubyte token = input[coffset++];
		immutable uint highBits = token >> 4;
		immutable uint lowBits = token & 0xF;

		if (highBits)
		{
			uint literalsLength = highBits;

			if (highBits == 0xF)
			{
				// Extended length: add bytes up to and including the first
				// one that is not 0xFF.
				ubyte extension;
				do
				{
					extension = input[coffset++];
					literalsLength += extension;
				}
				while (extension == 0xFF);
			}

			output ~= input[coffset .. coffset + literalsLength];
			coffset += literalsLength;
		}

		// The last sequence of a block consists of literals only.
		if (coffset >= blockLength)
			return output;

		immutable uint offsetLow = input[coffset++];
		immutable uint offsetHigh = input[coffset++];
		immutable uint offset = offsetLow | (offsetHigh << 8);

		// A zero offset would make the copy loop below spin forever, and an
		// offset larger than the output points before its first byte.
		if (offset == 0 || offset > output.length)
			throw new Exception("invalid match offset in LZ4 block");

		uint matchLength = lowBits + 4;
		if (lowBits == 0xF)
		{
			ubyte extension;
			do
			{
				extension = input[coffset++];
				matchLength += extension;
			}
			while (extension == 0xFF);
		}

		immutable size_t startMatch = output.length - offset;

		// When the match is longer than the offset it overlaps the bytes it
		// produces: the last `offset` bytes repeat until the match is filled.
		while (offset < matchLength)
		{
			output ~= output[startMatch .. startMatch + offset];
			matchLength -= offset;
		}

		output ~= output[startMatch .. startMatch + matchLength];
	}
}