lz4 source code

1 module lz4;
2 /**
3  * CTFEable LZ4 decompressor
4  * Copyright © 2016 Stefan Koch
5  * All rights reserved
6  */
7 enum Endianess
8 {
9 	BigEndian,
10 	LittleEndian
11 }
12 /// JUST TO DEMONSTRATE THAT IT IS UNLIKELY :)
13 auto unlikely(T)(T expressionValue)
14 {
15 	return expressionValue;
16 }
17 
18 T fromBytes(T, Endianess endianess = Endianess.LittleEndian)(const ubyte[] _data)
19 {
20 	static assert(is(T : long)); // poor man's isIntegral
21 	T result;
22 	
23 	foreach (i; 0 .. T.sizeof)
24 	{
25 		static if (endianess == Endianess.LittleEndian)
26 		{
27 			result |= (_data[i] << i * 8);
28 		}
29 		else
30 		{
31 			result |= (_data[i] << (T.sizeof - 1 - i) * 8);
32 		}
33 	}
34 	return result;
35 }
36 
37 struct LZ4Header
38 {
39 	//TODO: finish this! ("parsing" LZ4 Frame format header)
40 	int end = 7;
41 	ubyte flags;
42 
43 	bool hasBlockIndependence;
44 	bool hasBlockChecksum;
45 	bool hasContentChecksum;
46 
47 	ulong contentSize;
48 
49 	this(const ubyte[] data) pure
50 	{
51 		assert(((data[0] >> 6) & 0b11) == 0b01, "Format can not be read");
52 
53 		hasBlockIndependence = ((data[0] >> 5) & 0b1);
54 		hasBlockChecksum = ((data[0] >> 4) & 0b1);
55 
56 		bool hasContentSize = ((data[0] >> 3) & 0b1);
57 
58 		hasContentChecksum = ((data[0] >> 2) & 0b1);
59 
60 		if (hasContentSize)
61 		{
62 			contentSize = fromBytes!ulong(data[2 .. 2 + ulong.sizeof]);
63 			assert(contentSize);
64 			end = 11;
65 		}
66 	}
67 }
68 
69 ubyte[] decodeLZ4File(const ubyte[] data) pure in {
70 	assert(data.length > 11, "Any valid LZ4 File has ti be longer then 11 bytes");
71 } body {
72 	assert(data[0 .. 4] == [0x04, 0x22, 0x4d, 0x18], "not a valid LZ4 file");
73 	auto lz4Header = LZ4Header(data[5 .. $]);
74 	uint length = fromBytes!uint(data[lz4Header.end .. lz4Header.end + uint.sizeof]);
75 
76 	return decodeLZ4Block(data[lz4Header.end + uint.sizeof .. $], length);
77 }
78 
79 ubyte[] decodeLZ4Block(const ubyte[] input, uint blockLength) pure in {
80 	assert(input.length > 5, "empty or too short input passed to decodeLZ4Block");
81 } body {
82 	uint coffset;
83 	ubyte[] output;
84 	
85 	while (true)
86 	{
87 		auto bitfield = input[coffset++];
88 		auto highBits = (bitfield >> 4);
89 		auto lowBits = bitfield & 0xF;
90 
91 		if (highBits)
92 		{
93 			uint literalsLength = 0xF;
94 
95 			if (highBits != 0xF)
96 			{
97 				literalsLength = highBits;
98 			}
99 			else
100 			{
101 				while (input[coffset++] == 0xFF)
102 				{
103 					literalsLength += 0xFF;
104 				}
105 				literalsLength += input[coffset - 1];
106 			}
107 
108 			output ~= input[coffset .. coffset + literalsLength];
109 			coffset += literalsLength;
110 		}
111 
112 		if (coffset >= blockLength)
113 			return output;
114 
115 		uint matchLength = 0xF + 4;
116 		ushort offset = (input[coffset++] | (input[coffset++] << 8));
117 
118 		if (lowBits != 0xF)
119 		{
120 			matchLength = lowBits + 4;
121 		}
122 		else
123 		{
124 			while (input[coffset++] == 0xFF)
125 			{
126 				matchLength += 0xFF;
127 			}
128 			matchLength += input[coffset - 1];
129 		}
130 
131 		if (unlikely(offset < matchLength))
132 		{
133 			uint startMatch = cast(uint) output.length - offset;
134 
135 			// this works for now. Maybe it's even more complicated...
136 			// e.g. lz4 widens the offset as the match gets longer
137 			// but the docs seem to suggest that the following code is indeed correct
138 
139 			while (unlikely(offset < matchLength))
140 			{ // TODO: IS IT REALLY _unlikely_ or could be _likely_ ?
141 				output ~= output[startMatch .. startMatch + offset];
142 				matchLength -= offset;
143 			}
144 
145 			output ~= output[startMatch .. startMatch + matchLength];
146 		}
147 		else
148 		{
149 			output ~= output[$ - offset .. ($ - offset) + matchLength];
150 		}
151 	}
152 }