Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Michael A. (Mike) Iovine
p1
Commits
7067a1c8
Commit
7067a1c8
authored
5 years ago
by
Mike Iovine
Browse files
Options
Download
Email Patches
Plain Diff
Implement better huffman code representation in inflate
parent
4bdde247
master
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
inflate.c
+170
-136
inflate.c
with
170 additions
and
136 deletions
+170
-136
inflate.c
View file @
7067a1c8
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#include <math.h>
/* Types of huffman codes */
static
const
int
FIXED
=
1
;
static
const
int
DYNAMIC
=
2
;
#define FIXED 1
#define DYNAMIC 2
/* Upper bound, in bits, on the length of any Huffman code. */
#define MAX_LENGTH 15
/* Filler stored in huffman_t.min_codes for lengths that have no codes. */
#define NO_CODE (-1)

/*
 * Description of one Huffman code.
 * All three tables are indexed by code length and have MAX_LENGTH + 1
 * slots, so slot i describes the codes that are exactly i bits long.
 */
typedef struct huffman {
    /* bl_counts[i]: number of codes that are i bits long. */
    int bl_counts[MAX_LENGTH + 1];

    /* alphabet[i]: array holding the bl_counts[i] symbols encoded by the
     * i-bit codes. */
    int *alphabet[MAX_LENGTH + 1];

    /* min_codes[i]: offset that turns an i-bit code value c into a position
     * in alphabet[i]; the symbol is alphabet[i][c - min_codes[i]]. */
    int min_codes[MAX_LENGTH + 1];
} huffman_t;
/*
* Constants for FIXED code type
*/
/* Alphabet for codes of length 7, 8, and 9, in order
* Not really intended for use; use HUFFMAN_FIXED instead.
*/
/* Symbols of the 7-bit fixed codes, in order: 256 through 279 (24 entries). */
int _FIXED_7[24] = {
    256, 257, 258, 259, 260, 261, 262, 263,
    264, 265, 266, 267, 268, 269, 270, 271,
    272, 273, 274, 275, 276, 277, 278, 279
};
/* Symbols of the 8-bit fixed codes, in order: 0 through 143 followed by
 * 280 through 287 (152 entries). */
int _FIXED_8[152] = {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
     16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
     32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
     48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
     64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
     80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
     96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    280, 281, 282, 283, 284, 285, 286, 287
};
/* Symbols of the 9-bit fixed codes, in order: 144 through 255 (112 entries). */
int _FIXED_9[112] = {
    144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};
/* Fixed Huffman codes mapping. */
/* Note that we only have codes of length 7, 8, 9 */
huffman_t
HUFFMAN_FIXED
=
{
.
bl_counts
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
24
,
152
,
112
,
0
,
0
,
0
,
0
,
0
},
static
const
int
DECODE_7_MIN
=
0
;
static
const
int
DECODE_7_MAX
=
23
;
static
const
int
DECODE_7
[]
=
{
256
,
257
,
258
,
259
,
260
,
261
,
262
,
263
,
264
,
265
,
266
,
267
,
268
,
269
,
270
,
271
,
272
,
273
,
274
,
275
,
276
,
277
,
278
,
279
};
.
alphabet
=
{
NULL
,
NULL
,
NULL
,
NULL
,
NULL
,
NULL
,
NULL
,
_FIXED_7
,
_FIXED_8
,
_FIXED_9
,
NULL
,
NULL
,
NULL
,
NULL
,
NULL
},
static
const
int
DECODE_8_MIN
=
48
;
static
const
int
DECODE_8_MAX
=
191
;
static
const
int
DECODE_8
[]
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
,
32
,
33
,
34
,
35
,
36
,
37
,
38
,
39
,
40
,
41
,
42
,
43
,
44
,
45
,
46
,
47
,
48
,
49
,
50
,
51
,
52
,
53
,
54
,
55
,
56
,
57
,
58
,
59
,
60
,
61
,
62
,
63
,
64
,
65
,
66
,
67
,
68
,
69
,
70
,
71
,
72
,
73
,
74
,
75
,
76
,
77
,
78
,
79
,
80
,
81
,
82
,
83
,
84
,
85
,
86
,
87
,
88
,
89
,
90
,
91
,
92
,
93
,
94
,
95
,
96
,
97
,
98
,
99
,
100
,
101
,
102
,
103
,
104
,
105
,
106
,
107
,
108
,
109
,
110
,
111
,
112
,
113
,
114
,
115
,
116
,
117
,
118
,
119
,
120
,
121
,
122
,
123
,
124
,
125
,
126
,
127
,
128
,
129
,
130
,
131
,
132
,
133
,
134
,
135
,
136
,
137
,
138
,
139
,
140
,
141
,
142
,
143
,
280
,
281
,
282
,
283
,
284
,
285
,
286
,
287
};
static
const
int
DECODE_9_MIN
=
400
;
static
const
int
DECODE_9_MAX
=
511
;
static
const
int
DECODE_9
[]
=
{
144
,
145
,
146
,
147
,
148
,
149
,
150
,
151
,
152
,
153
,
154
,
155
,
156
,
157
,
158
,
159
,
160
,
161
,
162
,
163
,
164
,
165
,
166
,
167
,
168
,
169
,
170
,
171
,
172
,
173
,
174
,
175
,
176
,
177
,
178
,
179
,
180
,
181
,
182
,
183
,
184
,
185
,
186
,
187
,
188
,
189
,
190
,
191
,
192
,
193
,
194
,
195
,
196
,
197
,
198
,
199
,
200
,
201
,
202
,
203
,
204
,
205
,
206
,
207
,
208
,
209
,
210
,
211
,
212
,
213
,
214
,
215
,
216
,
217
,
218
,
219
,
220
,
221
,
222
,
223
,
224
,
225
,
226
,
227
,
228
,
229
,
230
,
231
,
232
,
233
,
234
,
235
,
236
,
237
,
238
,
239
,
240
,
241
,
242
,
243
,
244
,
245
,
246
,
247
,
248
,
249
,
250
,
251
,
252
,
253
,
254
,
255
.
min_codes
=
{
NO_CODE
,
NO_CODE
,
NO_CODE
,
NO_CODE
,
NO_CODE
,
NO_CODE
,
NO_CODE
,
0
,
48
,
192
,
400
,
NO_CODE
,
NO_CODE
,
NO_CODE
,
NO_CODE
,
NO_CODE
}
};
static
const
int
LENGTH_OFFSET
=
257
;
static
const
int
DECODE_LENGTH
[]
=
{
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
13
,
15
,
17
,
19
,
23
,
27
,
31
,
35
,
43
,
51
,
59
,
67
,
83
,
99
,
115
,
131
,
163
,
195
,
227
,
258
/* Conversion tables for lengths
* To index into lengths, use (value_read - LENGTH_OFFSET)
* We may have to read additional bits; check LEN_ADDITIONAL for how many
*/
#define LENGTH_OFFSET 257
/* Base match length for each length symbol (29 entries).
 * Look up with (symbol - LENGTH_OFFSET). */
int LEN_TABLE[29] = {
      3,   4,   5,   6,   7,   8,   9,  10,
     11,  13,  15,  17,  19,  23,  27,  31,
     35,  43,  51,  59,  67,  83,  99, 115,
    131, 163, 195, 227, 258
};
static
const
int
ADD_LENGTHS
[]
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
3
,
3
,
3
,
3
,
4
,
4
,
4
,
4
,
5
,
5
,
5
,
5
,
0
/* Number of additional bits to read for each length symbol (29 entries),
 * laid out in the same order as the length table. */
int LEN_ADDITIONAL[29] = {
    0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1,
    2, 2, 2, 2,
    3, 3, 3, 3,
    4, 4, 4, 4,
    5, 5, 5, 5,
    0
};
static
const
int
DECODE_DIST
[]
=
{
1
,
2
,
3
,
4
,
5
,
7
,
9
,
13
,
17
,
25
,
33
,
49
,
65
,
97
,
129
,
193
,
257
,
385
,
513
,
769
,
1025
,
1537
,
2049
,
3073
,
4097
,
6145
,
8193
,
12289
,
16385
,
24577
/* Conversion tables for distance codes
* This can be indexed into directly with the distance code.
* Again, we may have to read additional bits to get the distance.
*/
/* Base distance for each distance code (30 entries).
 * Indexed directly by the distance code. */
int DIST_TABLE[30] = {
        1,     2,     3,     4,     5,     7,
        9,    13,    17,    25,    33,    49,
       65,    97,   129,   193,   257,   385,
      513,   769,  1025,  1537,  2049,  3073,
     4097,  6145,  8193, 12289, 16385, 24577
};
static
const
int
ADD_DISTS
[]
=
{
0
,
0
,
0
,
0
,
1
,
1
,
2
,
2
,
3
,
3
,
4
,
4
,
5
,
5
,
6
,
6
,
7
,
7
,
8
,
8
,
9
,
9
,
10
,
10
,
11
,
11
,
12
,
12
,
13
,
13
/* Number of additional bits to read for each distance code (30 entries),
 * laid out in the same order as the distance table. */
int DIST_ADDITIONAL[30] = {
     0,  0,  0,  0,
     1,  1,  2,  2,  3,  3,  4,  4,
     5,  5,  6,  6,  7,  7,  8,  8,
     9,  9, 10, 10, 11, 11, 12, 12,
    13, 13
};
int
match
(
int
chunk
,
const
int
decode_table
[],
const
int
decode_min
,
const
int
decode_max
)
{
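/* Index of the next unread bit in the input buffer.
* Advance it only through get_next_bit; the one exception is read_block,
* which bumps it directly to skip the filler at the end of a block. */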
int
_CUR_BIT
=
0
;
if
(
chunk
<
decode_min
||
chunk
>
decode_max
)
{
return
-
1
;
}
return
decode_table
[
chunk
-
decode_min
];
}
int
get_next_bit
(
char
*
buf
,
int
n
)
{
int
buf_pos
=
n
/
8
;
int
pos
=
n
-
8
*
buf_pos
;
/*
 * Return the next unread bit of buf (0 or 1) and advance the cursor.
 *
 * The read position is the file-level counter _CUR_BIT: bit k of the stream
 * is bit (k % 8) of byte buf[k / 8], counting from the least-significant
 * bit. _CUR_BIT is incremented by one on every call. No bounds check is
 * made; the caller must keep _CUR_BIT inside the buffer.
 */
int get_next_bit(char *buf)
{
    int byte_idx = _CUR_BIT / 8;
    int bit_idx = _CUR_BIT % 8;

    /* Work on an unsigned byte: a plain-char mask of (1 << 7) does not fit
     * a signed char, so testing bit 7 would depend on implementation-defined
     * conversion and sign extension. */
    unsigned char byte = (unsigned char)buf[byte_idx];

    _CUR_BIT += 1;
    return (byte >> bit_idx) & 1;
}
int
get_n_bits
(
char
*
buf
,
int
pos
,
int
n
,
bool
reverse
)
{
int
get_n_bits
(
char
*
buf
,
int
n
,
bool
reverse
)
{
int
res
=
0
;
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
if
(
!
reverse
)
{
res
+=
get_next_bit
(
buf
,
pos
)
<<
(
n
-
i
-
1
);
res
+=
get_next_bit
(
buf
)
<<
(
n
-
i
-
1
);
}
else
{
res
+=
get_next_bit
(
buf
,
pos
)
<<
i
;
res
+=
get_next_bit
(
buf
)
<<
i
;
}
pos
+=
1
;
}
return
res
;
}
/* Read the block starting at the n-th bit */
int
read_block
(
char
*
buf
,
int
buf_size
,
int
n
,
FILE
*
out
)
{
/* Keep track of the bit we are at in the buffer */
int
pos
=
n
;
/*
 * Decode one symbol from the bit stream using the code tables in hf.
 *
 * Bits are fetched one at a time with get_next_bit() and appended to the
 * low end of the running code value. After each bit, if hf has codes of the
 * current length, the value is converted to an index via min_codes and
 * checked against the number of codes of that length.
 *
 * Returns the matched symbol, or -1 if no code of length 1..MAX_LENGTH
 * matches (by then MAX_LENGTH bits have been consumed).
 */
int read_chunk(char *buf, huffman_t hf)
{
    int code = 0;

    /* There are no codes of length 0, so start at length 1. */
    for (int len = 1; len <= MAX_LENGTH; len++) {
        /* No match so far: make room and take one more bit. */
        code = (code << 1) | get_next_bit(buf);

        /* Nothing to match against at this length. */
        if (hf.alphabet[len] == NULL) {
            continue;
        }

        /* Position of this code inside alphabet[len]. Reject negative
         * positions as well as too-large ones; otherwise a code below
         * min_codes[len] would read before the start of the array. */
        int idx = code - hf.min_codes[len];
        if (idx >= 0 && idx < hf.bl_counts[len]) {
            return hf.alphabet[len][idx];
        }
    }

    /* Something has gone wrong if we reach here. */
    return -1;
}
/* Read the block starting at the current bit position (_CUR_BIT) */
void
read_block
(
char
*
buf
,
FILE
*
out
)
{
/* First bit is the BFINAL flag */
bool
bfinal
=
get_next_bit
(
buf
,
pos
);
pos
+=
1
;
bool
bfinal
=
get_next_bit
(
buf
);
/* Next two bits are BTYPE */
int
btype
=
get_n_bits
(
buf
,
pos
,
2
,
true
);
pos
+=
2
;
int
btype
=
get_n_bits
(
buf
,
2
,
true
);
/* By default, use fixed mapping */
huffman_t
hf
=
HUFFMAN_FIXED
;
if
(
btype
==
DYNAMIC
)
{
/* TODO */
return
0
;
// TODO update hf if dynamic
}
else
if
(
btype
!=
FIXED
)
{
fprintf
(
stderr
,
"error: unrecognized btype
\n
"
);
exit
(
1
);
}
/* Ready to decode when we reach this point. */
int
chunk_val
;
do
{
/* Read first 7 bits, stream order */
int
chunk
=
get_n_bits
(
buf
,
pos
,
7
,
false
);
pos
+=
7
;
chunk_val
=
match
(
chunk
,
DECODE_7
,
DECODE_7_MIN
,
DECODE_7_MAX
);
/* No match, read another bit */
if
(
chunk_val
==
-
1
)
{
chunk
<<=
1
;
chunk
+=
get_next_bit
(
buf
,
pos
);
pos
+=
1
;
chunk_val
=
match
(
chunk
,
DECODE_8
,
DECODE_8_MIN
,
DECODE_8_MAX
);
}
/* Again no match, read another bit */
if
(
chunk_val
==
-
1
)
{
chunk
+=
get_next_bit
(
buf
,
pos
);
pos
+=
1
;
chunk_val
=
match
(
chunk
,
DECODE_9
,
DECODE_9_MIN
,
DECODE_9_MAX
);
}
/* Literal */
if
(
chunk_val
<=
255
)
{
fwrite
(
&
chunk_val
,
1
,
sizeof
(
char
),
out
);
}
/* Length */
else
if
(
chunk_val
!=
256
)
{
int
length
=
DECODE_LENGTH
[
chunk_val
-
LENGTH_OFFSET
];
/* Might need to read some extra bits and add to the length */
int
additional_bits
=
ADD_LENGTHS
[
chunk_val
-
LENGTH_OFFSET
];
for
(
int
i
=
0
;
i
<
additional_bits
;
i
++
)
{
length
+=
get_next_bit
(
buf
,
pos
)
<<
i
;
pos
+=
1
;
}
/* Next 5 bits are a distance */
int
dist_val
=
get_n_bits
(
buf
,
pos
,
5
,
false
);
pos
+=
5
;
int
dist
=
DECODE_DIST
[
dist_val
];
additional_bits
=
ADD_DISTS
[
dist_val
];
for
(
int
i
=
0
;
i
<
additional_bits
;
i
++
)
{
dist
+=
get_next_bit
(
buf
,
pos
)
<<
i
;
pos
+=
1
;
}
for
(
int
i
=
0
;
i
<
length
;
i
++
)
{
{
/* Match a huffman code */
chunk_val
=
read_chunk
(
buf
,
hf
);
/* Literal, just write to output buffer */
if
(
chunk_val
<
256
)
{
fwrite
(
&
chunk_val
,
1
,
sizeof
(
char
),
out
);
/* We read a length */
}
else
if
(
chunk_val
>
256
)
{
int
length
=
LEN_TABLE
[
chunk_val
-
LENGTH_OFFSET
];
int
addit_len
=
LEN_ADDITIONAL
[
chunk_val
-
LENGTH_OFFSET
];
length
+=
get_n_bits
(
buf
,
addit_len
,
true
);
/* Next 5 bits are the distance code */
int
dist_code
=
get_n_bits
(
buf
,
5
,
false
);
int
dist
=
DIST_TABLE
[
dist_code
];
int
addit_dist
=
DIST_ADDITIONAL
[
dist_code
];
dist
+=
get_n_bits
(
buf
,
addit_dist
,
true
);
for
(
int
i
=
0
;
i
<
length
;
i
++
)
{
fseek
(
out
,
-
dist
,
SEEK_CUR
);
int
c
=
fgetc
(
out
);
int
val
=
fgetc
(
out
);
fseek
(
out
,
0
,
SEEK_END
);
fwrite
(
&
c
,
1
,
sizeof
(
char
),
out
);
fwrite
(
&
val
,
1
,
sizeof
(
char
),
out
);
}
}
}
while
(
chunk_val
!=
256
);
while
(
chunk_val
!=
256
);
/* Skip over filler at the end of the block */
while
(
(
pos
-
n
)
%
8
!=
0
)
{
pos
+=
1
;
while
(
_CUR_BIT
%
8
!=
0
)
{
_CUR_BIT
+=
1
;
}
return
(
pos
-
n
);
}
void
inflate
(
FILE
*
fp
)
{
...
...
@@ -198,11 +235,8 @@ void inflate(FILE *fp) {
}
fread
(
buf
,
1
,
size
,
fp
);
long
n_read
=
0
;
while
(
n_read
<
8
*
size
)
{
n_read
+=
read_block
(
buf
,
size
,
n_read
,
out
);
break
;
while
(
_CUR_BIT
<
8
*
size
)
{
read_block
(
buf
,
out
);
}
fclose
(
out
);
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment
Menu
Projects
Groups
Snippets
Help