-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompress.cpp
201 lines (163 loc) · 6.75 KB
/
compress.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#include <map>
#include <queue>
#include <bitset>
#include <iostream>
#include <iomanip>
#include <memory>
#include "compress.h"
int compress_file(std::string& inputFileName, std::string& outputFileName, bool verbose) {
std::ifstream inputFile(inputFileName);
if (!inputFile.is_open()) {
perror("open");
return 1;
}
// Get the original file size
inputFile.seekg(0, std::ios::end);
size_t originalFileSize = inputFile.tellg();
inputFile.seekg(0, std::ios::beg);
std::map<char, node*> charMap = countCharacters(inputFile);
// Build Huffman tree
node* huffmanRoot = buildHuffmanTree(charMap);
// Generate Huffman codes
std::map<char, std::string> huffmanCodes;
generateHuffmanCodes(huffmanRoot, "", huffmanCodes);
if (verbose) {
printCharacterFrequencies(charMap);
printHuffmanTree(huffmanRoot);
}
// Write Huffman-encoded data to binary file
writeToBinary(inputFileName, outputFileName, huffmanCodes, huffmanRoot);
// Free allocated memory for nodes
for (auto& pair : charMap) {
delete pair.second;
}
// Get the compressed file size
std::ifstream compressedFile(outputFileName, std::ios::binary);
compressedFile.seekg(0, std::ios::end);
size_t compressedFileSize = compressedFile.tellg();
compressedFile.close();
// Calculate and print the percent file size reduction
double reductionPercentage = (1.0 - static_cast<double>(compressedFileSize) / originalFileSize) * 100.0;
std::cout << "Original file size: " << originalFileSize << ", compressed file size: " << compressedFileSize << " Reduction: " << std::fixed << std::setprecision(2) << reductionPercentage << "%" << std::endl;
return 0;
}
std::shared_ptr<node> buildHuffmanTree(const std::map<char, std::shared_ptr<node>>& charMap) {
// Use a priority queue to build the Huffman tree
std::priority_queue<std::shared_ptr<node>, std::vector<std::shared_ptr<node>>, CompareNodes> pq;
// Initialize the priority queue with nodes from the character frequency map
for (const auto& pair : charMap) {
pq.push(pair.second);
}
// Build the Huffman tree by merging nodes until there is only one node left
while (pq.size() > 1) {
std::shared_ptr<node> left = pq.top();
pq.pop();
std::shared_ptr<node> right = pq.top();
pq.pop();
// Create a new node with a frequency equal to the sum of the frequencies of its children
std::shared_ptr<node> mergedNode = std::make_shared<node>(left->freq + right->freq, '\0', left, right);
pq.push(mergedNode);
}
// The last node in the priority queue is the root of the Huffman tree
return pq.top();
}
std::map<char, node*> countCharacters(std::ifstream& inputFile) {
std::map<char, node*> charMap;
char ch;
while (inputFile.get(ch)) {
if (charMap.find(ch) == charMap.end()) {
// If character is not in the map, add it with frequency 1
node* newNode = new node{1, ch, nullptr, nullptr};
charMap[ch] = newNode;
} else {
// If character is already in the map, increment its frequency
charMap[ch]->freq++;
}
}
return charMap;
}
void generateHuffmanCodes(node* root, const std::string& code, std::map<char, std::string>& huffmanCodes) {
if (root) {
if (root->letter != '\0') {
// Leaf node, store the character and its code in the map
huffmanCodes[root->letter] = code;
}
// Recursively traverse left and right subtrees
generateHuffmanCodes(root->left, code + "0", huffmanCodes);
generateHuffmanCodes(root->right, code + "1", huffmanCodes);
}
}
void padEnd(std::string& input) {
size_t padding = 8 - (input.length() % 8);
if (padding != 8) {
input += std::string(padding, '0');
}
}
void writeToBinary(const std::string& inputFile, const std::string& outputFile, std::map<char, std::string>& huffmanCodes, node *huffmanTree) {
std::ifstream input(inputFile, std::ios::binary);
std::ofstream output(outputFile, std::ios::binary);
if (!input.is_open() || !output.is_open()) {
perror("open");
exit(1);
}
char ch;
std::string encodedData;
// Read input file character by character and append corresponding Huffman code to the encodedData string
while (input.get(ch)) {
encodedData += huffmanCodes[ch];
}
std::string leafIndicator, treeValues;
postOrderTraversal(huffmanTree, true, leafIndicator);
postOrderTraversal(huffmanTree, false, treeValues);
// Write the length of the encoded data to the output file
uint32_t length = encodedData.size();
output.write(reinterpret_cast<const char*>(&length), sizeof(length));
// Write the size of the tree to the output file
uint32_t treeSize = leafIndicator.length();
output.write(reinterpret_cast<const char*>(&treeSize), sizeof(treeSize));
// Pad the end of encodedData and leafIndicator to they are a multiple of 8 bits
padEnd(leafIndicator);
padEnd(encodedData);
// Convert binary string of encoded data to actual binary data and write to the output file
std::bitset<8> bits;
for (size_t i = 0; i < length; i += 8) {
for (size_t j = 0; j < 8 && i + j < length; ++j) {
// Using 7 - j for little endian order
bits[7 - j] = (encodedData[i + j] == '1');
}
output.put(bits.to_ulong());
}
input.close();
// Convert binary string of leafIndicator to binary and write to output file
for (size_t i = 0; i < treeSize; i += 8) {
for(size_t j = 0; j < 8 && i + j < treeSize; ++j) {
// Using 7 - j for little endian order
bits[7 - j] = (leafIndicator[i + j] == '1');
}
output.put(bits.to_ulong());
}
// Write tree contents to output file
for (char c : treeValues) {
output.put(c);
}
output.close();
}
// Create post order traversal, mode 0 outputs values, mode 1 outputs strucutre
void postOrderTraversal(node *root, bool mode, std::string& output) {
if (root) {
postOrderTraversal(root->left, mode, output);
postOrderTraversal(root->right, mode, output);
if (mode) {
// Check if the current node is a leaf
if (!root->left && !root->right) {
output += '1'; // Node is a leaf
} else {
output += '0'; // Node is not a leaf
}
} else {
if (!root->left && !root->right) {
output += root->letter;
}
}
}
}