diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..8c213f3
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,76 @@
+# Build paq8sk.
+#   make            - multithreaded build linked against the system zlib/bzip2
+#   make STATIC=Y   - static build against the bundled zlib/bzip2 sources
+TARGET = paq8sk
+CC ?= gcc
+CFLAGS ?= -Wall -O2
+CXX ?= g++
+CXXFLAGS ?= -Wall -msse2 -O3
+LDFLAGS ?= -s
+AR = ar
+RM ?= rm -f
+SRCS = src/$(TARGET).cpp
+OBJS = $(SRCS:%.cpp=%.o)
+# Library sources only. The bundled bzip2 programs (bzip2.c, bzip2recover.c,
+# dlltest.c, spewG.c, unzcrash.c) each define main() and are not part of libbz2.
+BZIP2SRCS = src/bzip2/blocksort.c \
+    src/bzip2/bz2compress.c \
+    src/bzip2/bz2decompress.c \
+    src/bzip2/bzlib.c \
+    src/bzip2/crctable.c \
+    src/bzip2/huffman.c \
+    src/bzip2/randtable.c
+BZIP2OBJS = $(BZIP2SRCS:%.c=%.o)
+ZLIBSRCS = src/zlib/adler32.c \
+    src/zlib/crc32.c \
+    src/zlib/deflate.c \
+    src/zlib/inffast.c \
+    src/zlib/inflate.c \
+    src/zlib/inftrees.c \
+    src/zlib/trees.c \
+    src/zlib/zutil.c
+ZLIBOBJS = $(ZLIBSRCS:%.c=%.o)
+STATICLIBS = libz.a libbz2.a
+
+# Flags the build cannot work without are kept out of CXXFLAGS/LDFLAGS so that
+# overriding those on the command line does not silently drop them.
+ifeq ($(OS),Windows_NT)
+  req_cppflags = -DWINDOWS
+else
+  req_cppflags = -DUNIX
+endif
+
+ifeq ($(strip $(STATIC)),Y)
+  req_cppflags += -Isrc/bzip2 -Isrc/zlib
+  req_ldflags = -static
+  OBJS += $(STATICLIBS)
+else
+  req_cppflags += -DMT
+  req_ldflags =
+  LDLIBS = -lpthread -lz -lbz2
+endif
+
+.PHONY: all clean
+.DELETE_ON_ERROR:
+
+all: $(TARGET)
+
+$(TARGET): $(OBJS)
+	$(CXX) $(CXXFLAGS) $^ $(req_ldflags) $(LDFLAGS) $(LDLIBS) -o $@
+
+libbz2.a: $(BZIP2OBJS)
+	$(AR) rcs $@ $^
+
+libz.a: $(ZLIBOBJS)
+	$(AR) rcs $@ $^
+
+%.o: %.c
+	$(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@
+
+%.o: %.cpp
+	$(CXX) $(req_cppflags) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@
+
+# Always remove the static libraries too, so `make clean` works regardless of
+# whether STATIC=Y is passed.
+clean:
+	$(RM) $(TARGET) $(OBJS) $(BZIP2OBJS) $(ZLIBOBJS) $(STATICLIBS)
diff --git a/README.md b/README.md
index 1675588..476bdc2 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,17 @@
+`ORG.FSMS:`
+![GitHub release (latest by date)](https://img.shields.io/github/v/release/FS-make-simple/paq8sk)
+![GitHub Release Date](https://img.shields.io/github/release-date/FS-make-simple/paq8sk)
+![GitHub repo size](https://img.shields.io/github/repo-size/FS-make-simple/paq8sk)
+![GitHub all releases](https://img.shields.io/github/downloads/FS-make-simple/paq8sk/total)
+![GitHub](https://img.shields.io/github/license/FS-make-simple/paq8sk) + # paq8sk To install and use in Windows: -- To install, put paq8sk.exe or a shortcut to it on your desktop. -- To compress a file or folder, drop it on the paq8sk icon. -- To decompress, drop a .paq8sk file on the icon. +- To install, put `paq8sk.exe` or a shortcut to it on your desktop. +- To compress a file or folder, drop it on the `paq8sk` icon. +- To decompress, drop a `.paq8sk` file on the icon. A .paq8sk extension is added for compression, removed for decompression. The output will go in the same folder as the input. @@ -14,21 +21,21 @@ progress. When it is done you can close the window by pressing ENTER or clicking [X]. -# COMMAND LINE INTERFACE +## COMMAND LINE INTERFACE -- To install, put paq8sk.exe somewhere in your PATH. -- To compress: paq8sk [-N] file1 [file2...] -- To decompress: paq8sk [-d] file1.paq8sk [dir2] -- To view contents: paq8sk -l file1.paq8sk +- To install, put `paq8sk.exe` somewhere in your PATH. +- To compress: `paq8sk [-N] file1 [file2...]` +- To decompress: `paq8sk [-d] file1.paq8sk [dir2]` +- To view contents: `paq8sk -l file1.paq8sk` The compressed output file is named by adding ".paq8sk" extension to -the first named file (file1.paq8sk). Each file that exists will be +the first named file (`file1.paq8sk`). Each file that exists will be added to the archive and its name will be stored without a path. -The option -N specifies a compression level ranging from -0 -(fastest) to -8 (smallest). The default is -5. If there is +The option `-N` specifies a compression level ranging from `-0` +(fastest) to `-8` (smallest). The default is `-5`. If there is no option and only one file, then the program will pause when finished until you press the ENTER key (to support drag and drop). -If file1.paq8sk exists then it is overwritten. Level -0 only +If file1.paq8sk exists then it is overwritten. Level `-0` only transforms or decompresses data. 
If the first named file ends in ".paq8sk" then it is assumed to be @@ -47,11 +54,15 @@ structure, except that empty directories are not stored, and file attributes (timestamps, permissions, etc.) are not preserved. During extraction, directories are created as needed. For example: +```sh paq8sk -4 c:\tmp\foo bar +``` -compresses foo and bar (if they exist) to c:\tmp\foo.paq8sk at level 4. +compresses foo and bar (if they exist) to `c:\tmp\foo.paq8sk` at level 4. +```sh paq8sk -d c:\tmp\foo.paq8sk . +``` extracts foo and compares bar in the current directory. If foo and bar are directories then their contents are extracted/compared. @@ -63,18 +74,20 @@ File names with nonprintable characters are not supported (spaces are OK). -# TO COMPILE +## TO COMPILE -There are 2 files: paq8sk.cpp (C++) and wrtpre.cpp (C++). +There are 2 files: `paq8sk.cpp` (C++) and `wrtpre.cpp` (C++). paq8sk.cpp recognizes the following compiler options: +``` -DWINDOWS (to compile in Windows) -DUNIX (to compile in Unix, Linux, etc) -DMT (to compile with multithreading support) -DDEFAULT_OPTION=N (to change the default compression level from 5 to N). +``` -If you compile without -DWINDOWS or -DUNIX, you can still compress files, +If you compile without `-DWINDOWS` or `-DUNIX`, you can still compress files, but you cannot compress directories or create them during extraction. You can extract directories if you manually create the empty directories first. @@ -86,43 +99,52 @@ drag and drop on machines with less than 256 MB of memory. 
Use Recommended compiler commands and optimizations: +``` MINGW g++ (x86,x64): with multithreading: - g++ paq8sk.cpp -DWINDOWS -DMT -msse2 -O3 -s -static -lz -o paq8sk.exe + g++ paq8sk.cpp -DWINDOWS -DMT -msse2 -O3 -s -static -lz -lbz2 -o paq8sk.exe without multithreading: - g++ paq8sk.cpp -DWINDOWS -msse2 -O3 -s -static -lz -o paq8sk.exe + g++ paq8sk.cpp -DWINDOWS -msse2 -O3 -s -static -lz -lbz2 -o paq8sk.exe UNIX/Linux (PC x86,x64): with multithreading: - g++ paq8sk.cpp -DUNIX -DMT -msse2 -O3 -s -static -lpthread -lz -o paq8sk + g++ paq8sk.cpp -DUNIX -DMT -msse2 -O3 -s -static -lpthread -lz -lbz2 -o paq8sk without multithreading: - g++ paq8sk.cpp -DUNIX -msse2 -O3 -s -static -lpthread -lz -o paq8sk + g++ paq8sk.cpp -DUNIX -msse2 -O3 -s -static -lpthread -lz -lbz2 -o paq8sk Non PC (e.g. PowerPC under MacOS X): - g++ paq8sk.cpp -O2 -DUNIX -s -lz -o paq8sk + g++ paq8sk.cpp -O2 -DUNIX -s -lz -lbz2 -o paq8sk +``` -Alternatively, you can use CMake to build paq8sk. +Alternatively, you can use CMake to build `paq8sk`. -CMake recognizes the following compiler options for paq8sk: +CMake recognizes the following compiler options for paq8sk: +``` -DUNIX: Whether to build for Unix. Otherwise, build for Windows) -DNATIVECPU: Whether to build for your cpu (vs. the general public). Default is OFF) -DMT: Whether to enable Multithreading. Default is OFF) -DDISABLE_SM: Whether to disable faster statemaps. Default is OFF) - -To build for Windows in MinGW with Multithreading and build a native executable for your CPU: +``` + +To build for Windows in MinGW with Multithreading and build a native executable for your CPU: +```sh cmake . -G "MSYS Makefiles" -DMT=ON -DNATIVECPU=ON +``` -To build for Unix systems with Multithreading and build a native executable for your CPU: +To build for Unix systems with Multithreading and build a native executable for your CPU: +```sh cmake . 
-DUNIX=ON -DMT=ON -DNATIVECPU=ON +``` -Then build with make: +Then build with make: +```sh make +``` - -# ARCHIVE FILE FORMAT +## ARCHIVE FILE FORMAT An archive has the following format. - +``` paq8sk -N segment size compressed segment size @@ -135,98 +157,100 @@ An archive has the following format. compressed binary data file segmentation data stream data sizes[11] - --N is the option (-0 to -15) and mode, even if a default was used. +``` +`-N` is the option (`-0` to `-15`) and mode, even if a default was used. +``` 00LMNNNN bit M is set if fast mode, bit L is set if quick mode, if L or M are not set default to slow mode. - +``` segment size is total size of file(s) compressed segment size is compressed segmentation data in bytes at segmnet offset after compressed binary data. file segmentation data is full list of detected blocks: +``` type size info type size info type size type size info ..... - +``` info is present if block type needs extra info like in image or audio. Plain file names are stored without a path. Files in compressed directories are stored with path relative to the compressed directory (using UNIX style forward slashes "/"). For example, given these files: - +``` 123 C:\dir1\file1.txt 456 C:\dir2\file2.txt - +``` Then - +```sh paq8sk archive \dir1\file1.txt \dir2 - -will create archive.paq8sk +``` +will create `archive.paq8sk` The command: - +```sh paq8sk archive.paq8sk C:\dir3 - +``` will create the files: - +``` C:\dir3\file1.txt C:\dir3\dir2\file2.txt - +``` Decompression will fail if the first 10 bytes are not "paq8sk -". Sizes -are stored as decimal numbers. CR, LF, TAB are ASCII codes -13, 10, 9 respectively. +are stored as decimal numbers. `CR`, `LF`, `TAB` are ASCII codes +`13`, `10`, `9` respectively. -# ARITHMETIC CODING +## ARITHMETIC CODING The binary data is arithmetic coded as the shortest base 256 fixed point -number x = SUM_i x_i 256^-1-i such that p(= 16. - - The primaty output is t_i := stretch(sm(n0,n1,h)), where sm(.) 
is - a stationary map with K = 1/256, initialized to - sm(n0,n1,h) = (n1+(1/64))/(n+2/64). Four additional inputs are also +``` + The primary output is `t_i := stretch(sm(n0,n1,h))`, where `sm(.)` is + a stationary map with `K = 1/256`, initialized to + `sm(n0,n1,h) = (n1+(1/64))/(n+2/64)`. Four additional inputs can also be computed to improve compression slightly: - +``` p1_i = sm(n0,n1,h) p0_i = 1 - p1_i t_i := stretch(p_1) @@ -274,26 +298,27 @@ There are several types of bit history states: t_i+2 := K2 stretch(p1) if n0 = 0, -K2 stretch(p1) if n1 = 0, else 0 t_i+3 := K3 (-p0_i if n1 = 0, p1_i if n0 = 0, else 0) t_i+4 := K3 (-p0_i if n0 = 0, p1_i if n1 = 0, else 0) +``` + where `K1..K4` are ad-hoc constants. - where K1..K4 are ad-hoc constants. - - h is updated as follows: + `h` is updated as follows: +``` If n < 4, append y_j to h. Else if n <= 16, set h := y_j. Else h = 0. - +``` The update rule is biased toward newer data in a way that allows n0 or n1, but not both, to grow large by discarding counts of the opposite bit. Large counts are incremented probabilistically. Specifically, when y_j = 0 then the update rule is: - +``` n0 := n0 + 1, n < 29 n0 + 1 with probability 2^(27-n0)/2 else n0, 29 <= n0 < 41 n0, n = 41. n1 := n1, n1 <= 5 round(8/3 lg n1), if n1 > 5 - - swapping (n0,n1) when y_j = 1. +``` + swapping `(n0,n1)` when `y_j = 1`. Furthermore, to allow an 8 bit representation for (n0,n1,h), states exceeding the following values of n0 or n1 are replaced with the @@ -305,9 +330,9 @@ There are several types of bit history states: - Match Model. The state is (c,b), initially (0,0), where c is 1 if the context was previously seen, else 0, and b is the next bit in this context. The prediction is: - +``` t_i := (2b - 1)Kc log(m + 1) - +``` where m is the length of the context. The update rule is c := 1, b := y_j. 
A match model can be implemented efficiently by storing input in a buffer and storing pointers into the buffer into a hash @@ -315,7 +340,7 @@ There are several types of bit history states: and b can be retrieved from the buffer. -# CONTEXTS +## CONTEXTS High compression is achieved by combining a large number of contexts. Most (not all) contexts start on a byte boundary and end on the bit @@ -408,7 +433,7 @@ modeled with both a run map and a nonstationary map unless indicated. DMC. The second predictor is a bit history state mapped adaptively to a probability as as in a Nonstationary Map. -# ARCHITECTURE +## ARCHITECTURE The context models are mixed by several of several hundred neural networks selected by a low-order context. The outputs of these networks are @@ -421,11 +446,11 @@ An APM is a stationary map combining a context and an input probability. The input probability is stretched and divided into 32 segments to combine with other contexts. The output is interpolated between two adjacent quantized values of stretch(p1). There are 2 APM stages in series: - +``` p1 := (p1 + 3 APM(order 0, p1)) / 4. p1 := (APM(order 1, p1) + 2 APM(order 2, p1) + APM(order 3, p1)) / 4. - -# PREPROCESSING +``` +## PREPROCESSING paq8sk uses preprocessing transforms on certain data types to improve compression. To improve reliability, the decoding transform is @@ -477,7 +502,7 @@ The preprocessor has 3 parts: Supports: https://en.wikipedia.org/wiki/Ascii85#Adobe_version - 24-bit images: 24-bit image data uses simple color transform - (b, g, r) -> (g, g-r, g-b) + `(b, g, r) -> (g, g-r, g-b)` - ZLIB: Decodes zlib encoded data and recursively transformed up to level 5. Supports zlib compressed images (4/8/24 bit) in pdf @@ -499,7 +524,7 @@ The preprocessor has 3 parts: - MRB: 8 bit images with RLE compression -# IMPLEMENTATION +## IMPLEMENTATION Hash tables are designed to minimize cache misses, which consume most of the CPU time. @@ -533,5 +558,5 @@ at a time. 
Using assembler is 8 times faster than C++ for this code and 1/3 faster overall. (However I found that SSE2 code on an AMD-64, which computes 8 elements at a time, is not any faster). -# SEE ALSO - paq8px https://github.com/hxim/paq8px +## SEE ALSO +* [paq8px](https://github.com/hxim/paq8px) diff --git a/bzip2/blocksort.c b/src/bzip2/blocksort.c similarity index 100% rename from bzip2/blocksort.c rename to src/bzip2/blocksort.c diff --git a/bzip2/bz2compress.c b/src/bzip2/bz2compress.c similarity index 100% rename from bzip2/bz2compress.c rename to src/bzip2/bz2compress.c diff --git a/bzip2/bz2decompress.c b/src/bzip2/bz2decompress.c similarity index 100% rename from bzip2/bz2decompress.c rename to src/bzip2/bz2decompress.c diff --git a/bzip2/bzip2.c b/src/bzip2/bzip2.c similarity index 99% rename from bzip2/bzip2.c rename to src/bzip2/bzip2.c index 3904107..04c9c49 100644 --- a/bzip2/bzip2.c +++ b/src/bzip2/bzip2.c @@ -128,7 +128,7 @@ #if BZ_LCCWIN32 # include # include -# include +# include # define NORETURN /**/ # define PATH_SEP '\\' diff --git a/bzip2/bzip2recover.c b/src/bzip2/bzip2recover.c similarity index 100% rename from bzip2/bzip2recover.c rename to src/bzip2/bzip2recover.c diff --git a/bzip2/bzlib.c b/src/bzip2/bzlib.c similarity index 100% rename from bzip2/bzlib.c rename to src/bzip2/bzlib.c diff --git a/bzip2/bzlib.h b/src/bzip2/bzlib.h similarity index 100% rename from bzip2/bzlib.h rename to src/bzip2/bzlib.h diff --git a/bzip2/bzlib_private.h b/src/bzip2/bzlib_private.h similarity index 100% rename from bzip2/bzlib_private.h rename to src/bzip2/bzlib_private.h diff --git a/bzip2/crctable.c b/src/bzip2/crctable.c similarity index 100% rename from bzip2/crctable.c rename to src/bzip2/crctable.c diff --git a/bzip2/dlltest.c b/src/bzip2/dlltest.c similarity index 100% rename from bzip2/dlltest.c rename to src/bzip2/dlltest.c diff --git a/bzip2/huffman.c b/src/bzip2/huffman.c similarity index 100% rename from bzip2/huffman.c rename to 
src/bzip2/huffman.c diff --git a/bzip2/randtable.c b/src/bzip2/randtable.c similarity index 100% rename from bzip2/randtable.c rename to src/bzip2/randtable.c diff --git a/bzip2/spewG.c b/src/bzip2/spewG.c similarity index 100% rename from bzip2/spewG.c rename to src/bzip2/spewG.c diff --git a/bzip2/unzcrash.c b/src/bzip2/unzcrash.c similarity index 100% rename from bzip2/unzcrash.c rename to src/bzip2/unzcrash.c diff --git a/lstm1.inc b/src/lstm1.inc similarity index 100% rename from lstm1.inc rename to src/lstm1.inc diff --git a/mod_ppmd.inc b/src/mod_ppmd.inc similarity index 100% rename from mod_ppmd.inc rename to src/mod_ppmd.inc diff --git a/mod_sse.cpp b/src/mod_sse.cpp similarity index 100% rename from mod_sse.cpp rename to src/mod_sse.cpp diff --git a/paq8sk.cpp b/src/paq8sk.cpp similarity index 100% rename from paq8sk.cpp rename to src/paq8sk.cpp diff --git a/sh_v2f.inc b/src/sh_v2f.inc similarity index 100% rename from sh_v2f.inc rename to src/sh_v2f.inc diff --git a/wrton.cpp b/src/wrton.cpp similarity index 100% rename from wrton.cpp rename to src/wrton.cpp diff --git a/wrtpre.cpp b/src/wrtpre.cpp similarity index 100% rename from wrtpre.cpp rename to src/wrtpre.cpp diff --git a/zlib/adler32.c b/src/zlib/adler32.c similarity index 100% rename from zlib/adler32.c rename to src/zlib/adler32.c diff --git a/zlib/crc32.c b/src/zlib/crc32.c similarity index 100% rename from zlib/crc32.c rename to src/zlib/crc32.c diff --git a/zlib/crc32.h b/src/zlib/crc32.h similarity index 100% rename from zlib/crc32.h rename to src/zlib/crc32.h diff --git a/zlib/deflate.c b/src/zlib/deflate.c similarity index 100% rename from zlib/deflate.c rename to src/zlib/deflate.c diff --git a/zlib/deflate.h b/src/zlib/deflate.h similarity index 100% rename from zlib/deflate.h rename to src/zlib/deflate.h diff --git a/zlib/inffast.c b/src/zlib/inffast.c similarity index 100% rename from zlib/inffast.c rename to src/zlib/inffast.c diff --git a/zlib/inffast.h b/src/zlib/inffast.h 
similarity index 100% rename from zlib/inffast.h rename to src/zlib/inffast.h diff --git a/zlib/inffixed.h b/src/zlib/inffixed.h similarity index 100% rename from zlib/inffixed.h rename to src/zlib/inffixed.h diff --git a/zlib/inflate.c b/src/zlib/inflate.c similarity index 100% rename from zlib/inflate.c rename to src/zlib/inflate.c diff --git a/zlib/inflate.h b/src/zlib/inflate.h similarity index 100% rename from zlib/inflate.h rename to src/zlib/inflate.h diff --git a/zlib/inftrees.c b/src/zlib/inftrees.c similarity index 100% rename from zlib/inftrees.c rename to src/zlib/inftrees.c diff --git a/zlib/inftrees.h b/src/zlib/inftrees.h similarity index 100% rename from zlib/inftrees.h rename to src/zlib/inftrees.h diff --git a/zlib/trees.c b/src/zlib/trees.c similarity index 100% rename from zlib/trees.c rename to src/zlib/trees.c diff --git a/zlib/trees.h b/src/zlib/trees.h similarity index 100% rename from zlib/trees.h rename to src/zlib/trees.h diff --git a/zlib/zconf.h b/src/zlib/zconf.h similarity index 100% rename from zlib/zconf.h rename to src/zlib/zconf.h diff --git a/zlib/zlib.h b/src/zlib/zlib.h similarity index 100% rename from zlib/zlib.h rename to src/zlib/zlib.h diff --git a/zlib/zutil.c b/src/zlib/zutil.c similarity index 100% rename from zlib/zutil.c rename to src/zlib/zutil.c diff --git a/zlib/zutil.h b/src/zlib/zutil.h similarity index 100% rename from zlib/zutil.h rename to src/zlib/zutil.h