diff options
| -rw-r--r-- | common.lisp | 9 | ||||
| -rw-r--r-- | decode.lisp | 55 | ||||
| -rw-r--r-- | encode.lisp | 57 | ||||
| -rw-r--r-- | file.lisp | 29 | ||||
| -rw-r--r-- | lempel.asd | 7 | ||||
| -rw-r--r-- | lempel.lisp | 1 | ||||
| -rw-r--r-- | package.lisp | 9 |
7 files changed, 163 insertions, 4 deletions
diff --git a/common.lisp b/common.lisp new file mode 100644 index 0000000..a061ca1 --- /dev/null +++ b/common.lisp @@ -0,0 +1,9 @@ +(in-package #:lempel) + +(defparameter window-bits 12) ; Amount of bits for window. Valid range between 8 and 15 +(defparameter break-even-threshold 4) ; Minimum word length to consider for compression +(defparameter reversed-byte-order nil) + +(defstruct point + (offset 0 :type fixnum) + (size 0 :type fixnum)) diff --git a/decode.lisp b/decode.lisp new file mode 100644 index 0000000..df578a7 --- /dev/null +++ b/decode.lisp @@ -0,0 +1,55 @@ +(in-package #:lempel) + +(declaim (ftype (function ((array (unsigned-byte 8)) (unsigned-byte 8)) (array (unsigned-byte 8))) prefix-decode)) +(defun prefix-decode (data prefix) + (let ((result (queue:new))) + (dotimes (i (length data)) + (cond ((= (elt data i) prefix) + (let ((point (byte-pair->point data i))) + (if (= (point-size point) 0) + (progn + (queue:push prefix result) + (incf i)) + (progn + (queue:push point result) + (incf i 2))))) + (t (queue:push (elt data i) result)))) + (decompress (queue:to-list result)))) + +(declaim (ftype (function (list) (array (unsigned-byte 8))) decompress)) +(defun decompress (data) + (declare (optimize (debug 3))) + (let ((result (make-array (get-uncompressed-size data) :element-type '(unsigned-byte 8))) + (fill-count 0)) + (dolist (point data) + (cond ((point-p point) + (let ((start (- fill-count (point-offset point)))) + (dotimes (j (point-size point)) + (setf (elt result (+ fill-count j)) (elt result (+ start j)))) + (incf fill-count (point-size point)))) + ((typep point '(unsigned-byte 8)) + (setf (elt result fill-count) point) + (incf fill-count)) + (t (error (format nil "cannot handle type ~a" (type-of point)))))) + (assert (= fill-count (length result))) + result)) + +(declaim (ftype (function ((array (unsigned-byte 8)) fixnum) point) byte-pair->point)) +(defun byte-pair->point (bytes index) + (let ((offset (elt bytes (+ (if reversed-byte-order 2 1) index))) + (size (elt bytes (+ (if reversed-byte-order 1 2) index))) + (max-word-length (1- (ash 1 (- 16 window-bits))))) + (make-point :offset (logior offset + (ash (logand size (logxor #xff max-word-length)) + (- window-bits 8))) + :size (logand size max-word-length)))) + +(declaim (ftype (function (list) fixnum) get-uncompressed-size)) +(defun get-uncompressed-size (data) + (let ((size 0)) + (dolist (point data) + (cond ((point-p point) + (incf size (point-size point))) + (t + (incf size)))) + size)) diff --git a/encode.lisp b/encode.lisp new file mode 100644 index 0000000..2a50828 --- /dev/null +++ b/encode.lisp @@ -0,0 +1,57 @@ +(in-package #:lempel) + +(declaim (ftype (function (point) (cons (unsigned-byte 8))) point->bytes)) +(defun point->bytes (p) + (let ((window-width (1- (ash 1 window-bits)))) + (let ((offset (logand (point-offset p) #xff)) + (size (logior (ash (logand (point-offset p) + (logxor window-width #xff)) + (- 8 window-bits)) + (point-size p)))) + (if reversed-byte-order + (cons size offset) + (cons offset size))))) + +(defun prefix-encode (data prefix) + (let ((result '())) + (dolist (point data) + (cond ((characterp point) + (push (char-code point) result)) + ((point-p point) + (push prefix result) + (let ((encoded-pair (point->bytes point))) + (push (car encoded-pair) result) + (push (cdr encoded-pair) result))) + (t (error "Invalid point type")))) + (make-array (length result) + :element-type '(unsigned-byte 8) + :initial-contents (reverse result)))) + +(defun compress (data) + (let ((result '()) + (window-width (1- (ash 1 window-bits)))) + (dotimes (cursor (length data)) + (let* ((window-start (max 0 (- cursor window-width))) + (match (search-window data window-start cursor))) + (if match + (progn + (push match result) + (incf cursor (1- (point-size match)))) + (push (elt data cursor) result)))) + (reverse result))) + +(defun search-window (data start end) + (let ((offset 0) + (size 0) + (max-word-length (1- (ash 1 (- 16 window-bits))))) + (do ((i (min (1+ end) (1- (length data))) (1+ i))) + ((let ((match (search data data :start1 end :end1 i :start2 start :end2 end))) + (when match + (setf offset (- end match)) + (setf size (- i end))) + (or (not match) + (>= size max-word-length) + (> i (1- (length data))))) + (if (>= size break-even-threshold) + (make-point :offset offset :size size) + nil))))) diff --git a/file.lisp b/file.lisp new file mode 100644 index 0000000..642c5ab --- /dev/null +++ b/file.lisp @@ -0,0 +1,29 @@ +(in-package #:lempel) + +(defun compress-file (input-path output-path) + (let ((file (str:read-file input-path))) + (with-open-file (output output-path + :direction :output + :if-does-not-exist :create + :if-exists :supersede + :element-type '(unsigned-byte 8)) + (write-sequence (prefix-encode (compress file) #xAE) output))) + t) + +(defun decompress-file (input-path output-path) + (let ((file (read-file input-path))) + (with-open-file (output output-path + :direction :output + :if-does-not-exist :create + :if-exists :supersede + :element-type :default) + (write-sequence (prefix-decode file #xAE) output))) + t) + +(declaim (ftype (function (string) (array (unsigned-byte 8))) read-file)) +(defun read-file (path) + (with-open-file (file path :element-type '(unsigned-byte 8)) + (let* ((size (file-length file)) + (buf (make-array size :element-type '(unsigned-byte 8)))) + (read-sequence buf file) + buf))) @@ -1,6 +1,9 @@ (defsystem "lempel" :serial t - :depends-on () + :depends-on ("utils") :components ((:file "package") - (:file "lempel"))) + (:file "common") + (:file "encode") + (:file "decode") + (:file "file"))) diff --git a/lempel.lisp b/lempel.lisp deleted file mode 100644 index bb063c8..0000000 --- a/lempel.lisp +++ /dev/null @@ -1 +0,0 @@ -(in-package #:lempel) diff --git a/package.lisp b/package.lisp index f25edf6..bb34b29 100644 --- a/package.lisp +++ b/package.lisp @@ -1,2 +1,9 @@ (defpackage #:lempel - (:use #:cl)) + (:use #:cl) + (:export #:compress-file + #:decompress-file + #:prefix-encode + #:prefix-decode + #:window-bits + #:break-even-threshold + #:reversed-byte-order)) |
