summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJuan Manuel Tomás <jtomas1815@gmail.com>2025-11-04 17:53:41 -0300
committerJuan Manuel Tomás <jtomas1815@gmail.com>2025-11-04 17:53:41 -0300
commit4e6aae19275dad2053adbc2ad34ab1330daf1b0f (patch)
tree0f6d08e48b06be9ee48ca6848289068ea228112c
parentd2a8d3a2171ebbc9934e3703a0f9b6fd4070a6b8 (diff)
downloadlempel-4e6aae19275dad2053adbc2ad34ab1330daf1b0f.tar.gz
lempel-4e6aae19275dad2053adbc2ad34ab1330daf1b0f.zip
Working decoder on byte level
-rw-r--r--common.lisp9
-rw-r--r--decode.lisp55
-rw-r--r--encode.lisp57
-rw-r--r--file.lisp29
-rw-r--r--lempel.asd7
-rw-r--r--lempel.lisp1
-rw-r--r--package.lisp9
7 files changed, 163 insertions, 4 deletions
diff --git a/common.lisp b/common.lisp
new file mode 100644
index 0000000..a061ca1
--- /dev/null
+++ b/common.lisp
@@ -0,0 +1,9 @@
+(in-package #:lempel)
+
+(defparameter window-bits 12) ; Amount of bits for window. Valid range between 8 and 15
+(defparameter break-even-threshold 4) ; Minimum word length to consider for compression
+(defparameter reversed-byte-order nil)
+
+(defstruct point
+ (offset 0 :type fixnum)
+ (size 0 :type fixnum))
diff --git a/decode.lisp b/decode.lisp
new file mode 100644
index 0000000..df578a7
--- /dev/null
+++ b/decode.lisp
@@ -0,0 +1,55 @@
+(in-package #:lempel)
+
+(declaim (ftype (function ((array (unsigned-byte 8)) (unsigned-byte 8)) (array (unsigned-byte 8))) prefix-decode))
+(defun prefix-decode (data prefix)
+ (let ((result (queue:new)))
+ (dotimes (i (length data))
+ (cond ((= (elt data i) prefix)
+ (let ((point (byte-pair->point data i)))
+ (if (= (point-size point) 0)
+ (progn
+ (queue:push prefix result)
+ (incf i))
+ (progn
+ (queue:push point result)
+ (incf i 2)))))
+ (t (queue:push (elt data i) result))))
+ (decompress (queue:to-list result))))
+
+(declaim (ftype (function (list) (array (unsigned-byte 8))) decompress))
+(defun decompress (data)
+ (declare (optimize (debug 3)))
+ (let ((result (make-array (get-uncompressed-size data) :element-type '(unsigned-byte 8)))
+ (fill-count 0))
+ (dolist (point data)
+ (cond ((point-p point)
+ (let ((start (- fill-count (point-offset point))))
+ (dotimes (j (point-size point))
+ (setf (elt result (+ fill-count j)) (elt result (+ start j))))
+ (incf fill-count (point-size point))))
+ ((typep point '(unsigned-byte 8))
+ (setf (elt result fill-count) point)
+ (incf fill-count))
+ (t (error (format nil "cannot handle type ~a" (type-of point))))))
+ (assert (= fill-count (length result)))
+ result))
+
+(declaim (ftype (function ((array (unsigned-byte 8)) fixnum) point) byte-pair->point))
+(defun byte-pair->point (bytes index)
+ (let ((offset (elt bytes (+ (if reversed-byte-order 2 1) index)))
+ (size (elt bytes (+ (if reversed-byte-order 1 2) index)))
+ (max-word-length (1- (ash 1 (- 16 window-bits)))))
+ (make-point :offset (logior offset
+ (ash (logand size (logxor #xff max-word-length))
+ (- window-bits 8)))
+ :size (logand size max-word-length))))
+
+(declaim (ftype (function (list) fixnum) get-uncompressed-size))
+(defun get-uncompressed-size (data)
+ (let ((size 0))
+ (dolist (point data)
+ (cond ((point-p point)
+ (incf size (point-size point)))
+ (t
+ (incf size))))
+ size))
diff --git a/encode.lisp b/encode.lisp
new file mode 100644
index 0000000..2a50828
--- /dev/null
+++ b/encode.lisp
@@ -0,0 +1,57 @@
+(in-package #:lempel)
+
+(declaim (ftype (function (point) (cons (unsigned-byte 8))) point->bytes))
+(defun point->bytes (p)
+ (let ((window-width (1- (ash 1 window-bits))))
+ (let ((offset (logand (point-offset p) #xff))
+ (size (logior (ash (logand (point-offset p)
+ (logxor window-width #xff))
+ (- 8 window-bits))
+ (point-size p))))
+ (if reversed-byte-order
+ (cons size offset)
+ (cons offset size)))))
+
+(defun prefix-encode (data prefix)
+ (let ((result '()))
+ (dolist (point data)
+ (cond ((characterp point)
+ (push (char-code point) result))
+ ((point-p point)
+ (push prefix result)
+ (let ((encoded-pair (point->bytes point)))
+ (push (car encoded-pair) result)
+ (push (cdr encoded-pair) result)))
+ (t (error "Invalid point type"))))
+ (make-array (length result)
+ :element-type '(unsigned-byte 8)
+ :initial-contents (reverse result))))
+
+(defun compress (data)
+ (let ((result '())
+ (window-width (1- (ash 1 window-bits))))
+ (dotimes (cursor (length data))
+ (let* ((window-start (max 0 (- cursor window-width)))
+ (match (search-window data window-start cursor)))
+ (if match
+ (progn
+ (push match result)
+ (incf cursor (1- (point-size match))))
+ (push (elt data cursor) result))))
+ (reverse result)))
+
+(defun search-window (data start end)
+ (let ((offset 0)
+ (size 0)
+ (max-word-length (1- (ash 1 (- 16 window-bits)))))
+ (do ((i (min (1+ end) (1- (length data))) (1+ i)))
+ ((let ((match (search data data :start1 end :end1 i :start2 start :end2 end)))
+ (when match
+ (setf offset (- end match))
+ (setf size (- i end)))
+ (or (not match)
+ (>= size max-word-length)
+ (> i (1- (length data)))))
+ (if (>= size break-even-threshold)
+ (make-point :offset offset :size size)
+ nil)))))
diff --git a/file.lisp b/file.lisp
new file mode 100644
index 0000000..642c5ab
--- /dev/null
+++ b/file.lisp
@@ -0,0 +1,29 @@
+(in-package #:lempel)
+
+(defun compress-file (input-path output-path)
+ (let ((file (str:read-file input-path)))
+ (with-open-file (output output-path
+ :direction :output
+ :if-does-not-exist :create
+ :if-exists :supersede
+ :element-type '(unsigned-byte 8))
+ (write-sequence (prefix-encode (compress file) #xAE) output)))
+ t)
+
+(defun decompress-file (input-path output-path)
+ (let ((file (read-file input-path)))
+ (with-open-file (output output-path
+ :direction :output
+ :if-does-not-exist :create
+ :if-exists :supersede
+ :element-type :default)
+ (write-sequence (prefix-decode file #xAE) output)))
+ t)
+
+(declaim (ftype (function (string) (array (unsigned-byte 8))) read-file))
+(defun read-file (path)
+ (with-open-file (file path :element-type '(unsigned-byte 8))
+ (let* ((size (file-length file))
+ (buf (make-array size :element-type '(unsigned-byte 8))))
+ (read-sequence buf file)
+ buf)))
diff --git a/lempel.asd b/lempel.asd
index 895e46b..04fd911 100644
--- a/lempel.asd
+++ b/lempel.asd
@@ -1,6 +1,9 @@
(defsystem "lempel"
:serial t
- :depends-on ()
+ :depends-on ("utils")
:components
((:file "package")
- (:file "lempel")))
+ (:file "common")
+ (:file "encode")
+ (:file "decode")
+ (:file "file")))
diff --git a/lempel.lisp b/lempel.lisp
deleted file mode 100644
index bb063c8..0000000
--- a/lempel.lisp
+++ /dev/null
@@ -1 +0,0 @@
-(in-package #:lempel)
diff --git a/package.lisp b/package.lisp
index f25edf6..bb34b29 100644
--- a/package.lisp
+++ b/package.lisp
@@ -1,2 +1,9 @@
(defpackage #:lempel
- (:use #:cl))
+ (:use #:cl)
+ (:export #:compress-file
+ #:decompress-file
+ #:prefix-encode
+ #:prefix-decode
+ #:window-bits
+ #:break-even-threshold
+ #:reversed-byte-order))