From d08d5b232d74f3a75a833b231c4ef5e80870c993 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Tom=C3=A1s?= Date: Tue, 17 Mar 2026 17:48:03 -0300 Subject: Unify cursor start and end --- base.lisp | 63 ++++++++++++++++++---------------------------- core.lisp | 42 +++++++++++++------------------ cursor.lisp | 82 ++++++++++++++++++++++++++++++++++++++++++------------------ main.lisp | 8 +++--- package.lisp | 7 ++---- test.lisp | 10 +++----- 6 files changed, 110 insertions(+), 102 deletions(-) diff --git a/base.lisp b/base.lisp index 0599a3c..7c72501 100644 --- a/base.lisp +++ b/base.lisp @@ -1,61 +1,48 @@ (in-package #:monparser) -(defstruct result) +(defstruct result + (place (make-instance 'cursor) :type cursor)) (defstruct (parsing (:include result)) - tree - (start (make-instance 'cursor) :type cursor) - (end (make-instance 'cursor) :type cursor)) + tree) (defstruct (failure (:include result)) - (place (make-instance 'cursor) :type cursor) (message "" :type string) (priority 0 :type integer)) +(defmethod print-object ((obj failure) stream) + (let ((linecol (line-and-column (result-place obj)))) + (format stream "~a:~a: ~a~&~a~&" + (car linecol) (cdr linecol) (failure-message obj) (result-place obj)))) + (deftype parser () - `(function (cursor cursor) result)) + `(function (cursor) result)) (defmacro lazy (parser &rest args) - (let ((start (gensym)) - (input (gensym))) + (let ((input (gensym))) `(the parser - (lambda (,start ,input) - (funcall (,parser ,@args) ,start ,input))))) - -(defun line-and-column (str index) - (let ((line 1) (column 1)) - (dotimes (i index) - (let ((c (char str i))) - (case c - (#\Newline - (incf line) - (setf column 1)) - (t (incf column))))) - (cons line column))) - -(defmethod print-object ((obj failure) stream) - (let ((linecol (line-and-column (data (failure-place obj)) - (index (failure-place obj))))) - (format stream "~a:~a: ~a~&~a~&" - (car linecol) (cdr linecol) (failure-message obj) (failure-place obj)))) + (lambda (,input) + (funcall (,parser ,@args) ,input))))) (declaim (ftype (function (t &key (:priority integer)) parser) fail)) (defun fail (message &key (priority 1)) - (lambda (start input) - (declare (ignore start)) + (lambda (input) (make-failure :place input :message message :priority priority))) (declaim (ftype (function (t) parser) new)) (defun new (tree) - (lambda (start input) - (make-parsing :tree tree :start start :end input))) + (lambda (input) + (make-parsing :place input :tree tree))) + +(deftype parser-continuation () + `(function (t) parser)) -(declaim (ftype (function (parser (function (result) parser)) parser) bind)) +(declaim (ftype (function (parser parser-continuation) parser) bind)) (defun bind (parser f) - (lambda (start input) - (let ((r (funcall parser input input))) + (lambda (input) + (let ((r (funcall parser (cursor-rebase input)))) (cond ((parsing-p r) - (funcall (funcall f r) start (parsing-end r))) + (funcall (funcall f r) (cursor-merge input (result-place r)))) ((failure-p r) r) (t (error (format nil "Invalid return value: ~a" r))))))) @@ -67,18 +54,18 @@ (cond ((symbolp var) (if (string= (symbol-name var) "_") `(bind ,parser - (the (function (result) parser) + (the parser-continuation (lambda (,var) (declare (ignore ,var)) (comp ,(cdr bindings) ,@body)))) `(bind ,parser - (the (function (result) parser) + (the parser-continuation (lambda (,var) (let ((,var (parsing-tree ,var))) (comp ,(cdr bindings) ,@body))))))) ((and (listp var) (= (length var) 1) (symbolp (car var))) `(bind ,parser - (the (function (result) parser) + (the parser-continuation (lambda (,(first var)) (comp ,(cdr bindings) ,@body))))) (t (error "Binding must be either a symbol or a list of one symbol.")))))) diff --git a/core.lisp b/core.lisp index 9fef78f..b7b204a 100644 --- a/core.lisp +++ b/core.lisp @@ -28,14 +28,12 @@ (let ((start (gensym)) (input (gensym))) `(the parser - (lambda (,start ,input) - (declare (ignore ,start)) - (if (has-data? ,input) - (let ((it (peek ,input))) + (lambda (,input) + (if (cursor-has-data? ,input) + (let ((it (cursor-peek ,input))) (if ,predicate - (make-parsing :tree it - :start ,input - :end (advance ,input)) + (make-parsing :place (cursor-advance ,input) + :tree it) (make-failure :place ,input :message (format nil "Expected: ~a, Got: ~:c." ',predicate it)))) (make-failure :place ,input @@ -43,16 +41,15 @@ (declaim (ftype (function (parser parser &rest parser) parser) one-of)) (defun one-of (first-parser second-parser &rest other-parsers) - (lambda (start input) - (declare (ignore start)) + (lambda (input) (let ((parsers (cons first-parser (cons second-parser other-parsers))) (result (make-failure :place input))) (dolist (p parsers) - (let ((r (funcall p input input))) + (let ((r (funcall p (cursor-rebase input)))) (cond ((parsing-p r) (when (or (not (parsing-p result)) - (> (distance (parsing-end result) - (parsing-end r)) + (> (distance (result-place result) + (result-place r)) 0)) (setf result r))) ((failure-p r) @@ -62,7 +59,7 @@ (when (or (> priority-cmp 0) (and (= priority-cmp 0) (>= (distance (failure-place result) - (failure-place r)) + (failure-place r)) 0))) (setf result r))))) (t (error (format nil "Invalid return value: ~a." r)))))) @@ -74,22 +71,19 @@ (declaim (ftype (function (parser &key (:all t)) parser) many)) (defun many (p &key all) - (lambda (start input) - (declare (ignore start)) + (lambda (input) (let* ((result '())) - (do ((r (funcall p input input) - (funcall p (parsing-end r) (parsing-end r)))) + (do ((r (funcall p (cursor-rebase input)) + (funcall p (cursor-rebase (result-place r))))) ((or (failure-p r) - (= (index (parsing-start r)) - (index (parsing-end r)))) + (cursor-at-start? (result-place r))) nil) (push r result)) (cond ((not result) (make-failure :place input :message "No matches.")) - ((and all (has-data? (parsing-end (first result)))) - (make-failure :place (parsing-end (first result)) + ((and all (cursor-has-data? (result-place (first result)))) + (make-failure :place (result-place (first result)) :message "Input not exausted.")) - (t (make-parsing :tree (reverse (mapcar (lambda (x) (parsing-tree x)) result)) - :start input - :end (parsing-end (first result)))))))) + (t (make-parsing :place (result-place (first result)) + :tree (reverse (mapcar (lambda (x) (parsing-tree x)) result)))))))) diff --git a/cursor.lisp b/cursor.lisp index 72d4c90..a4cc100 100644 --- a/cursor.lisp +++ b/cursor.lisp @@ -1,40 +1,74 @@ (in-package #:monparser) -(defclass cursor () - ((index :type (unsigned-byte 44) :initarg :index :accessor index :initform 0) - (data :type simple-string :initarg :data :reader data :initform ""))) +(defstruct cursor + (start 0 :type (unsigned-byte 44)) + (end 0 :type (unsigned-byte 44)) + (data "" :type simple-string)) -(defun has-data? (cursor) - (< (index cursor) (length (data cursor)))) +(declaim (ftype (function (cursor) boolean) cursor-has-data?)) +(defun cursor-has-data? (cursor) + (< (cursor-end cursor) (length (cursor-data cursor)))) -(defun peek (cursor) - (char (data cursor) - (index cursor))) +(declaim (ftype (function (cursor) boolean) cursor-at-start?)) +(defun cursor-at-start? (cursor) + (= (cursor-start cursor) (cursor-end cursor))) -(defun advance (cursor) - (make-instance 'cursor - :data (data cursor) - :index (+ (index cursor) 1))) +(declaim (ftype (function (cursor) standard-char) cursor-peek)) +(defun cursor-peek (cursor) + (char (cursor-data cursor) + (cursor-end cursor))) +(declaim (ftype (function (cursor) cursor) cursor-advance)) +(defun cursor-advance (cursor) + (make-cursor :data (cursor-data cursor) + :start (cursor-start cursor) + :end (+ (cursor-end cursor) 1))) + +(declaim (ftype (function (cursor) cursor) cursor-rebase)) +(defun cursor-rebase (cursor) + (make-cursor :data (cursor-data cursor) + :start (cursor-end cursor) + :end (cursor-end cursor))) + +(declaim (ftype (function (cursor cursor) cursor) cursor-merge)) +(defun cursor-merge (prev next) + (assert (eq (cursor-data prev) (cursor-data next))) + (make-cursor :data (cursor-data next) + :start (cursor-start prev) + :end (cursor-end next))) + +(declaim (ftype (function (cursor cursor) fixnum) distance)) (defun distance (from to) - (- (index to) - (index from))) + (assert (eq (cursor-data from) (cursor-data to))) + (- (cursor-end to) + (cursor-end from))) -(defun context-window (str index &key (side-length 20)) - (let ((begin (max (- index side-length) 0)) - (end (min (+ index side-length) (length str))) +(declaim (ftype (function (cursor &key (:side-length fixnum)) t) context-window)) +(defun context-window (cursor &key (side-length 20)) + (let ((begin (max (- (cursor-start cursor) side-length) 0)) + (end (min (+ (cursor-end cursor) side-length) (length (cursor-data cursor)))) (result '())) - (push (subseq str (1+ index) end) result) - (push (elt str index) result) - (push (subseq str begin index) result) + (push (subseq (cursor-data cursor) (cursor-end cursor) end) result) + (push (subseq (cursor-data cursor) (cursor-start cursor) (cursor-end cursor)) result) + (push (subseq (cursor-data cursor) begin (cursor-start cursor)) result) result)) +(declaim (ftype (function (cursor) (cons fixnum fixnum)) line-and-column)) +(defun line-and-column (cursor) + (let ((line 1) (column 1)) + (dotimes (i (cursor-end cursor)) + (let ((c (char (cursor-data cursor) i))) + (case c + (#\Newline + (incf line) + (setf column 1)) + (t (incf column))))) + (cons line column))) + (defmethod print-object ((obj cursor) stream) (print-unreadable-object (obj stream :type t) - (let ((str (if (has-data? obj) + (let ((str (if (cursor-has-data? obj) (format nil "~{~a~a~a~}" - (context-window (data obj) - (index obj) - :side-length 10)) + (context-window obj :side-length 10)) "END OF DATA"))) (format stream "~s" (substitute #\~ #\Newline str))))) diff --git a/main.lisp b/main.lisp index 683b70f..b817601 100644 --- a/main.lisp +++ b/main.lisp @@ -2,14 +2,12 @@ (declaim (ftype (function (parser string) result) parse)) (defun parse (parser data) - (funcall parser - (make-instance 'cursor :data data) - (make-instance 'cursor :data data))) + (funcall parser (make-cursor :data data))) (declaim (ftype (function (parser string) parser) append-on-failure)) (defun append-on-failure (p message) - (lambda (start input) - (let ((result (funcall p start input))) + (lambda (input) + (let ((result (funcall p input))) (if (failure-p result) (make-failure :place (failure-place result) :message (concatenate 'string message (failure-message result)) diff --git a/package.lisp b/package.lisp index 80036ab..124c75b 100644 --- a/package.lisp +++ b/package.lisp @@ -3,14 +3,11 @@ (:export #:parse #:defparser #:lazy - #:parsing + #:result-p + #:result-place #:parsing-p #:parsing-tree - #:parsing-start - #:parsing-end - #:failure #:failure-p - #:failure-place #:failure-message #:fail #:unit diff --git a/test.lisp b/test.lisp index b0ffdc2..b1e3f3e 100644 --- a/test.lisp +++ b/test.lisp @@ -37,13 +37,11 @@ ((match) (if (parsing-tree prefix) (fail "Reached prefix") (progn - (format t "prefix start: ~a, end: ~a~&" - (parsing-start prefix) - (parsing-end prefix)) + (format t "prefix place ~a~&" + (result-place prefix)) (unit))))) - (format t "match start: ~a, end: ~a~&" - (parsing-start match) - (parsing-end match)) + (format t "match place: ~a~&" + (result-place match)) match) "ezy") ((many (comp ((prefix (optional (literal "zy"))) -- cgit v1.2.3