diff options
-rw-r--r-- | base.lisp | 51 | ||||
-rw-r--r-- | core.lisp | 83 | ||||
-rw-r--r-- | extra.lisp | 63 | ||||
-rw-r--r-- | input.lisp | 57 | ||||
-rw-r--r-- | main.lisp | 11 | ||||
-rw-r--r-- | monparser.asd | 5 | ||||
-rw-r--r-- | package.lisp | 4 | ||||
-rw-r--r-- | parser.lisp | 214 |
8 files changed, 228 insertions, 260 deletions
diff --git a/base.lisp b/base.lisp new file mode 100644 index 0000000..8206322 --- /dev/null +++ b/base.lisp @@ -0,0 +1,51 @@ +(in-package #:monparser) + +(defstruct parsing + tree + left + limit) + +(defstruct failure + place + message) + +(defun lazy-parsing-p (r) + (or (functionp r) + (parsing-p r))) + +(defun new (tree) + (lambda (input &key limit lazy) + (declare (ignore lazy)) + (if (and limit (> limit 0)) + (make-failure :place input + :message (format nil "Didn't reach expected limit: ~a." limit)) + (make-parsing :tree tree :left input)))) + +(defun bind (p f &key (greedy t)) + (lambda (input &key limit lazy) + (let (r) + (if greedy + (setf r (funcall p input :limit limit)) + (let ((next-parser (funcall f nil input)) + (limit -1)) + (do ((sweep-input input (advance sweep-input))) + ((or (not (has-data? sweep-input)) + (> limit -1)) nil) + (when (lazy-parsing-p (funcall next-parser sweep-input :lazy t)) + (setf limit (cursor-distance sweep-input input)))) + (if (< limit 0) + (setf r (make-failure :place input + :message "Reached end of input while sweeping.")) + (setf r (funcall p input :limit limit))))) + (if (parsing-p r) + (if lazy + (lambda (ignored-input &key lazy limit) + (declare (ignore ignored-input limit)) + (funcall (funcall f (parsing-tree r) input) + (parsing-left r) + :lazy lazy + :limit (if greedy (parsing-limit r)))) + (funcall (funcall f (parsing-tree r) input) + (parsing-left r) + :limit (if greedy (parsing-limit r)))) + r)))) diff --git a/core.lisp b/core.lisp new file mode 100644 index 0000000..d0955fe --- /dev/null +++ b/core.lisp @@ -0,0 +1,83 @@ +(in-package #:monparser) + +(defun fail (message) + (lambda (input &key limit lazy) + (make-failure :place input :message message))) + +(defmacro unit (&optional predicate) + (cond ((null predicate) + (setf predicate '(characterp it))) + ((symbolp predicate) + (setf predicate `(,predicate it))) + ((characterp predicate) + (setf predicate `(char-equal ,predicate it))) + (t (setf predicate + (nsubst-if 'it + (lambda (x) + (and (symbolp x) + (string-equal (symbol-name x) "IT"))) predicate)))) + `(lambda (input &key limit lazy) + (declare (ignore lazy)) + (if (and limit (<= limit 0)) + (make-failure :place input :message "Reached established limit.") + (if (has-data? input) + (let ((it (peek input))) + (if ,predicate + (make-parsing :tree it :left (advance input) :limit (if limit (1- limit))) + (make-failure :place input + :message (format nil "Expected: ~a, Got: ~a" ',predicate it)))) + (make-failure :place input :message "Reached end of input."))))) + +(defun one-of (first-parser second-parser &rest other-parsers) + (lambda (input &key limit lazy) + (declare (ignore lazy)) + (labels ((one-of-rec (parsers) + (let ((intermediate-parsers '()) + (result nil)) + (dolist (p parsers) + (let ((r (funcall p + input + :lazy (> (length parsers) 1) + :limit limit))) + (cond ((functionp r) + (push r intermediate-parsers)) + ((parsing-p r) + (when (or (not (parsing-p result)) + (> (cursor (parsing-left r)) + (cursor (parsing-left result)))) + (setf result r))) + ((failure-p r) + (when (or (failure-p result) + (= (length parsers) 1)) + (setf result r)))))) + (if intermediate-parsers + (one-of-rec intermediate-parsers) + result)))) + (one-of-rec (cons first-parser (cons second-parser other-parsers)))))) + +(defmacro comp (bindings &body body) + (if (null bindings) + `(new (progn ,@body)) + (let ((var (first (car bindings))) + (parser (second (car bindings))) + (lazy (third (car bindings))) + (unused (gensym))) + (if (symbolp var) + (if (string= (symbol-name var) "_") + `(bind ,parser + (lambda (&rest ,unused) + (declare (ignore ,unused)) + (comp ,(cdr bindings) ,@body)) + :greedy ,(not lazy)) + `(bind ,parser + (lambda (,var &rest ,unused) + (declare (ignore ,unused)) + (comp ,(cdr bindings) ,@body)) + :greedy ,(not lazy))) + (if (and (consp var) (symbolp (car var)) (symbolp (cdr var))) + `(bind ,parser + (lambda (,(car var) ,(cdr var) &rest ,unused) + (declare (ignore ,unused)) + (comp ,(cdr bindings) ,@body)) + :greedy ,(not lazy)) + (error "Binding must be either a symbol or a cons of symbols.")))))) diff --git a/extra.lisp b/extra.lisp new file mode 100644 index 0000000..916465b --- /dev/null +++ b/extra.lisp @@ -0,0 +1,63 @@ +(in-package #:monparser) + +(defmacro literal (word) + (when (not (stringp word)) + (error "Literal only accepts strings as input.")) + (let ((binding-list '()) + (name-list '())) + (loop :for c :across word :do + (when c + (let ((name (gensym))) + (push name name-list) + (push `(,name (unit ,c)) binding-list)))) + `(comp ,(reverse binding-list) + (coerce ,(cons 'list (reverse name-list)) 'string)))) + +(defparameter nothing + (new nil)) + +(defun optional (p) + (one-of p nothing)) + +(defun many (p) + (comp ((x p) + (xs (if (not x) + (fail "Parsing result is empty.") + (optional (many p))))) + (cons x xs))) + +(defun repeat (p min &optional (max 0)) + (if (> min 0) + (comp ((x p) + (xs (repeat p (1- min) (1- max)))) + (cons x xs)) + (if (> max 0) + (comp ((x (optional p)) + (xs (repeat p 0 (if x (1- max) 0)))) + (if x (cons x xs) x)) + nothing))) + +(defun whitespace? (x) + (some (lambda (y) (char= x y)) '(#\Space #\Newline #\Tab))) + +(defparameter whitespace + (comp ((_ (optional (many (unit whitespace?))))) + :whitespace)) + +(defun separated-list (p separator &key include-separator) + (comp ((v p) + (sep (optional separator)) + (vn (if sep + (separated-list p separator) + nothing))) + (if include-separator + (cons v (cons sep vn)) + (cons v vn)))) + +(defun surrounded (left p right &key include-surrounding) + (comp ((l left) + (body p :lazy) + (r right)) + (if include-surrounding + (list l body r) + body))) @@ -1,53 +1,20 @@ (in-package #:monparser) -(defclass input () - ((cursor :initarg :cursor :accessor cursor :initform 0) - (file :initarg :file :reader file :initform nil) - (data :initarg :data :reader data :initform nil))) +(defclass parser-input () + ((cursor :initarg :cursor :accessor input-cursor :initform 0) + (data :initarg :data :reader input-data :initform nil))) (defun has-data? (input) - (< (cursor input) (length (data input)))) - -(defun prefix? (target input) - (string= target - (data input) - :start2 (cursor input) - :end2 (min (+ (cursor input) (length target)) - (length (data input))))) + (< (input-cursor input) (length (input-data input)))) (defun peek (input) - (char (data input) - (cursor input))) - -(defun advance (input &optional (amount 1)) - (make-instance 'input - :data (data input) - :file (file input) - :cursor (+ (cursor input) amount))) - -(defun input-sub (input1 input2) - (- (cursor input1) (cursor input2))) - -(defun from-string (str) - (make-instance 'input :data str)) - -(defun read-file (path) - (with-open-file (file path) - (let* ((size (file-length file)) - (buf (make-string size))) - (read-sequence buf file) - buf))) + (char (input-data input) + (input-cursor input))) -(defun from-file (filename) - (make-instance 'input :file filename :data (read-file filename))) +(defun advance (input) + (make-instance 'parser-input + :data (input-data input) + :cursor (+ (input-cursor input) 1))) -(defun line-and-column (input) - (let ((line 1) (column 1)) - (dotimes (i (cursor input)) - (let ((c (char (data input) i))) - (case c - (#\Newline - (incf line) - (setf column 1)) - (t (incf column))))) - (values line column))) +(defun cursor-distance (input1 input2) + (- (input-cursor input1) (input-cursor input2))) diff --git a/main.lisp b/main.lisp new file mode 100644 index 0000000..6357cc4 --- /dev/null +++ b/main.lisp @@ -0,0 +1,11 @@ +(in-package #:monparser) + +(defun parse (parser data) + (let* ((result (funcall parser + (make-instance 'parser-input + :cursor 0 + :data data)))) + (if (parsing-p result) + (let ((finished? (not (has-data? (parsing-left result))))) + (values (parsing-tree result) finished?)) + result))) diff --git a/monparser.asd b/monparser.asd index 2aa7290..aa5bc2c 100644 --- a/monparser.asd +++ b/monparser.asd @@ -3,4 +3,7 @@ :components ((:file "package") (:file "input") - (:file "parser"))) + (:file "base") + (:file "core") + (:file "extra") + (:file "main"))) diff --git a/package.lisp b/package.lisp index 035cf77..c59f997 100644 --- a/package.lisp +++ b/package.lisp @@ -1,6 +1,10 @@ (defpackage #:monparser (:use #:cl) (:export #:parse + #:failure-place + #:failure-message + #:input-cursor + #:input-data #:comp #:one-of #:unit diff --git a/parser.lisp b/parser.lisp deleted file mode 100644 index c627af1..0000000 --- a/parser.lisp +++ /dev/null @@ -1,214 +0,0 @@ -(in-package #:monparser) - -(defun parse-string (p input) - (let ((result (funcall p (from-string input)))) - (if (parsing-p result) - (parsing-tree result) - result))) - -(defun parse-file (p input) - (let ((result (funcall p (from-file input)))) - (if (parsing-p result) - (parsing-tree result) - result))) - -(defstruct parsing - tree - left - limit) - -(defun lazy-parsing-p (r) - (or (functionp r) - (parsing-p r))) - -(defstruct failure - place - message) - -(defmethod print-object ((obj failure) stream) - (let ((file (file (failure-place obj)))) - (if file - (multiple-value-bind (line column) (line-and-column (failure-place obj)) - (format stream "~a:~a:~a: ~a" line column file (failure-message obj))) - (format stream "~a: ~a" (cursor (failure-place obj)) (failure-message obj))))) - -(defun new (tree) - (lambda (input &key limit lazy) - (declare (ignore lazy)) - (if (and limit (> limit 0)) - (make-failure :place input - :message (format nil "Didn't reach expected limit: ~a." limit)) - (make-parsing :tree tree :left input)))) - -(defun bind (p f &key (greedy t)) - (lambda (input &key limit lazy) - (let (r) - (if greedy - (setf r (funcall p input :limit limit)) - (let ((next-parser (funcall f nil input)) - (limit -1)) - (do ((sweep-input input (advance sweep-input))) - ((or (not (has-data? sweep-input)) - (> limit -1)) nil) - (when (lazy-parsing-p (funcall next-parser sweep-input :lazy t)) - (setf limit (input-sub sweep-input input)))) - (if (< limit 0) - (setf r (make-failure :place input - :message "Reached end of input while sweeping.")) - (setf r (funcall p input :limit limit))))) - (if (parsing-p r) - (if lazy - (lambda (ignored-input &key lazy limit) - (declare (ignore ignored-input limit)) - (funcall (funcall f (parsing-tree r) input) - (parsing-left r) - :lazy lazy - :limit (if greedy (parsing-limit r)))) - (funcall (funcall f (parsing-tree r) input) - (parsing-left r) - :limit (if greedy (parsing-limit r)))) - r)))) - -(defmacro comp (bindings &body body) - (if (null bindings) - `(new (progn ,@body)) - (let ((var (first (car bindings))) - (parser (second (car bindings))) - (lazy (third (car bindings))) - (unused (gensym))) - (if (symbolp var) - (if (string= (symbol-name var) "_") - `(bind ,parser - (lambda (&rest ,unused) - (declare (ignore ,unused)) - (comp ,(cdr bindings) ,@body)) - :greedy ,(not lazy)) - `(bind ,parser - (lambda (,var &rest ,unused) - (declare (ignore ,unused)) - (comp ,(cdr bindings) ,@body)) - :greedy ,(not lazy))) - (if (and (consp var) (symbolp (car var)) (symbolp (cdr var))) - `(bind ,parser - (lambda (,(car var) ,(cdr var) &rest ,unused) - (declare (ignore ,unused)) - (comp ,(cdr bindings) ,@body)) - :greedy ,(not lazy)) - (error "Binding must be either a symbol or a cons of symbols.")))))) - -(defun one-of (first-parser second-parser &rest other-parsers) - (lambda (input &key limit lazy) - (declare (ignore lazy)) - (labels ((one-of-rec (parsers) - (let ((intermediate-parsers '()) - (result nil)) - (dolist (p parsers) - (let ((r (funcall p - input - :lazy (> (length parsers) 1) - :limit limit))) - (cond ((functionp r) - (push r intermediate-parsers)) - ((parsing-p r) - (when (or (not (parsing-p result)) - (> (cursor (parsing-left r)) - (cursor (parsing-left result)))) - (setf result r))) - ((failure-p r) - (when (or (failure-p result) - (= (length parsers) 1)) - (setf result r)))))) - (if intermediate-parsers - (one-of-rec intermediate-parsers) - result)))) - (one-of-rec (cons first-parser (cons second-parser other-parsers)))))) - -(defun fail (message) - (lambda (input &key limit lazy) - (make-failure :place input :message message))) - -(defmacro unit (&optional predicate) - (cond ((null predicate) - (setf predicate '(characterp it))) - ((symbolp predicate) - (setf predicate `(,predicate it))) - ((characterp predicate) - (setf predicate `(char-equal ,predicate it))) - (t (setf predicate - (nsubst-if 'it - (lambda (x) - (and (symbolp x) - (string-equal (symbol-name x) "IT"))) predicate)))) - `(lambda (input &key limit lazy) - (declare (ignore lazy)) - (if (and limit (<= limit 0)) - (make-failure :place input :message "Reached established limit.") - (if (has-data? input) - (let ((it (peek input))) - (if ,predicate - (make-parsing :tree it :left (advance input) :limit (if limit (1- limit))) - (make-failure :place input - :message (format nil "Expected: ~a, Got: ~a" ',predicate it)))) - (make-failure :place input :message "Reached end of input."))))) - -(defmacro literal (word) - (when (not (stringp word)) - (error "Literal only accepts strings as input.")) - (let ((binding-list '()) - (name-list '())) - (loop :for c :across word :do - (when c - (let ((name (gensym))) - (push name name-list) - (push `(,name (unit ,c)) binding-list)))) - `(comp ,(reverse binding-list) - (coerce ,(cons 'list (reverse name-list)) 'string)))) - -(defparameter nothing - (new nil)) - -(defun optional (p) - (one-of p nothing)) - -(defun many (p) - (comp ((x p) - (xs (if (not x) - (fail "Parsing result is empty.") - (optional (many p))))) - (cons x xs))) - -(defun repeat (p min &optional (max 0)) - (if (> min 0) - (comp ((x p) - (xs (repeat p (1- min) (1- max)))) - (cons x xs)) - (if (> max 0) - (comp ((x (optional p)) - (xs (repeat p 0 (if x (1- max) 0)))) - (if x (cons x xs) x)) - nothing))) - -(defun whitespace? (x) - (some (lambda (y) (char= x y)) '(#\Space #\Newline #\Tab))) - -(defparameter whitespace - (comp ((_ (optional (many (unit whitespace?))))) - :whitespace)) - -(defun separated-list (p separator &key include-separator) - (comp ((v p) - (sep (optional separator)) - (vn (if sep - (separated-list p separator) - nothing))) - (if include-separator - (cons v (cons sep vn)) - (cons v vn)))) - -(defun surrounded (left p right &key include-surrounding) - (comp ((l left) - (body p :lazy) - (r right)) - (if include-surrounding - (list l body r) - body))) |