(in-package :example)

;; Make sure the right prolog is already loaded!

(eval-when (compile load eval)
  (unless (find "prolog" *modules* :test #'string-equal)
    (warn "A prerelease version of prolog should be loaded before this file.")))

(eval-when (compile load eval)
  (require :aserve)
  (require :sax)
  (require :uri)
  (use-package :prolog)
  (use-package :net.xml.sax)
  (use-package :net.uri))

(eval-when (load)
  (setf prolog::*prolog-stack-limit* 10000))

(defpackage :s (:use))

(defmethod parse ((uri uri) &key uri-to-package package)
  (cleanup
   (car
    (net.xml.sax:parse-to-lxml (net.aserve.client:do-http-request uri)
			       :uri-to-package uri-to-package
			       :package  package))))

(defmethod parse ((stream stream) &key uri-to-package package)
  (cleanup
   (car
    (net.xml.sax:parse-to-lxml stream
			       :content-only t
			       :uri-to-package uri-to-package
			       :package  package))))

(defmethod parse ((pathname pathname) &key uri-to-package package)
  (with-open-file (stream pathname)
    (parse stream :uri-to-package uri-to-package :package package)))

(defun cleanup (element)
  (if (consp element)
      (destructuring-bind (tag &rest contents) element
	(loop with x = contents
	    while x
	    if (and (stringp (car x)) (stringp (cadr x)))
	    do (setf (car x) (concatenate 'string (car x) (cadr x))
		     (cdr x) (cddr x))
	    else do (setf x (cdr x)))
	(cons tag
	      (loop for content in contents
		  if (consp content)
		  collect (cleanup content)
		  else unless (and (stringp content)
				   (every (lambda (c)
					    (member c
						    '(#\return #\linefeed #\space #\tab)))
					  content))
		  collect content)))
    element))

(defparameter *tempest* #p"~smh/src/xml/shakespeare.1.10.xml/tempest.xml")

(defparameter *serfile*
    #+never (net.uri:parse-uri "http://www.nlm.nih.gov/databases/dtd/sersamp2005a.xml")
    #-never #P"./sersamp2005a.xml")

(defparameter *serfile1* #P"./sersamp2005.xml")

;; (defparameter *lxml* (parse *serfile1* :package :s))

;;;;;;;;;; Schema analysis

(defun tag (head) (if (consp head) (car head) head))

(define-modify-macro maxf (value) max)

(defstruct element
  attributes				; List of attributes encountered.
  pcdatap				; If PCDATA occurs, that slot will have type string.
  subelements				; If element content, max count of each subelement.
					; If count > 1, that slot will have type list.
					; We don't yet support MIXED content.
  )

(defun schema-walk-element (element ht)
  (when (consp element)
    (let* ((head (car element))
	   (body (cdr element))
	   (tag (tag head)))
      (unless (gethash tag ht)
	(setf (gethash tag ht) (make-element)))
      (when (consp head)
	(loop for attribute in (cdr head) by #'cddr
	    do (pushnew attribute (element-attributes (gethash tag ht)))))
      (loop for content in body
	  with subs-counts = nil	; Record counts of element content,
	  with pcdatap = nil		; True if pcdata occurs.
	  do (typecase content
	       (cons			; Element content.
		(let* ((content-head (car content))
		       (content-tag (tag content-head)))
		  (incf (getf subs-counts content-tag 0))
		  (schema-walk-element content ht)))
	       (symbol			; Element content.
		(incf (getf subs-counts content 0)))
	       ;; Should concat multiple strings?
	       (string (setf pcdatap t)))
	  finally
	    (let ((element (gethash tag ht)))
	      (when (and pcdatap subs-counts)
		(error "Mixed content: ~s" element))
	      (when (or (and (element-pcdatap element) subs-counts)
			(and (element-subelements element) pcdatap))
		(error "Mixed content clash with previous usage: ~s" element))
	      (if pcdatap
		  (setf (element-pcdatap element) t)
		(loop for (sub count) on subs-counts by #'cddr
		    do (maxf (getf (element-subelements element) sub 0) count))))))))

(defun schema-walk (lxml)
  (let ((ht (make-hash-table :test #'equal)))
    (schema-walk-element lxml ht)
    ;;(maphash (lambda (k v) (format t "~&~a ~a~%" k v)) ht)
    ht))

(defun erase-schema ()
  (loop for sym being the :symbols in :s
      as setf = `(setf ,sym)
      when (find-class sym nil)
      do (setf (find-class sym) nil)
      when (fboundp sym)
      do (fmakunbound sym)
      when (fboundp setf)
      do (fmakunbound setf)))

(set-pprint-dispatch '(cons (member defclass))
  (lambda (stm form)
    (pprint-logical-block (stm form :prefix "(" :suffix ")")
      (pprint-indent :block 3 stm)
      (dotimes (i 2)			; defclass and name
	(write (pprint-pop) :stream stm)
	(pprint-exit-if-list-exhausted)
	(write-char #\space stm)
	(pprint-newline :miser stm))
      (pprint-fill stm (pprint-pop) t)	; direct superclasses
      (pprint-exit-if-list-exhausted)
      (pprint-indent :block 1 stm)
      (write-char #\space stm)
      (pprint-newline :linear stm)
      (let ((slots (pprint-pop)))
	(pprint-logical-block (stm slots :prefix "(" :suffix ")")
	  (loop
	    (pprint-exit-if-list-exhausted)
	    (let ((slot (pprint-pop)))
	      (if (atom slot)
		  (write slot :stream stm)
		(pprint-logical-block (stm slot :prefix "(" :suffix ")")
		  (pprint-indent :block 1 stm)
		  (write (pprint-pop) :stream stm) ; slot name
		  (write-char #\space stm)
		  (pprint-newline :linear stm)
		  (loop
		    (write (pprint-pop) :stream stm) ; slot option keyword
		    (pprint-exit-if-list-exhausted)
		    (pprint-indent :block 3 stm)
		    (write-char #\space stm)
		    (pprint-newline :fill stm)
		    (write (pprint-pop) :stream stm) ; slot option value
		    (pprint-exit-if-list-exhausted)
		    (pprint-indent :block 1 stm)
		    (write-char #\space stm)
		    (pprint-newline :linear stm)))))
	    (pprint-exit-if-list-exhausted)
	    (write-char #\space stm)
	    (pprint-newline :linear stm)))
	(pprint-exit-if-list-exhausted)
	(write-char #\space stm)
	(pprint-newline :linear stm))
      (pprint-exit-if-list-exhausted)
      (loop (write (pprint-pop) :stream stm)
	(pprint-exit-if-list-exhausted)
	(write-char #\space stm)
	(pprint-newline :linear stm))))
  '0 #+never *ipd*)

(defun dump-schema (&key (package :s) (stream *standard-output*))
  (loop for sym being the :symbols in package
      as class = (find-class sym nil)
      when class
      do (prin1 `(defclass ,(class-name class) ()
		   ,(loop for dslotd in (mop:class-direct-slots class)
			collect `(,(mop:slot-definition-name dslotd)
				  ,@(loop for reader in (mop:slot-definition-readers dslotd)
					append `(:reader ,reader))
				  ,@(loop for writer in (mop:slot-definition-writers dslotd)
					append `(:writer ,writer))
				  ;; This can't recover the original lexical environment
				  ;; around the form.
				  ,@(when (mop:slot-definition-initfunction dslotd)
				      `(:initform ,(mop:slot-definition-initform dslotd)))))
		   ,@(unless (eql (class-of class)
				  (load-time-value (find-class 'standard-class)))
		       `((:metaclass ,(class-name (class-of class))))))
		stream)
	 (terpri stream) (terpri stream))
  (values))

(defun dump-schema-pretty (&key (package :s) (stream *standard-output*))
  (flet ((local (sym) (intern (symbol-name sym))))
    (loop for sym being the :symbols in package
        as class = (find-class sym nil)
        when class
        do (prin1
            `(defclass ,(local (class-name class)) ()
               ,(loop for dslotd in (mop:class-direct-slots class)
                    collect `(,(local (mop:slot-definition-name dslotd))
                                ,@(loop for reader in (mop:slot-definition-readers dslotd)
                                      append `(:reader ,(local reader)))
                                ,@(loop for writer in (mop:slot-definition-writers dslotd)
                                      append `(:writer
                                               ,(if (symbolp writer)
                                                    (local writer)
                                                  `(,(car writer)
                                                      ,(local (cadr writer))))))
                                ;; This can't recover the original lexical environment
                                ;; around the form.
                                ,@(when (mop:slot-definition-initfunction dslotd)
                                    `(:initform ,(mop:slot-definition-initform dslotd)))))
               ,@(unless (eql (class-of class)
                              (load-time-value (find-class 'standard-class)))
                   `((:metaclass ,(class-name (class-of class))))))
            stream)
          (terpri stream) (terpri stream)))
  (values))

#+notyet
(set-pprint-dispatch '(cons (member defclass))
  (lambda (stm form)
    (pprint-logical-block (stm form :prefix "(" :suffix ")")
      (pprint-indent :block 3 stm)
      (dotimes (i 2)			; defclass and name
	(write (pprint-pop) :stream stm)
	(pprint-exit-if-list-exhausted)
	(write-char #\space stm)
	(pprint-newline :miser stm))
      (pprint-fill stm (pprint-pop) t)	; direct superclasses
      (pprint-exit-if-list-exhausted)
      (pprint-indent :block 1 stm)
      (write-char #\space stm)
      (pprint-newline :linear stm)
      (let ((slots (pprint-pop)))
	(pprint-logical-block (stm slots :prefix "(" :suffix ")")
	  (loop
	    (pprint-exit-if-list-exhausted)
	    (let ((slot (pprint-pop)))
	      (if (atom slot)
		  (write slot :stream stm)
		(pprint-logical-block (stm slot :prefix "(" :suffix ")")
		  (pprint-indent :block 1 stm)
		  (write (pprint-pop) :stream stm) ; slot name
		  (write-char #\space stm)
		  (pprint-newline :linear stm)
		  (loop
		    (write (pprint-pop) :stream stm) ; slot option keyword
		    (pprint-exit-if-list-exhausted)
		    (pprint-indent :block 3 stm)
		    (write-char #\space stm)
		    (pprint-newline :fill stm)
		    (write (pprint-pop) :stream stm) ; slot option value
		    (pprint-exit-if-list-exhausted)
		    (pprint-indent :block 1 stm)
		    (write-char #\space stm)
		    (pprint-newline :linear stm)))))
	    (pprint-exit-if-list-exhausted)
	    (write-char #\space stm)
	    (pprint-newline :linear stm)))
	(pprint-exit-if-list-exhausted)
	(write-char #\space stm)
	(pprint-newline :linear stm))
      (pprint-exit-if-list-exhausted)
      (loop (write (pprint-pop) :stream stm)
	(pprint-exit-if-list-exhausted)
	(write-char #\space stm)
	(pprint-newline :linear stm))))
  '0 #+never *ipd*)


;; XXX What if there is an attribute with the same name as a subelement tag?
;; XXX Need to support MIXED content somehow.

;; If an element has attreibutes and/or element content, define it as a class with a slot
;; for each.  If an element has only PCDATA content, define it as a simple slot with type
;; string.  But if an element has both attributes and pcdata, we need to define the class
;; with the attribute slots, plus an additional slot named pcdata.  This could support
;; MIXED content, except there is still the problem of pcdata split by element content.

(defun nilsource () ())

(defun default-persistentp ()
  (and (find-package :db.allegrocache)
       (symbol-value (find-symbol (load-time-value (symbol-name :*allegrocache*))
				  (find-package :db.allegrocache)))))

(defun compute-schema (ht &optional (persistentp (default-persistentp)))
  (erase-schema)
  (loop for tag being each hash-key of ht
      using (hash-value element)
	    ;; do (format t "~a ~a~%" tag element)
      if (or (element-attributes element) (element-subelements element))
      do (mop:ensure-class
	  tag
	  :metaclass (if persistentp
			 (find-symbol (load-time-value (symbol-name :persistent-class))
				      :db.allegrocache)
		       'standard-class)
	  :direct-slots
	  (append
	   (loop for attribute in (element-attributes element)
	       collect `(:name ,attribute
			       :readers (,attribute)
			       :writers ((setf ,attribute))
			       :initfunction nilsource :initform nil
			       :initargs (,(intern (symbol-name attribute) :keyword))))
	   (when (element-pcdatap element)
	     `((:name pcdata :readers (pcdata) :writers ((setf pcdata)) :initargs (:pcdata)
		      :type string)))
	   (loop for (content count) on (element-subelements element) by #'cddr
	       collect `(:name ,content
			       :readers (,content)
			       :writers ((setf ,content))
			       ,@(if (> count 1)
				     '(:type list)
				   (let ((subelement (gethash content ht)))
				     (if (or (element-attributes subelement)
					     (element-subelements subelement))
					 `(:type ,content)
				       `(:type string))))
			       :initfunction nilsource :initform nil
			       :initargs (,(intern (symbol-name content) :keyword))))))))

(define-modify-macro nconcf (newlist) nconc)

(defun create-walk-element (elem ht)
  (when (stringp elem)
    (return-from create-walk-element elem))
  (when (symbolp elem)
    (return-from create-walk-element (make-instance elem)))
  (when (consp elem)			; What else?
    (let* ((head (car elem))
	   (body (cdr elem))
	   (tag (tag head))
	   (attributes (and (consp head) (cdr head)))
	   (class (find-class tag nil)))
      (if class
	  (let* ((inst (make-instance class))
		 (slots (mop:class-slots class)))
	    (loop for (attribute value) on attributes by #'cddr
		do (setf (slot-value inst attribute) value))
	    (loop for content in body
		do (etypecase content
		     (cons (let* ((content-head (car content))
				  (content-tag (tag content-head))
				  (slotd (find content-tag slots :test #'eq
					       :key 'mop:slot-definition-name))
				  (data (create-walk-element content ht)))
			     #+debug (when (eq content-tag 's::Month)
				       (format t "~s ~s~%" slotd data))
			     (if (eq 'list (mop:slot-definition-type slotd))
				 (nconcf (slot-value inst content-tag) (list data))
			       (setf (slot-value inst content-tag) data))))
		     (symbol (error "Empty leaf??? ~s" elem))
		     (string (setf (slot-value inst 'pcdata) content))))
	    inst)
	(if (and (stringp (car body))
		 (null (cdr body)))
	    (car body)
	  (warn "Strange element should be PCDATA: ~s" elem))))))

(defun objectify-xml (&key (file *serfile*) (persistent (default-persistentp)))
  (let* ((lxml (parse file :package :s))
	 (ht (schema-walk lxml)))
    ;; This first undefines all classes and accessors, then defines them.
    (compute-schema ht persistent)
    (create-walk-element lxml ht)
    ))

#+notyet
(defun objectify-xml (&key (file *serfile*) (persistent (default-persistentp))
                           (package :s))
  (let* ((lxml (parse file :package package))
	 (ht (schema-walk lxml)))
    ;; This first undefines all classes and accessors, then defines them.
    (compute-schema ht persistent)
    (create-walk-element lxml ht)
    ))

; (defparameter *ser* (test))

(<-- (records ?ser ?records)
     (slot= ?ser s::NLMCatalogRecord ?records))

(<-- (record ?record)
     (lisp ?ser *ser*)
     (records ?ser ?records)
     (member ?record ?records))

(<-- (same-month ?r1 ?r2)
     (record ?r1)
     (record ?r2)
     (not (= ?r1 ?r2))
     (slot= ?r1 s::DateCreated ?date-created-1)
     (slot= ?date-created-1 s::Year ?year1)
     (slot= ?date-created-1 s::Month ?month1)
     (slot= ?r2 s::DateCreated ?date-created-2)
     (slot= ?date-created-2 s::Year ?year1)
     (slot= ?date-created-2 s::Month ?month1))

(<-- (title-main ?rec ?title)
     (slot= ?rec s::TitleMain ?title-main)
     (slot= ?title-main s::Title ?title))

(<-- (string-slot-value ?object ?slot-value)
     (lisp ?eslotds (mop:class-slots (class-of ?object)))
     (member ?eslotd ?eslotds)
     (lisp ?value (mop:slot-value-using-class (class-of ?object) ?object ?eslotd))
     (recursive-string-slot-value-1 ?value ?slot-value)
     )

(<-- (recursive-string-slot-value-1 ?value ?slot-value)
     (lispp (stringp ?value))
     (= ?slot-value ?value))

(<-  (recursive-string-slot-value-1 ?value ?slot-value)
     (lispp (listp ?value))
     (member ?value1 ?value)
     (recursive-string-slot-value-1 ?value1 ?slot-value))

(<-  (recursive-string-slot-value-1 ?value ?slot-value)
     (lispp (typep ?value 'standard-object))
     (string-slot-value ?value ?slot-value))

;; This finds all pairs of entries completed in the same month.

#+example
(?- (record ?r1)
    (same-month ?r1 ?r2)
    (title-main ?r1 ?title1)
    (title-main ?r2 ?title2)
    )

;; This finds all entries with any recursive slot content containing the substring
;; "gero", matching variants of gerontology

(defun records-matching-re (re)
  (let ((r 0)				; count records scanned
	(m nil)				; records that match anywhere
	(c 0)				; count text fields scanned
	(g 0)				; count matching strings
	(re (compile-re re :case-fold t)))
    (prolog (record ?r)
	    (lisp (incf r))
	    (string-slot-value ?r ?v)
	    (lisp (incf c))
	    (lispp (match-re re ?v))
	    (title-main ?r ?title)
	    (lisp (progn (incf g)
			 (pushnew r m)
			 (format t "[~d]: ~a~%  ~a~%" r ?title ?v)))
	    )
    (values (nreverse m) r c g)))

#+example
(records-matching-re "laser")

#+example
(records-matching-re "geron")

#+example
(records-matching-re "china")

#+example
(dump-schema)
