#!/bin/sh
main='(module-ref (resolve-module '\''(scripts make-module-catalog)) '\'main')'
exec ${GUILE-guile} -l $0 -c "($main (command-line))" "$@"
!#
;;; make-module-catalog --- Create a .module-catalog file

;;	Copyright (C) 2003,2004 Free Software Foundation, Inc.
;;
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 2, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this software; see the file COPYING.  If not, write to
;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
;; Boston, MA 02111-1307 USA
;;
;; As a special exception, the Free Software Foundation gives permission
;; for additional uses of the text contained in its release of GUILE.
;;
;; The exception is that, if you link the GUILE library with other files
;; to produce an executable, this does not by itself cause the
;; resulting executable to be covered by the GNU General Public License.
;; Your use of that executable is in no way restricted on account of
;; linking the GUILE library code into it.
;;
;; This exception does not however invalidate any other reasons why
;; the executable file might be covered by the GNU General Public License.
;;
;; This exception applies only to the code released by the
;; Free Software Foundation under the name GUILE.  If you copy
;; code from other Free Software Foundation releases into a copy of
;; GUILE, as the General Public License permits, the exception does
;; not apply to the code that you add in this way.  To avoid misleading
;; anyone as to the status of such modified files, you must delete
;; this exception notice from them.
;;
;; If you write modifications of your own for GUILE, it is your choice
;; whether to permit this exception to apply to your modifications.
;; If you do not wish that, delete this exception notice.

;;; Author: Thien-Thi Nguyen <ttn@gnu.org>

;;; Commentary:

;; Usage: make-module-catalog [OPTIONS] [DIR ...]
;;
;; Create a module catalog (named ".module-catalog") in each directory DIR.
;; A module catalog is an alist with module names (list of symbols) for the
;; key and CATALOG-INFO, described below, for the value.  OPTIONS are zero
;; or more of the following (defaults in square braces):
;;
;;  -f, --force        -- ignore current catalog entries
;;  -v, --verbose      -- describe each file scanned
;;  -x, --exclude NAME -- do not scan NAME, which may be either a filename
;;                        or a directory name (also excludes children);
;;                        this option may be given multiple times
;;  -X, --xregexp RX   -- do not scan files or directories that match
;;                        regular expression RX (may include path sep char);
;;                        this option may be given multiple times
;;  -d, --dot-ext EXTS -- like -X "[.]foo$" for every foo in comma-sep EXTS;
;;                        this option may be given multiple times
;;  -o, --output STEM  -- write to DIR/STEM [".module-catalog"]
;;  -b, --bufsize NUM  -- use NUM bytes for the scan buffer [256]
;;  -t, --trim PREFIX  -- delete PREFIX from each filename
;;
;; Note that STEM may not include a directory component.  Also, the minimum
;; buffer size is 128; a specified NUM less than that is silently adjusted.
;;
;; Each catalog entry has the form:
;;
;;   (MODULE-NAME [LOAD-TYPE EXTRA-INFO...] . FILENAME)
;;
;; MODULE-NAME is a list of symbols, such as: (scripts slurp).  If LOAD-TYPE
;; and EXTRA-INFO... are omitted, the module is taken to be loadable as plain
;; text (human-readable) Scheme code with a `define-module' form starting in
;; column 0.  Otherwise, LOAD-TYPE is a symbol and EXTRA-INFO... is other
;; information useful in loading a module.  Here is a list of possible values
;; for LOAD-TYPE:
;;
;;   scm_init_module -- module is a shared object library following the
;;                      "scm_init_MODULE_NAME_module" convention outlined
;;                      in the Guile manual; EXTRA-INFO is a string naming
;;                      the C init function followed by a (possibly empty)
;;                      list of upstream module names
;;
;; FILENAME is a string, the absolute path to the file that provides the
;; interface to the module.  (This file may also provide the definitions, or
;; implementation, of the module, although that is not required.)
;;
;; There are two special catalog entries whose keys are not module names:
;;
;;   **exclude**  -- list of subdirectories excluded from this catalog,
;;                   as specified by the `--exclude' command-line option
;;
;;   **catfmtv**  -- vector, the first element of which is the version of
;;                   the catalog format (at this time: 1); and remaining
;;                   elements catfmtv-specific info (for version 1: none)
;;
;; The asterisks (*) are part of the symbol name.
;;
;; Collisions within a single catalog are flagged and resolved if possible in
;; favor of the most recent existing FILENAME.  If not possible, signal error.
;;
;; Normally, FILENAME is written out in full.  This can be problematic for
;; installations that use a $(DESTDIR)$(foodir) style prefix, because the
;; desired effective directory is solely $(foodir), without $(DESTDIR).  You
;; can use `--trim $(DESTDIR)' in this case, to omit the $(DESTDIR) prefix.
;;
;;
;; Acknowledgements: The idea and style of the module catalog are directly
;;                   inspired by SLIB's slibcat.  Thanks, Aubrey Jaffer!
;;
;;
;; TODO: Factor bounded-space grep into its own module.
;;       Make scanner extensible.
;;       Make collision resolution extensible.

;;; Code:

(define-module (scripts make-module-catalog)
  #:autoload (scripts PROGRAM2) (HVQC-MAIN)
  #:autoload (ice-9 ftw) (nftw)
  #:autoload (scripts slurp) (slurp-file!)
  #:autoload (ice-9 regex) (match:start match:end)
  #:autoload (scripts scan-md-module) (scan-md-module-file))

(define get object-property)
(define put set-object-property!)
(define sub make-shared-substring)

;; scanning

(define (make-scanner bufsize)
  (set! bufsize (max bufsize 128))
  (let ((buf (make-string bufsize))
        (rx (make-regexp "^\\(define-module *(\\([^()]*\\))" regexp/newline))
        (backtrack 64))                 ; this suits rx
    ;; rv
    (lambda (dir filename statinfo flag)
      (cond ((not (eq? 'regular flag))
             (list #f flag))
            ((> 19 (stat:size statinfo))
             (list #f 'too-small))
            ;; machine-dependent libraries
            ((scan-md-module-file filename))
            ;; everything else
            ((let ((initial-slurp 8))   ; maintain me
               (slurp-file! buf filename 0 initial-slurp 0)
               (string=? "!<arch>" (sub buf 0 7)))
             (list #f 'ar-archive))
            ((and (char=? #\# (string-ref buf 0))
                  (not (char=? #\! (string-ref buf 1))))
             (list #f 'unixoid-text-config))
            ((let ((p (open-input-file filename))
                   (fsize (stat:size statinfo)))
               (let loop ((start 0))
                 (and start
                      (let* ((left (let ((diff (- (+ start bufsize) fsize)))
                                     (and (< 0 diff)
                                          (- fsize start))))
                             (fill (or left bufsize))
                             ;; We end-justify so that `regexp-exec' never
                             ;; sees the previous `buf' contents, saving us a
                             ;; defensive (and expensive) `string-fill!'.
                             (fpos (- bufsize fill)))
                        (slurp-file! buf p start fill fpos)
                        (or (regexp-exec rx buf fpos)
                            (loop (if left
                                      #f
                                      (+ start (- bufsize backtrack)))))))))
             => (lambda (m)
                  (list (with-input-from-string
                            (sub buf (match:start m 1) (match:end m 1))
                          read))))
            (else (list #f 'unrecognized))))))

(define (pair->list p) (cond ((pair? p) (cons (car p) (pair->list (cdr p)))) (else (cons p '()))))

(define (make-module-catalog options dir)
  (or (file-exists? dir) (error "no such file:" dir))
  (or (file-is-directory? dir) (error "not a directory:" dir))
  (let* ((fn (or (assq-ref options 'output) ".module-catalog"))
         (out (in-vicinity dir fn))
         (verbose (assq 'verbose options))
         (exclude (delete dir (or (assq-ref options 'exclude) '())))
         (xregexp (cond ((assq 'xregexp options)
                         => (lambda (look)
                              (map make-regexp (cdr (pair->list look)))))
                        (else #f)))
         (cur (and (not (assq-ref options 'force))
                   (file-exists? out)
                   (let* ((out-mtime (stat:mtime (stat out)))
                          (p (open-input-file out))
                          (ls (read p))
                          (ht (make-hash-table 31)))
                     (close-port p)
                     (or (list? ls)
                         (error "bad file format: ~A" out))
                     (for-each (lambda (prev)
                                 (let* ((file (cdr (last-pair prev)))
                                        (mt (and file (string? file)
                                                 (file-exists? file)
                                                 (stat:mtime (stat file)))))
                                   (and mt (< mt out-mtime)
                                        (put file #:mt mt)
                                        (hash-set! ht file prev))))
                               ls)
                     ht)))
         (cat '())
         (scan (make-scanner (cond ((assq-ref options 'bufsize)
                                    => (lambda (s)
                                         (if (string? s)
                                             (string->number s)
                                             s)))
                                   (else 256))))
         (mask #f)
         (orphans '())
         (new 0) (ignored 0) (carried 0))
    (and (assq-ref options 'from-shell)
         (format #t "~A ~A\n" (if (file-exists? out)
                                  (if (assq-ref options 'force)
                                      "Overwriting"
                                      "Updating")
                                  "Making")
                 out))
    ;; tree walk
    (nftw dir
          (lambda (filename statinfo flag base level)
            (let* ((xk? (let ((rv (and mask (> level mask)))) ; excluded kid
                          (and mask (not rv) (set! mask #f))
                          rv))
                   (res (cond (xk? (list #f 'child-of-excluded level))
                              ((and cur (hash-ref cur filename #f))
                               => (lambda (prev)
                                    (set! carried (1+ carried))
                                    (set! cat (cons prev cat))
                                    (list #f 'previously-scanned)))
                              ((and (not mask) (member filename exclude))
                               (and (eq? 'directory flag) (set! mask level))
                               (set! orphans (cons filename orphans))
                               (list #f 'excluded level))
                              ((and xregexp
                                    (let loop ((ls xregexp))
                                      (if (null? ls)
                                          #f
                                          (or (regexp-exec (car ls) filename)
                                              (loop (cdr ls))))))
                               => (lambda (m)
                                    (list #f 'excluded-regexp
                                          (sub filename
                                               (match:start m)
                                               (match:end m)))))
                              (else
                               (scan dir filename statinfo flag)))))
              (cond ((car res)
                     (set! new (1+ new))
                     (put filename #:mt (stat:mtime statinfo))
                     (set! cat (cons `(,@res . ,filename) cat)))
                    (else (set! ignored (1+ ignored))))
              (and verbose (format #t " ~A => ~A\n"
                                   filename (or (car res)
                                                (format #f "ignored ~A"
                                                        (cdr res))))))
            #t)                         ; keep going
          'physical)
    ;; collision resolution
    (let ((ht (make-hash-table 53)))
      (define (most-recent a a-file b b-file)
        (case (+ (if (file-exists? a-file) 1 0)
                 (if (file-exists? b-file) 2 0))
          ((0) (error "both b-file and a-file don't exist"
                      (list #:b-file b-file #:a-file a-file)))
          ((1) a)
          ((2) b)
          ((3) (let ((imt (get a-file #:mt))
                     (pmt (get b-file #:mt)))
                 (if (< imt pmt)
                     b
                     a)))))
      (for-each
       (lambda (ent)
         (let* ((name (car ent))
                (this (cdr ent))
                (handle (hash-get-handle ht name)))
           (cond ((member name '((guile) (guile-user))))        ;;; ignore
                 ((not handle) (hash-set! ht name (cdr ent)))   ;;; ok
                 (else                                          ;;; collision
                  (let ((prev (cdr handle))
                        (done #f))
                    (format #t "collision: ~A\n this: ~S\n prev: ~S\n"
                            name this prev)
                    (cond ((and (string? this) (string? prev)
                                (most-recent this this prev prev))
                           => (lambda (choice)
                                (set-cdr! handle choice)
                                (set! done 'most-recent)))
                          ((and (pair? this) (eq? 'scm_init_module (car this))
                                (pair? prev) (eq? 'scm_init_module (car prev))
                                (most-recent this (cdr (last-pair this))
                                             prev (cdr (last-pair prev))))
                           => (lambda (choice)
                                (set-cdr! handle choice)
                                (set! done 'most-recent)))
                          (else (error "unhandled collision")))
                    (if done
                        (format #t " prio: ~A\n    => ~S\n" done (cdr handle))
                        (error "unhandled collision")))))))
       cat)
      (set! cat (hash-fold acons '() ht)))
    ;; output
    (let* ((p (open-output-file out))
           (trim (assq-ref options 'trim))
           (tlen (and trim (string-length trim))))
      (format p ";;; ~A\n" out)
      (format p ";;; generated ~A UTC -- do not edit!\n\n"
              (strftime "%Y-%m-%d %H:%M:%S" (gmtime (current-time))))
      (format p "(\n")
      (format p " ~S\n" (cons '**catfmtv** (vector 1)))
      (format p " ~S\n" (cons '**exclude** orphans))
      (for-each (lambda (x)
                  (and trim (let* ((lp (last-pair x))
                                   (fn (cdr lp)))
                              (and (string=? trim (substring fn 0 tlen))
                                   (set-cdr! lp (substring fn tlen)))))
                  (format p " ~S\n" x))
                cat)
      (format p ")\n\n")
      (format p ";;; ~A ends here\n" out)
      (close-port p))
    (set! ignored (- ignored carried))
    (and (assq-ref options 'from-shell)
         (format #t " => Done ~A.\n"
                 `(,new new ,carried carried-over ,ignored ignored)))
    (list new carried ignored)))

(define (make-module-catalog/qop qop)
  (let* ((options '())
         (chk! (lambda (key)
                 (qop key (lambda (val)
                            (set! options (acons key val options)))))))
    (for-each chk! '(force verbose output exclude xregexp bufsize trim))
    (qop 'dot-ext
         (lambda (comma-sep-ext-ls)
           (let ((acc '()))
             (define (mimic-X ext)
               (set! acc (cons (format #f "[.]~A$" ext) acc)))
             (for-each (lambda (raw)
                         (let loop ((start 0))
                           (cond ((string-index raw #\, start)
                                  => (lambda (split)
                                       (mimic-X (sub raw start split))
                                       (loop (1+ split))))
                                 (else
                                  (mimic-X (sub raw start))))))
                       (cons comma-sep-ext-ls '()))
             (let ((cur (assq 'xregexp options)))
               (if cur
                   (set-cdr! cur (append acc (cdr cur)))
                   (set! options (acons 'xregexp acc options)))))))
    (qop 'output (lambda (name)
                   (and (string-index name #\/)
                        (error "cannot include directory:" name))))
    (cond ((assq 'exclude options)
           => (lambda (cell)
                (set-cdr! cell (append (qop '()) (cdr cell)))))
          (else (set! options (acons 'exclude (qop '()) options))))
    (set! options (acons 'from-shell #t options))
    (let ((do-it! (lambda (dir)
                    (make-module-catalog options dir))))
      (for-each do-it! (qop '())))))

(define (main args)
  (HVQC-MAIN
   args make-module-catalog/qop
   '(usage . commentary)
   '(package . "Guile")
   '(option-spec (force   (single-char #\f))
                 (verbose (single-char #\v))
                 (output  (single-char #\o) (value #t))
                 (dot-ext (single-char #\d) (value #t))
                 (exclude (single-char #\x) (value #t))
                 (xregexp (single-char #\X) (value #t))
                 (bufsize (single-char #\b) (value #t))
                 (trim    (single-char #\t) (value #t)))))

;;; make-module-catalog ends here
