Skip to content
Snippets Groups Projects
Commit 12129998 authored by Mark H Weaver's avatar Mark H Weaver
Browse files

union: Rewrite to be faster; handle symlink/directory conflicts.

* guix/build/union.scm: Rewrite; only 'file=?' remains unchanged.  Remove
  'tree-union' and 'delete-duplicate-leaves' exports.  Merge inputs in a
  breadth-first fashion.  Follow symlinks for purposes of making decisions
  about the merge.

* tests/union.scm: Remove tests of 'tree-union' and 'delete-duplicate-leaves'.
parent 8ead71b4
No related branches found
No related tags found
No related merge requests found
;;; GNU Guix --- Functional package management for GNU ;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2012, 2013, 2014 Ludovic Courtès <ludo@gnu.org> ;;; Copyright © 2012, 2013, 2014 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2014 Mark H Weaver <mhw@netris.org>
;;; ;;;
;;; This file is part of GNU Guix. ;;; This file is part of GNU Guix.
;;; ;;;
...@@ -17,16 +18,13 @@ ...@@ -17,16 +18,13 @@
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
(define-module (guix build union) (define-module (guix build union)
#:use-module (ice-9 ftw)
#:use-module (ice-9 match) #:use-module (ice-9 match)
#:use-module (ice-9 format) #:use-module (ice-9 format)
#:use-module (srfi srfi-1) #:use-module (srfi srfi-1)
#:use-module (srfi srfi-26) #:use-module (srfi srfi-26)
#:use-module (rnrs bytevectors) #:use-module (rnrs bytevectors)
#:use-module (rnrs io ports) #:use-module (rnrs io ports)
#:export (tree-union #:export (union-build))
delete-duplicate-leaves
union-build))
;;; Commentary: ;;; Commentary:
;;; ;;;
...@@ -35,72 +33,20 @@ (define-module (guix build union) ...@@ -35,72 +33,20 @@ (define-module (guix build union)
;;; ;;;
;;; Code: ;;; Code:
(define (tree-union trees) (define (files-in-directory dirname)
"Return a tree that is the union of the trees listed in TREES. Each (let ((dir (opendir dirname)))
tree has the form (PARENT LEAVES ...) or just LEAF, where each leaf is (let loop ((files '()))
itself a tree. " (match (readdir dir)
(let loop ((trees trees)) ((or "." "..")
(match trees (loop files))
(() ; nothing left ((? eof-object?)
'()) (closedir dir)
(_ (sort files string<?))
(let ((dirs (filter pair? trees)) (file
(leaves (remove pair? trees))) (loop (cons file files)))))))
`(,@leaves
,@(fold (lambda (dir result) (define (file-is-directory? file)
(cons `(,dir (eq? 'directory (stat:type (stat file))))
,@(loop
(concatenate
(filter-map (match-lambda
((head children ...)
(and (equal? head dir)
children)))
dirs))))
result))
'()
(delete-duplicates (map car dirs)))))))))
(define* (delete-duplicate-leaves tree
#:optional
(leaf=? equal?)
(delete-duplicates (match-lambda
((head _ ...) head))))
"Delete duplicate leaves from TREE. Two leaves are considered equal
when LEAF=? applied to them returns #t. Each collision (list of leaves
that are LEAF=?) is passed to DELETE-DUPLICATES, which must return a
single leaf."
(let loop ((tree tree))
(match tree
((dir children ...)
(let ((dirs (filter pair? children))
(leaves (remove pair? children)))
(define collisions
(fold (lambda (leaf result)
(define same?
(cut leaf=? leaf <>))
(if (any (cut find same? <>) result)
result
(match (filter same? leaves)
((_)
result)
((collision ...)
(cons collision result)))))
'()
leaves))
(define non-collisions
(filter (lambda (leaf)
(match (filter (cut leaf=? leaf <>) leaves)
((_) #t)
((_ _ ..1) #f)))
leaves))
`(,dir
,@non-collisions
,@(map delete-duplicates collisions)
,@(map loop dirs))))
(leaf leaf))))
(define (file=? file1 file2) (define (file=? file1 file2)
"Return #t if FILE1 and FILE2 are regular files and their contents are "Return #t if FILE1 and FILE2 are regular files and their contents are
...@@ -124,110 +70,82 @@ (define buf2 (make-bytevector len)) ...@@ -124,110 +70,82 @@ (define buf2 (make-bytevector len))
(or (eof-object? n1) (or (eof-object? n1)
(loop)))))))))))) (loop))))))))))))
(define* (union-build output directories (define* (union-build output inputs
#:key (log-port (current-error-port))) #:key (log-port (current-error-port)))
"Build in the OUTPUT directory a symlink tree that is the union of all "Build in the OUTPUT directory a symlink tree that is the union of all
the DIRECTORIES." the INPUTS."
(define (file-tree dir)
;; Return the contents of DIR as a tree. (define (symlink* input output)
(format log-port "`~a' ~~> `~a'~%" input output)
(define (others-have-it? subdir) (symlink input output))
;; Return #t if other elements of DIRECTORIES have SUBDIR.
(let ((subdir (substring subdir (string-length dir)))) (define (resolve-collisions output dirs files)
(any (lambda (other) (cond ((null? dirs)
(and (not (string=? other dir)) ;; The inputs are all files.
(file-exists? (string-append other "/" subdir)))) (format (current-error-port)
directories))) "warning: collision encountered: ~{~a ~}~%"
files)
(match (file-system-fold (lambda (subdir stat result) ; enter?
;; No need to traverse DIR since there's (let ((file (first files)))
;; nothing to union it with. Thus, we avoid ;; TODO: Implement smarter strategies.
;; creating a gazillon symlinks (think (format (current-error-port)
;; share/emacs/24.3, share/texmf, etc.) "warning: arbitrarily choosing ~a~%"
(or (string=? subdir dir) file)
(others-have-it? subdir)))
(lambda (file stat result) ; leaf (symlink* file output)))
(match result
(((siblings ...) rest ...) (else
`((,file ,@siblings) ,@rest)))) ;; The inputs are a mixture of files and directories
(lambda (dir stat result) ; down (error "union-build: collision between file and directories"
`(() ,@result)) `((files ,files) (dirs ,dirs))))))
(lambda (dir stat result) ; up
(match result (define (union output inputs)
(((leaves ...) (siblings ...) rest ...) (match inputs
`(((,(basename dir) ,@leaves) ,@siblings) ((input)
,@rest)))) ;; There's only one input, so just make a link.
(lambda (dir stat result) ; skip (symlink* input output))
;; DIR is not available elsewhere, so treat it (_
;; as a leaf. (call-with-values (lambda () (partition file-is-directory? inputs))
(match result (match-lambda*
(((siblings ...) rest ...) ((dirs ())
`((,dir ,@siblings) ,@rest)))) ;; All inputs are directories. Create a new directory
(lambda (file stat errno result) ;; where we will merge the input directories.
(format (current-error-port) "union-build: ~a: ~a~%" (mkdir output)
file (strerror errno)))
'(()) ;; Build a hash table mapping each file to a list of input
dir) ;; directories containing that file.
(((tree)) tree) (let ((table (make-hash-table)))
(() #f)))
(define (add-to-table! file dir)
(define tree-leaves (hash-set! table file (cons dir (hash-ref table file '()))))
;; Return the leaves of the given tree.
(match-lambda ;; Populate the table.
(((? string?) leaves ...) (for-each (lambda (dir)
leaves))) (for-each (cut add-to-table! <> dir)
(files-in-directory dir)))
(define (leaf=? a b) dirs)
(equal? (basename a) (basename b)))
;; Now iterate over the table and recursively
(define (resolve-collision leaves) ;; perform a union for each entry.
;; LEAVES all have the same basename, so choose one of them. (hash-for-each (lambda (file dirs-with-file)
(match (delete-duplicates leaves string=?) (union (string-append output "/" file)
((one-and-the-same) (map (cut string-append <> "/" file)
;; LEAVES all actually point to the same file, so nothing to worry (reverse dirs-with-file))))
;; about. table)))
one-and-the-same)
((and lst (head rest ...)) ((() (file (? (cut file=? <> file)) ...))
;; A real collision, unless those files are all identical. ;; There are no directories, and all files have the same contents,
(unless (every (cut file=? head <>) rest) ;; so there's no conflict.
(format (current-error-port) "warning: collision encountered: ~{~a ~}~%" (symlink* file output))
lst)
((dirs files)
;; TODO: Implement smarter strategies. (resolve-collisions output dirs files)))))))
(format (current-error-port) "warning: arbitrarily choosing ~a~%"
head))
head)))
(setvbuf (current-output-port) _IOLBF) (setvbuf (current-output-port) _IOLBF)
(setvbuf (current-error-port) _IOLBF) (setvbuf (current-error-port) _IOLBF)
(when (file-port? log-port) (when (file-port? log-port)
(setvbuf log-port _IOLBF)) (setvbuf log-port _IOLBF))
(mkdir output) (union output (delete-duplicates inputs)))
(let loop ((tree (delete-duplicate-leaves
(cons "."
(tree-union
(append-map (compose tree-leaves file-tree)
(delete-duplicates directories))))
leaf=?
resolve-collision))
(dir '()))
(match tree
((? string?)
;; A leaf: create a symlink.
(let* ((dir (string-join dir "/"))
(target (string-append output "/" dir "/" (basename tree))))
(format log-port "`~a' ~~> `~a'~%" tree target)
(symlink tree target)))
(((? string? subdir) leaves ...)
;; A sub-directory: create it in OUTPUT, and iterate over LEAVES.
(unless (string=? subdir ".")
(let ((dir (string-join dir "/")))
(mkdir (string-append output "/" dir "/" subdir))))
(for-each (cute loop <> `(,@dir ,subdir))
leaves))
((leaves ...)
;; A series of leaves: iterate over them.
(for-each (cut loop <> dir) leaves)))))
;;; union.scm ends here ;;; union.scm ends here
...@@ -43,47 +43,6 @@ (define %store ...@@ -43,47 +43,6 @@ (define %store
(test-begin "union") (test-begin "union")
(test-equal "tree-union, empty"
'()
(tree-union '()))
(test-equal "tree-union, leaves only"
'(a b c d)
(tree-union '(a b c d)))
(test-equal "tree-union, simple"
'((bin ls touch make awk gawk))
(tree-union '((bin ls touch)
(bin make)
(bin awk gawk))))
(test-equal "tree-union, several levels"
'((share (doc (make README) (coreutils README)))
(bin ls touch make))
(tree-union '((bin ls touch)
(share (doc (coreutils README)))
(bin make)
(share (doc (make README))))))
(test-equal "delete-duplicate-leaves, default"
'(bin make touch ls)
(delete-duplicate-leaves '(bin ls make touch ls)))
(test-equal "delete-duplicate-leaves, file names"
'("doc" ("info"
"/binutils/ld.info"
"/gcc/gcc.info"
"/binutils/standards.info"))
(let ((leaf=? (lambda (a b)
(string=? (basename a) (basename b)))))
(delete-duplicate-leaves '("doc"
("info"
"/binutils/ld.info"
"/binutils/standards.info"
"/gcc/gcc.info"
"/gcc/standards.info"))
leaf=?)))
(test-skip (if (and %store (test-skip (if (and %store
(false-if-exception (false-if-exception
(getaddrinfo "www.gnu.org" "80" AI_NUMERICSERV))) (getaddrinfo "www.gnu.org" "80" AI_NUMERICSERV)))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment