emacs-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: enable sorting by version in `ls-lisp-handle-switches'


From: Toru TSUNEYOSHI
Subject: Re: enable sorting by version in `ls-lisp-handle-switches'
Date: Fri, 12 Mar 2010 16:00:26 +0900

I mistaked in the following.

> And, if you eval the following, `string-logical-lessp' emulates sorting
> style by Windows Explorer on Windows XP or later. (Windows Explorer
> seems to use function `StrCmpLogicalW')
> 
>     (put 'string-version-lessp 'strcmplogical t)

I should have written

    (put 'string-logical-lessp 'strcmplogical t)

And for temporary change, I shoud have replaced symbol property with
variable.

    (setq string-logical-lessp--strcmplogical t)

I fixed and attached it.
;; `strcmplogical-trans-tbl-at-1st-char' and
;; `strcmplogical-trans-tbl-after-2nd-char' of `string-logical-lessp'
;; are results composed of only output by Window Explorer on Windows XP
;; and Windows API `StrCmpLogicalW'.
;;
;; It is arranged about some code (?\C-? ?' ?- and array using a case
;; sensitive of alphabet. (Enable case insensitive by `ls-lisp-ignore-case')

(put
 'string-logical-lessp
 'strcmplogical-trans-tbl-at-1st-char
 '(
   #x00 ; \C-@
   #x01 ; \C-a
   #x02 ; \C-b
   #x03 ; \C-c
   #x04 ; \C-d
   #x05 ; \C-e
   #x06 ; \C-f
   #x07 ; \C-g
   #x08 ; \C-h
   #x0e ; \C-n
   #x0f ; \C-o
   #x10 ; \C-p
   #x11 ; \C-q
   #x12 ; \C-r
   #x13 ; \C-s
   #x14 ; \C-t
   #x15 ; \C-u
   #x16 ; \C-v
   #x17 ; \C-w
   #x18 ; \C-x
   #x19 ; \C-y
   #x1a ; \C-z
   #x1b ; \C-[
   #x1c ; \C-\\
   #x1d ; \C-]
   #x1e ; \C-^
   #x1f ; \C-_
   #x20 ;   (SPC)
   #x09 ;       TAB
   #x0a ; \C-j
   #x0b ; \C-k
   #x0c ; \C-l
   #x0d ; \C-m
   #x21 ; !
   #x22 ; "
   #x23 ; #
   #x24 ; $
   #x25 ; %
   #x26 ; &
   #x27 ; '
   #x28 ; (
   #x29 ; )
   #x2a ; *
   #x2c ; ,
   #x2e ; .
   #x2f ; /
   #x3a ; :
   #x3b ; ;
   #x3f ; ?
   #x40 ; @
   #x5b ; [
   #x5d ; ]
   #x5e ; ^
   #x5f ; _
   #x60 ; `
   #x7b ; {
   #x7c ; |
   #x7d ; }
   #x7e ; ~
   #x2b ; +
   #x2d ; -
   #x3c ; <
   #x3d ; =
   #x3e ; >
   #x5c ; \
   #x7f ; \C-?
   #x30 ; 0
   #x31 ; 1
   #x32 ; 2
   #x33 ; 3
   #x34 ; 4
   #x35 ; 5
   #x36 ; 6
   #x37 ; 7
   #x38 ; 8
   #x39 ; 9
   #x41 ; A
   #x42 ; B
   #x43 ; C
   #x44 ; D
   #x45 ; E
   #x46 ; F
   #x47 ; G
   #x48 ; H
   #x49 ; I
   #x4a ; J
   #x4b ; K
   #x4c ; L
   #x4d ; M
   #x4e ; N
   #x4f ; O
   #x50 ; P
   #x51 ; Q
   #x52 ; R
   #x53 ; S
   #x54 ; T
   #x55 ; U
   #x56 ; V
   #x57 ; W
   #x58 ; X
   #x59 ; Y
   #x5a ; Z
   #x61 ; a
   #x62 ; b
   #x63 ; c
   #x64 ; d
   #x65 ; e
   #x66 ; f
   #x67 ; g
   #x68 ; h
   #x69 ; i
   #x6a ; j
   #x6b ; k
   #x6c ; l
   #x6d ; m
   #x6e ; n
   #x6f ; o
   #x70 ; p
   #x71 ; q
   #x72 ; r
   #x73 ; s
   #x74 ; t
   #x75 ; u
   #x76 ; v
   #x77 ; w
   #x78 ; x
   #x79 ; y
   #x7a ; z
   ))

(put
 'string-logical-lessp
 'strcmplogical-trans-tbl-after-2nd-char
 '(
   #x00 ; \C-@
   #x01 ; \C-a
   #x02 ; \C-b
   #x03 ; \C-c
   #x04 ; \C-d
   #x05 ; \C-e
   #x06 ; \C-f
   #x07 ; \C-g
   #x08 ; \C-h
   #x0e ; \C-n
   #x0f ; \C-o
   #x10 ; \C-p
   #x11 ; \C-q
   #x12 ; \C-r
   #x13 ; \C-s
   #x14 ; \C-t
   #x15 ; \C-u
   #x16 ; \C-v
   #x17 ; \C-w
   #x18 ; \C-x
   #x19 ; \C-y
   #x1a ; \C-z
   #x1b ; \C-[
   #x1c ; \C-\\
   #x1d ; \C-]
   #x1e ; \C-^
   #x1f ; \C-_
   #x30 ; 0
   #x31 ; 1
   #x32 ; 2
   #x33 ; 3
   #x34 ; 4
   #x35 ; 5
   #x36 ; 6
   #x37 ; 7
   #x38 ; 8
   #x39 ; 9
   #x20 ;   (SPC)
   #x09 ;       TAB
   #x0a ; \C-j
   #x0b ; \C-k
   #x0c ; \C-l
   #x0d ; \C-m
   #x21 ; !
   #x22 ; "
   #x23 ; #
   #x24 ; $
   #x25 ; %
   #x26 ; &
   #x27 ; '
   #x28 ; (
   #x29 ; )
   #x2a ; *
   #x2c ; ,
   #x2e ; .
   #x2f ; /
   #x3a ; :
   #x3b ; ;
   #x3f ; ?
   #x40 ; @
   #x5b ; [
   #x5d ; ]
   #x5e ; ^
   #x5f ; _
   #x60 ; `
   #x7b ; {
   #x7c ; |
   #x7d ; }
   #x7e ; ~
   #x2b ; +
   #x2d ; -
   #x3c ; <
   #x3d ; =
   #x3e ; >
   #x5c ; \
   #x7f ; \C-?
   #x41 ; A
   #x42 ; B
   #x43 ; C
   #x44 ; D
   #x45 ; E
   #x46 ; F
   #x47 ; G
   #x48 ; H
   #x49 ; I
   #x4a ; J
   #x4b ; K
   #x4c ; L
   #x4d ; M
   #x4e ; N
   #x4f ; O
   #x50 ; P
   #x51 ; Q
   #x52 ; R
   #x53 ; S
   #x54 ; T
   #x55 ; U
   #x56 ; V
   #x57 ; W
   #x58 ; X
   #x59 ; Y
   #x5a ; Z
   #x61 ; a
   #x62 ; b
   #x63 ; c
   #x64 ; d
   #x65 ; e
   #x66 ; f
   #x67 ; g
   #x68 ; h
   #x69 ; i
   #x6a ; j
   #x6b ; k
   #x6c ; l
   #x6d ; m
   #x6e ; n
   #x6f ; o
   #x70 ; p
   #x71 ; q
   #x72 ; r
   #x73 ; s
   #x74 ; t
   #x75 ; u
   #x76 ; v
   #x77 ; w
   #x78 ; x
   #x79 ; y
   #x7a ; z
   ))

;; Convert the above translation table to vector indexed by ascii code
(mapc
 (lambda (x)
   (unless (vectorp (get 'string-logical-lessp x))
     ;; make vector from list of cdr part
     ;;
     ;; '((#x00 . 0) (#x01 . 1) ... (#x7a . 127) ... (#x7f . 53))
     ;; => '(0 1 ... 127 ... 53)
     ;; =>  [0 1 ... 127 ... 53]
     (put 'string-logical-lessp
          x
          (apply
           'vector
           (mapcar
            'cdr
            ;; sort by car part
            ;;
            ;; '((#x00 . 0) (#x01 . 1) ... (#x7f . 53) ... (#x7a . 127))
            ;; => '((#x00 . 0) (#x01 . 1) ... (#x7a . 127) ... (#x7f . 53))
            (sort
             (let ((i 0))
               ;; make index on cdr part
               ;;
               ;; '(#x00 #x01 ... #x7a)
               ;; => '((#x00 . 0) (#x01 . 1) ... (#x7f . 53) ... (#x7a . 127))
               (mapcar
                (lambda (x)
                  (prog1
                      (cons x i)
                    (setq i (1+ i))))
                (get 'string-logical-lessp x)))
             (lambda (x y)
               (< (car x) (car y)))))))))
 '(strcmplogical-trans-tbl-at-1st-char
   strcmplogical-trans-tbl-after-2nd-char))

;; Length
(put 'string-logical-lessp 'strcmplogical-trans-tbl-at-1st-char-len
     (length (get 'string-logical-lessp 'strcmplogical-trans-tbl-at-1st-char)))
(put 'string-logical-lessp 'strcmplogical-trans-tbl-after-2nd-char-len
     (length (get 'string-logical-lessp 
'strcmplogical-trans-tbl-after-2nd-char)))

(defvar string-logical-lessp--strcmplogical nil
  "If non-nil, emulate filename sorting style of Window Explorer on
Windows XP (or later) and Windows API `StrCmpLogicalW' easily.
See also `string-logical-lessp'.

Example:
    (sort
     '(\".emacs\"
       \".emacs-places.~10~\"
       \".emacs-places.~9~\"
       \".emacs.~10~\"
       \".emacs.~9~\")
     'string-logical-lessp)
    =>
    ;; string-logical-lessp--strcmplogical
    ;; =>
    ;; nil:                     t:
    (\".emacs\"                   (\".emacs\"
     \".emacs-places.~9~\"         \".emacs.~9~\"            ; <= just after 
\".emacs\"
     \".emacs-places.~10~\"        \".emacs.~10~\"
     \".emacs.~9~\"                \".emacs-places.~9~\"
     \".emacs.~10~\")              \".emacs-places.~10~\")")



(defalias 'string-version< 'string-version-lessp)

(defun string-version-lessp (s1 s2 &optional ignore-case)
  "Return t if first arg string is less than second in version order.
Case is significant in this comparison if IGNORE-CASE is nil.
Symbols are also allowed; their print names are used instead.
See also `string-logical-lessp'.

Policy of version order:
  See `glibc-2.11.1/string/strverscmp.c' or it's manual.

Example:
  (sort
   '(\"foo.zml-1.gz\"          => (\"foo.zml-1.gz\"
     \"foo.zml-100.gz\"            \"foo.zml-2.gz\"
     \"foo.zml-12.gz\"             \"foo.zml-6.gz\"
     \"foo.zml-13.gz\"             \"foo.zml-12.gz\"
     \"foo.zml-2.gz\"              \"foo.zml-13.gz\"
     \"foo.zml-25.gz\"             \"foo.zml-25.gz\"
     \"foo.zml-6.gz\")             \"foo.zml-100.gz\")
   'string-version-lessp)

  (sort
   '(\"abc-1.01a.tgz\"         => (\"abc-1.007.tgz\"
     \"abc-1.007.tgz\"             \"abc-1.012b.tgz\"
     \"abc-1.012b.tgz\")           \"abc-1.01a.tgz\")
   'string-version-lessp)

  (sort
   '(\"9.000001.10.tgz\"       => (\"009.01.91.tgz\"
     \"009.01.91.tgz\")            \"9.000001.10.tgz\")
   'string-version-lessp)"
  (let* (;; states
         (S_N #x0)                   ; normal
         (S_I #x3)                   ; comparing integral part
         (S_F #x6)                   ; comparing fractionnal parts
         (S_Z #x9)                   ; idem but with leading Zeroes only

         ;; Symbol(s)    0       [1-9]   others
         ;; Transition   (10) 0  (01) d  (00) x
         ;;
         ;;                   x   d   0    ; state
         (next-state (vector S_N S_I S_Z   ; S_N
                             S_N S_I S_I   ; S_I
                             S_N S_F S_F   ; S_F
                             S_N S_F S_Z)) ; S_Z

         ;; result-type
         (CMP 2)                        ; return diff
         (LEN 3)                        ; compare using len_diff/diff

         ;; `glibc-2.11.1/string/strverscmp.c'
         ;;
         ;;                       *p1     *p1                       ; pair
         ;;                           *p2             *p2           ;
         ;;                                       *p3     *p3       ;
         ;;
         ;;                   x/x x/d x/0 d/x d/d d/0 0/x 0/d 0/0   ; state
         (result-type (vector CMP CMP CMP CMP LEN CMP CMP CMP CMP   ; S_N
                              CMP -1  -1  +1  LEN LEN +1  LEN LEN   ; S_I
                              CMP CMP CMP CMP CMP CMP CMP CMP CMP   ; S_F
                              CMP +1  +1  -1  CMP CMP -1  CMP CMP)) ; S_Z

;;;      ;; like `coreutils-6.12/lib/strverscmp.c'
;;;      ;;
;;;      ;;                   x/x x/d x/0 d/x d/d d/0 0/x 0/d 0/0   ; state
;;;      (result-type (vector CMP CMP CMP CMP LEN CMP CMP CMP CMP   ; S_N
;;;                           CMP -1  -1  +1  LEN LEN +1  LEN LEN   ; S_I
;;;                           CMP CMP CMP CMP LEN CMP CMP CMP CMP   ; S_F
;;;                           CMP +1  +1  -1  CMP CMP -1  CMP CMP)) ; S_Z

         ret                 ; same style as return value of C language `strcmp'
         l1 l2               ; length of string s1, s2
         (i 0)               ; index of string s1, s2
         c1 c2               ; character of string s1, s2 at index i
         diff                ; difference between c1 and c2
         (dl '(?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)) ; digit-list
         (dl-except-0 '(?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9)) ; digit-list except 0
         (state S_N)
         (repeat t))
    (setq ret (catch 'end
                (if (eq s1 s2)
                    (throw 'end 0))

                (if (symbolp s1)
                    (setq s1 (symbol-name s1)))
                (if (symbolp s2)
                    (setq s2 (symbol-name s2)))
                (unless (stringp s1)
                  (signal 'wrong-type-argument `(stringp ,s1)))
                (unless (stringp s2)
                  (signal 'wrong-type-argument `(stringp ,s2)))

                (if ignore-case
                    (setq s1 (upcase s1)
                          s2 (upcase s2)))

                (setq l1 (length s1)
                      l2 (length s2))

                (while repeat
                  ;; check limit
                  (if (<= l1 i)
                      (if (<= l2 i)
                          (throw 'end 0) ; s1 and s2 match.
                        (throw 'end -1)) ; s1 is less than s2.
                    (if (<= l2 i)
                        (throw 'end 1))) ; s1 is greater than s2.

                  (setq c1 (elt s1 i)
                        c2 (elt s2 i)
                        i (1+ i)

                        diff (- c1 c2)
                        state (+ state (cond ((eq c1 ?0) 2)
                                             ((memq c1 dl-except-0) 1)
                                             (t 0))))

                  (if (= diff 0)
                      (setq state (aref next-state state))
                    (setq repeat nil)))

                (setq state (aref result-type (+ (* state 3)
                                                 (cond ((eq c2 ?0) 2)
                                                       ((memq c2 dl-except-0) 1)
                                                       (t 0)))))

                (cond ((eq state CMP)
                       (setq ret diff))
                      ((eq state LEN)
                       (while (progn
                                (setq c1 (if (< i l1) (elt s1 i) -1) ; -1: 
invalid code as character
                                      c2 (if (< i l2) (elt s2 i) -1)
                                      i (1+ i))
                                (memq c1 dl))
                         (unless (memq c2 dl)
                           (throw 'end 1)))
                       (setq ret (if (memq c2 dl) -1 diff)))
                      (t
                       (setq ret state)))

                ret))

    ;; convert ret to the style of `string-lessp'
    (< ret 0)))

(defalias 'string-logical< 'string-logical-lessp)

(defun string-logical-lessp (s1 s2 &optional ignore-case)
  "Return t if first arg string is less than second in logical version order.
Case is significant in this comparison if IGNORE-CASE is nil.
Symbols are also allowed; their print names are used instead.
See also `string-version-lessp'.

Policy of Logical version order:
  Sort by number whose leading 0 is skipped.
  For example, \"1\", \"0001\" and \"0000001\" are equivalent.

  And if `string-logical-lessp--strcmplogical' is non-nil, emulate
  filename sorting style of Window Explorer on Windows XP (or later) and
  Windows API `StrCmpLogicalW' easily. (The emulation is poor and
  incomplete.)
    1st character: sort by alphabetical order (not precisely ascii)
    after 2nd character: digit takes first priority

Example:
  (sort
   '(\"foo.zml-1.gz\"          => (\"foo.zml-1.gz\"
     \"foo.zml-100.gz\"            \"foo.zml-2.gz\"
     \"foo.zml-12.gz\"             \"foo.zml-6.gz\"
     \"foo.zml-13.gz\"             \"foo.zml-12.gz\"
     \"foo.zml-2.gz\"              \"foo.zml-13.gz\"
     \"foo.zml-25.gz\"             \"foo.zml-25.gz\"
     \"foo.zml-6.gz\")             \"foo.zml-100.gz\")
   'string-logical-lessp)

  (sort
   '(\"abc-1.01a.tgz\"         => (\"abc-1.01a.tgz\"
     \"abc-1.007.tgz\"             \"abc-1.007.tgz\"
     \"abc-1.012b.tgz\")           \"abc-1.012b.tgz\")
   'string-logical-lessp)

  (sort
   '(\"9.000001.10.tgz\"       => (\"9.000001.10.tgz\"
     \"009.01.91.tgz\")            \"009.01.91.tgz\")
   'string-logical-lessp)

  ;; if `string-logical-lessp--strcmplogical' is non-nil

  (sort
   '(\"1#.txt\"                => (\"##.txt\"
     \"##.txt\")                   \"1#.txt\")
   'string-logical-lessp)

  (sort
   '(\"#1.txt\"                => (\"#1.txt\"
     \"##.txt\")                   \"##.txt\")
   'string-logical-lessp)"
  (let (ret                  ; same style as return value of C language `strcmp'
        l1 l2                ; length of string s1, s2
        (i1 0)               ; index of string s1, s2
        (i2 0)
        (c1 0)                      ; character of string s1, s2 at index n1, n2
        (c2 0)                      ; (set dummy code as initial value)
        d1 d2                       ; digit flag
        (dl '(?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9))) ; digit-list
    (if (symbolp s1)
        (setq s1 (symbol-name s1)))
    (if (symbolp s2)
        (setq s2 (symbol-name s2)))
    (unless (stringp s1)
      (signal 'wrong-type-argument `(stringp ,s1)))
    (unless (stringp s2)
      (signal 'wrong-type-argument `(stringp ,s2)))

    (if ignore-case
        (setq s1 (upcase s1)
              s2 (upcase s2)))

    (setq l1 (length s1)
          l2 (length s2))

    (setq ret (catch 'end
                (while t
                  ;; check limit
                  (if (<= l1 i1)
                      (if (<= l2 i2)
                          (throw 'end 0) ; s1 and s2 match logically.
                        (throw 'end -1)) ; s1 is less than s2 logically.
                    (if (<= l2 i2)
                        (throw 'end 1))) ; s1 is greater than s2 logically.

                  (setq c1 (elt s1 i1)
                        c2 (elt s2 i2)

                        d1 (memq c1 dl)
                        d2 (memq c2 dl))

                  (cond ((and d1 d2)    ; both c1 and c2 are digit.
                         (let (n1 n2    ; number
                               w1 w2)   ; length (or width) of number
                           ;; skip needless "0"
                           ;;
                           ;; example:
                           ;;  "0"      => "0"
                           ;;  "00000"  => "0"
                           ;;  "10"     => "10"
                           ;;  "010"    => "10"
                           ;;  "000010" => "10"
                           (string-match "0*\\([0-9]+\\)" s1 i1)
                           (setq n1 (match-string 1 s1)
                                 w1 (length n1)
                                 i1 (match-end 1)) ; next character index after 
number
                           (string-match "0*\\([0-9]+\\)" s2 i2)
                           (setq n2 (match-string 1 s2)
                                 w2 (length n2)
                                 i2 (match-end 1))

                           ;; number whose length is shorter is less than 
another.
                           (cond ((< w1 w2) (throw 'end -1))
                                 ((> w1 w2) (throw 'end 1))
                                 (t
                                  ;; as both lengths are equal,
                                  ;; we should use `compare-strings' instead of
                                  ;; `number-to-string' to avoid overflow.
                                  (setq ret (compare-strings n1 nil nil
                                                             n2 nil nil))
                                  (unless (eq ret t)
                                    (throw 'end ret))))))
                        (t
                         (setq ret (- c1 c2))
                         (if (= ret 0)
                             ;; next character index
                             (setq i1 (1+ i1)
                                   i2 (1+ i2))
                           (when string-logical-lessp--strcmplogical
                             (let (tbl tbl-len)
                               (cond ((= i1 0) ; 1st character: sort by 
alphabetical order
                                      (setq tbl (get 'string-logical-lessp
                                                     
'strcmplogical-trans-tbl-at-1st-char)
                                            tbl-len (get 'string-logical-lessp
                                                         
'strcmplogical-trans-tbl-at-1st-char-len)))
                                     (t ; after 2nd character: digit takes 
first priority
                                      (setq tbl (get 'string-logical-lessp
                                                     
'strcmplogical-trans-tbl-after-2nd-char)
                                            tbl-len (get 'string-logical-lessp
                                                         
'strcmplogical-trans-tbl-after-2nd-char-len))))
                               (if (< c1 tbl-len)
                                   (setq c1 (aref tbl c1)))
                               (if (< c2 tbl-len)
                                   (setq c2 (aref tbl c2))))
                             (setq ret (- c1 c2)))
                           (throw 'end ret)))))))

    ;; convert ret to the style of `string-lessp'
    (< ret 0)))

reply via email to

[Prev in Thread] Current Thread [Next in Thread]