info-gnus-english
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[nnrss.el patch] Only Use Subject, Author, URL and Date to build the Hash Index


From: David Hansen
Subject: [nnrss.el patch] Only Use Subject, Author, URL and Date to build the Hash Index
Date: Sun, 04 Feb 2007 05:10:34 +0100
User-agent: Gnus/5.110006 (No Gnus v0.6) Emacs/22.0.93 (gnu/linux)

Hello,

So far this works well with the feeds I read: no nasty duplicates
anymore.  It should be easy to extend to other fields if necessary.

David

*** nnrss.el    30 Jan 2007 21:50:55 +0100      7.47
--- nnrss.el    03 Feb 2007 11:42:52 +0100      
***************
*** 691,750 ****
          rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/")
          content-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/modules/content/"))
      (dolist (item (nreverse (nnrss-find-el (intern (concat rss-ns "item")) xml)))
!       (when (and (listp item)
!                (string= (concat rss-ns "item") (car item))
!                (progn (setq hash-index (md5 (gnus-prin1-to-string item)))
!                       (not (gethash hash-index nnrss-group-hashtb))))
        (setq subject (nnrss-node-text rss-ns 'title item))
!       (setq url (nnrss-decode-entities-string
!                  (nnrss-node-text rss-ns 'link (cddr item))))
!       (setq extra (or (nnrss-node-text content-ns 'encoded item)
!                       (nnrss-node-text rss-ns 'description item)))
!       (if (setq feed-subject (nnrss-node-text dc-ns 'subject item))
!           (setq extra (concat feed-subject "<br /><br />" extra)))
!       (setq author (or (nnrss-node-text rss-ns 'author item)
                         (nnrss-node-text dc-ns 'creator item)
                         (nnrss-node-text dc-ns 'contributor item)))
!       (setq date (nnrss-normalize-date
!                   (or (nnrss-node-text dc-ns 'date item)
!                       (nnrss-node-text rss-ns 'pubDate item))))
!       (setq comments (nnrss-node-text rss-ns 'comments item))
!       (when (setq enclosure (cadr (assq (intern (concat rss-ns "enclosure")) item)))
!         (let ((url (cdr (assq 'url enclosure)))
!               (len (cdr (assq 'length enclosure)))
!               (type (cdr (assq 'type enclosure)))
!               (name))
!           (setq len
!                 (if (and len (integerp (setq len (string-to-number len))))
!                     ;; actually already in `ls-lisp-format-file-size' but
!                     ;; probably not worth to require it for one function
!                     (do ((size (/ len 1.0) (/ size 1024.0))
!                          (post-fixes (list "" "k" "M" "G" "T" "P" "E")
!                                      (cdr post-fixes)))
!                         ((< size 1024)
!                          (format "%.1f%s" size (car post-fixes))))
!                   "0"))
!           (setq url (or url ""))
!           (setq name (if (string-match "/\\([^/]*\\)$" url)
!                          (match-string 1 url)
!                        "file"))
!           (setq type (or type ""))
!           (setq enclosure (list url name len type))))
!       (push
!        (list
!         (incf nnrss-group-max)
!         (current-time)
!         url
!         (and subject (nnrss-mime-encode-string subject))
!         (and author (nnrss-mime-encode-string author))
!         date
!         (and extra (nnrss-decode-entities-string extra))
!         enclosure
!         comments
!         hash-index)
!        nnrss-group-data)
!       (puthash hash-index t nnrss-group-hashtb)
!       (setq changed t))
        (setq extra nil))
      (when changed
        (nnrss-save-group-data group server)
--- 691,754 ----
          rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/")
          content-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/modules/content/"))
      (dolist (item (nreverse (nnrss-find-el (intern (concat rss-ns "item")) xml)))
!       (when (and (listp item) (string= (concat rss-ns "item") (car item)))
!         ;; for hashing use subject, author, url and date
        (setq subject (nnrss-node-text rss-ns 'title item))
!         (setq author (or (nnrss-node-text rss-ns 'author item)
                         (nnrss-node-text dc-ns 'creator item)
                         (nnrss-node-text dc-ns 'contributor item)))
!         (setq url (nnrss-decode-entities-string
!                  (nnrss-node-text rss-ns 'link (cddr item))))
!         (setq date (or (nnrss-node-text dc-ns 'date item)
!                        (nnrss-node-text rss-ns 'pubDate item)))
!         (when (progn
!                 (setq hash-index (md5 (concat (or subject "")
!                                               (or author "")
!                                               (or url "")
!                                               (or date ""))))
!                 (not (gethash hash-index nnrss-group-hashtb)))
!           (setq date (nnrss-normalize-date date))
!           (setq extra (or (nnrss-node-text content-ns 'encoded item)
!                           (nnrss-node-text rss-ns 'description item)))
!           (if (setq feed-subject (nnrss-node-text dc-ns 'subject item))
!               (setq extra (concat feed-subject "<br /><br />" extra)))
!           (setq comments (nnrss-node-text rss-ns 'comments item))
!           (when (setq enclosure (cadr (assq (intern (concat rss-ns "enclosure")) item)))
!             (let ((url (cdr (assq 'url enclosure)))
!                   (len (cdr (assq 'length enclosure)))
!                   (type (cdr (assq 'type enclosure)))
!                   (name))
!               (setq len
!                     (if (and len (integerp (setq len (string-to-number len))))
!                         ;; actually already in `ls-lisp-format-file-size' but
!                         ;; probably not worth to require it for one function
!                         (do ((size (/ len 1.0) (/ size 1024.0))
!                              (post-fixes (list "" "k" "M" "G" "T" "P" "E")
!                                          (cdr post-fixes)))
!                             ((< size 1024)
!                              (format "%.1f%s" size (car post-fixes))))
!                       "0"))
!               (setq url (or url ""))
!               (setq name (if (string-match "/\\([^/]*\\)$" url)
!                              (match-string 1 url)
!                            "file"))
!               (setq type (or type ""))
!               (setq enclosure (list url name len type))))
!           (push
!            (list
!             (incf nnrss-group-max)
!             (current-time)
!             url
!             (and subject (nnrss-mime-encode-string subject))
!             (and author (nnrss-mime-encode-string author))
!             date
!             (and extra (nnrss-decode-entities-string extra))
!             enclosure
!             comments
!             hash-index)
!            nnrss-group-data)
!           (puthash hash-index t nnrss-group-hashtb)
!           (setq changed t)))
        (setq extra nil))
      (when changed
        (nnrss-save-group-data group server)

reply via email to

[Prev in Thread] Current Thread [Next in Thread]