;;; -*- Mode: LISP; Package: :cl-user; BASE: 10; Syntax: ANSI-Common-Lisp; -*- ;;; ;;; Touched: Sun Jan 09 09:46:59 2005 +0530 ;;; Time-stamp: <06/11/08 20:54:12 madhu> ;;; Bugs-To: ;;; Copyright (C) 2005, 2006 Madhu. All Rights Reserved. ;;; ;;; newsdir.lisp cmucl file manipulation ;;; (defpackage "SCRATCH-NEWS" (:use "CL")) (in-package "SCRATCH-NEWS") #|| (import 'user::lc) (lc "home:cl/cmime/cmime") (lc "home:cl/babyl") (lc "home:cl/parse-time") ||# ;; ;; Time-stamp: <06/07/31 14:30:17 madhu> ;; (defun crop-message-id (string) (let* ((start (position #\< string)) (end (position #\> string :start start))) (when (and start end) (subseq string (1+ start) end)))) (defun snarf-message-id-and-date (file &optional progressp) (check-type file pathname) (let (msg-id date) (with-open-file (stream file) (when progressp (format t "~&file=~a " file)) (loop with line-length for line = (read-line stream t nil) for lineno from 0 while line do (when progressp (format t ".")) (setf line-length (length line)) (cond ((and (not msg-id) (>= line-length 11) (string-equal "Message-ID:" line :end2 11)) (setq msg-id (crop-message-id (subseq line 12))) (if date (loop-finish))) ((and (not date) (>= line-length 5) (string-equal "Date:" line :end2 5)) (setq date (PARSE-TIME:parse-time (subseq line 6))) (if msg-id (loop-finish)))) finally (unless (and msg-id date) (error "~S is invalid: Missing some of (msg-id=~a date=~a)" file msg-id date)) (return (list msg-id date)))))) (defstruct nndir-entry pathname message-id date file-write-date number) (defun snarf-nndir-entries (directory) (let* ((dirlist (directory directory)) removables-removed) (loop for prev-cons = nil then cons for cons on dirlist for pathname = (car cons) do (cond ((and (not removables-removed) (equal (pathname-name pathname) ".overview")) (cond (prev-cons (setf (cdr prev-cons) (cdr cons))) (t (setq dirlist (cdr cons)))) (setq removables-removed t)) (t (let ((num (parse-integer (pathname-name pathname))) (write-date (file-write-date pathname))) (destructuring-bind (message-id date) (snarf-message-id-and-date pathname) (setf (car cons) (make-nndir-entry :number num :pathname pathname :message-id message-id :file-write-date write-date :date date))))))) (sort dirlist #'< :key 'nndir-entry-number))) #|| (setq $nnents (snarf-nndir-entries #p"home:/News/cache/nntp+news.gmane.org:gmane.lisp.cmucl.general/")) (length $nnents) ||# (defun detect-duplicates (nndir-entries &key key (test #'equalp) (table (make-hash-table :test test))) (map nil (lambda (x &aux (hashkey (if key (funcall key x) x))) (multiple-value-bind (value foundp) (gethash hashkey table) (cond (foundp (push x value)) (t (setf (gethash hashkey table) (list x)))))) nndir-entries) (loop for key being each hash-key of table using (hash-value value) if (cdr value) collect (cons key value))) #|| (loop for x in '(nndir-entry-file-date nndir-entry-name nndir-entry-message-id nndir-entry-file-date) for dups = (detect-duplicates $nnents) when dups collect (cons x dups)) ||# (defun collect-articles-posted-in-year (nnents year &key) (let ((lo (encode-universal-time 0 0 0 1 1 year 0)) (hi (encode-universal-time 0 0 0 1 1 (1+ year) 0))) (loop for x in nnents for date = (nndir-entry-date x) when (< lo date hi) collect x))) #+nil (collect-articles-posted-in-year $nnents 2004) #+nil (mapcar (lambda (x) (pathname-name (nndir-entry-pathname x))) (collect-articles-posted-in-year $nnents 2004))