Wednesday, September 28, 2005

[Tcl] RSS Feed

#! /usr/local/bin/tclsh


if { $argc == 0 } {
puts stderr "Usage: $argv0 url \[url\] ..."
exit 1
}


package require http
package require tdom
package require uri


http::config -useragent {Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.2) Gecko/20040804 Netscape/7.2 (ax)} -accept {text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5}


proc rss2dom { url } {
set timeout 8000; # in milliseconds
set s [http::geturl $url -timeout $timeout]
set ncode [http::ncode $s]
set data [http::data $s]
http::cleanup $s
if { $ncode == 200 } {
 if { [catch {dom parse $data} root] == 0 } {
  set doc [$root documentElement]
  return $doc
 } else {
  return -code error "Error (dom) - $root"
 }
} else {
 return -code error "Error (http) - Code=$ncode"
}
}


proc rssExplore { doc } {
set max 15
set channel [$doc selectNodes {//*[local-name()='channel']}]
set rssTitle [[$channel selectNodes {*[local-name()='title']/text()}] nodeValue]
set rssLink  [[$channel selectNodes {*[local-name()='link']/text()}] nodeValue]
set rc1 [list $rssTitle $rssLink]

set rc2 {}
set rssItem  [$doc selectNodes {//*[local-name()='item']}]
foreach i [lrange $rssItem 0 [expr {$max-1}]] {
 set t [[$i selectNodes {*[local-name()='title']/text()}] nodeValue]
 set l [[$i selectNodes {*[local-name()='link']/text()}] nodeValue]
 set t [string trim $t]
 set l [string trim $l]

 # fix up url if it does not start with http/https
 if { [string match {http*} $l] == 0 } {
  if { [string match {/*} $l] == 0 } {
   set l [join [list $rssLink $l] {/}]
  } else {
   array set uri [uri::split $rssLink]
   # uri::split may return an empty port; only append ":port" when present
   set port [expr {$uri(port) eq "" ? "" : ":$uri(port)"}]
   set l "$uri(scheme)://$uri(host)$port$l"
  }
 }

 lappend rc2 $t
 lappend rc2 $l
}
return [list $rc1 $rc2]
}



set fp [open POPUP.html w]
# put in your own html->body open and close tags
puts $fp ""
puts $fp ""
puts $fp ""
foreach url $argv {
 if { [catch {rss2dom $url} doc] == 0 } {
  foreach { header items } [rssExplore $doc] {
   foreach { t l } $header {}
   # channel title links to the site, plus [rss] and [top] anchors
   puts $fp [format {<a href="%s">%s</a> [<a href="%s">rss</a>] [<a href="#top">top</a>]} $l $t $url]
   puts $fp "<blockquote>"
   foreach { t l } $items {
    puts $fp [format {<br>• <a href="%s">%s</a>} $l $t]
   }
   puts $fp "</blockquote>"
  }
 }
}
puts $fp ""
puts $fp ""
close $fp

Calling the above rss.tcl from a UNIX shell script

#! /bin/sh

./rss.tcl \
 'http://www.sun.com/rss/news-rss.xml' \
 'http://www.sun.com/rss/events-rss.xml' \
 'http://www.sun.com/rss/books-just-published-rss.xml' \
 'http://developers.sun.com/rss/solaris.xml' \
 'http://www.forbes.com/technology/index.xml' \
 'http://www.oreillynet.com/meerkat/?_fl=rss10&t=ALL&c=916' \
 'http://www.cnet.com/4914-6022_1-0.xml?author=Wood:Molly&maxhits=5' \
 'http://lwn.net/headlines/newrss' \
 'http://www.linuxjournal.com/node/feed' \
 'http://rssnewsapps.ziffdavis.com/eweeklinux.xml' \
 'http://www.utilitycomputing.com/news/recent10.xml' \
 'http://rssnewsapps.ziffdavis.com/eweeksecurity.xml' \
 'http://www.sans.org/rr/rss/' \
 'http://www.sans.org/newsletters/newsbites/rss/' \
 'http://www.sans.org/newsletters/risk/rss/' \
 'http://www.nwfusion.com/rss/datacenter.xml' \
 'http://www.nwfusion.com/rss/utility.xml' \
 'http://rssnewsapps.ziffdavis.com/eweek_infrastructure.xml'


if [ $? -eq 0 ]; then
/usr/local/bin/curl \
 --silent \
 --output /dev/null \
 --user web:master \
 -F upfile=@POPUP.html \
 -F uri=/ \
 http://192.168.2.3/upfile2uri/index_add2.cgi
fi
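
To keep the page fresh, the wrapper can simply be run from cron. A minimal sketch, assuming the script above is saved as /path/to/getrss.sh (a hypothetical path) and that it uses absolute paths or cd's to its own directory first:

# crontab entry: rebuild and upload POPUP.html at the top of every hour
0 * * * * /path/to/getrss.sh > /dev/null 2>&1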


[Tcl] Download Accelerator in Tcl

#! /usr/local/bin/tclsh
#
# Download Accelerator in Tcl with threads
# - inspired by Download Accelerator Plus (www.speedbit.com)
# - output mimics 'wget' (too lazy to craft a GUI)
#
#
# Written by: Chan Chi Hung
# Date: 4 Nov 2004
# History:
# 1.0 - without resume capability
# 1.1 - with resume capability
#             intermediate files are hidden
# 1.2 - check header Accept-Ranges before setting nthreads
#     - in resume, nthreads has to follow the previous setting
#     - http progress, similar to wget (see below)
# 1.3 - fix the status bar during resume
#     - fix nthreads when a thread has not reached max and terminated
#
# Intermediate files:
#  hidden files .[urlBasename $url]-$seq
#               .[urlBasename $url]#nthreads


#
# simulate wget output
#
# wget http://www.abc123.com/abc/video/abc.mpg
# --16:04:19--  http://www.abc123.com/abc/video/abc.mpg
#            => `abc.mpg'
# Resolving www.abc123.com... 195.222.13.16
# Connecting to www.abc123.com[195.222.13.16]:80... connected.
# HTTP request sent, awaiting response... 200 OK
# Length: 546,140 [video/mpeg]
# 
# 100%[====================================>] 546,140      6.51K/s  ETA 00:00 
# 16:06:36 (3.91 KB/s) - `abc.mpg' saved [546140/546140]


if { $argc < 1 || $argc > 2 } {
 puts stderr "Usage: $argv0  \[#threads\]"
 puts stderr "       default #threads is 4"
 exit 1
}



proc comma {num {sep ,}} {
 while {[regsub {^([-+]?\d+)(\d\d\d)} $num "\\1$sep\\2" num]} {}
 return $num
}
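# example: comma 546140    => 546,140
#          comma 1234567 . => 1.234.567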


proc now {} {
 return [clock format [clock seconds] -format {%H:%M:%S}]
}


proc lremove { l v } {
 foreach i $v {
  set ind [lsearch $l $i]
  if { $ind == -1 } { continue }
  set indm1 [expr {$ind-1}]
  set indp1 [expr {$ind+1}]
  set l [concat [lrange $l 0 $indm1] [lrange $l $indp1 end]]
 }
 return $l
}
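# example: lremove {a b c d} {b d} => {a c}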


proc urlSize { url } {
 global validate

 if { [info exists validate] == 0 } {
  set validate [http::geturl $url -validate 1]
 }
 set code [http::ncode $validate]
 if { $code != 200 } {
  puts stderr "Error. http return code=$code"
  exit 2
 }
 set size [set ${validate}(totalsize)]

 return $size
}



proc urlType { url } {
 global validate

 if { [info exists validate] == 0 } {
  set validate [http::geturl $url -validate 1]
 }
 return [set ${validate}(type)]
}


proc isAcceptRanges { url } {
 global validate

 if { [info exists validate] == 0 } {
  set validate [http::geturl $url -validate 1]
 }
 array set www [set ${validate}(meta)]
 if { [info exists www(Accept-Ranges)] == 1 } {
  return 1
 } else {
  return 0
 }
}
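# note: the three helpers above share one cached -validate request;
# with -validate 1, http::geturl issues a HEAD request, so only the
# response headers come back, and the token's meta element holds them
# as key/value pairs, e.g. (server dependent) Content-Length,
# Content-Type and Accept-Ranges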


#
# get basename of url
#
proc urlBasename { url } {
 array set www [uri::split $url]
 set fname [lindex [split $www(path) /] end]
 return $fname
}


#
# work out the byte range
#
proc byteRanges { size nthreads } {
 set step [expr $size/$nthreads]
 set p0 -1
 set p1 -1
 set br {}
 for { set i 0 } { $i < $nthreads } { incr i } {
  set p0 [expr $p1 + 1]
  if { $i == [expr {$nthreads-1}] } {
   set p1 $size
   } else {
    set p1 [expr $p0 + $step]
   }
  lappend br $p0
  lappend br $p1
  set p0 $p1
 }
 return $br
}
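# example: byteRanges 1000 4 => {0 250 251 501 502 752 753 1000}
# each (p0 p1) pair becomes one worker's "Range: bytes=p0-p1" request header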


#
# fix up nthreads
# if server does not support accept-range, nthreads=1
# if '#nthreads' file exists, get from there
#
proc fixNthreads { url nthreads } {
 set rc $nthreads

 # if server cannot support byte range, nthreads=1
 if { [isAcceptRanges $url] == 0 } {
  set rc 1
 }

 # in resume mode, nthreads now and previous has to tally
 set fname [urlBasename $url]
 set ntFilename ".${fname}#nthreads"
 if { [file exists $ntFilename] } {
  set fp [open $ntFilename r]
  set rc [read $fp]
  close $fp
 } else {
  set fp [open $ntFilename w]
  puts $fp $nthreads
  close $fp
  set rc $nthreads
 }
 return $rc
}




# MAIN PROGRAM STARTS HERE


package require Thread
package require http
package require uri


set url [lindex $argv 0]
set nthreads 4
if { $argc == 2 } {
 set nthreads [lindex $argv 1]
}
tsv::set dap url $url
tsv::set dap t0 [clock seconds]


puts "--[now]-- $url"
puts "\t=> [urlBasename $url]"


#
# if resume is needed, set resumeSize to sum of file size
#
set resume [glob -nocomplain [format {.%s-*} [urlBasename $url]]]
if { [llength $resume] > 0 } {
 set rs 0
 foreach i $resume {
  incr rs [file size $i]
 }
 tsv::set dap resumeSize $rs
} else {
 tsv::set dap resumeSize 0
}


set nthreads [fixNthreads $url $nthreads]


#
# create and initialise thread pool
#
puts -nonewline "Setting up thread pool of $nthreads threads ... "
set tpool [tpool::create -minworkers $nthreads -maxworkers $nthreads \
 -idletime 20 -initcmd {
 package require http
 package require uri

 proc dl { seq p0 p1 } {
  set url [tsv::get dap url]
  array set www [uri::split $url]
  set fname [lindex [split $www(path) /] end]
  set fname [format {.%s-%d} $fname $seq]

  # resume
  if { [file exists $fname] == 1 } {
   set size [file size $fname]
   if { $size >= [expr $p1-$p0+1] } {
    return
   }
   set p0 [expr $p0+$size]
  }

  set fpi [open $fname a]
  fconfigure $fpi -translation binary
  set s [http::geturl $url -channel $fpi -binary 1 \
   -progress httpProgress \
   -headers [list Range bytes=$p0-$p1]]
  http::cleanup $s
  close $fpi
 }
 proc httpProgress { token total current } {
  upvar #0 $token state

  tsv::set dap thread[thread::id] $current

  # calculate
  set max [tsv::get dap size]
  set sum [tsv::get dap resumeSize]
  foreach t [thread::names] {
   # only count threads that have reported progress (skips the main thread)
   if { ![tsv::exists dap thread$t] } { continue }
   incr sum [tsv::get dap thread$t]
  }

  # progress status 
  set t0 [tsv::get dap t0]
  set percent [expr {100*$sum/$max}]
  set elapse [expr {[clock seconds] - $t0}]
  if { $elapse == 0 } { set elapse 1 }
  set kbps [expr {$sum*8.0/(1024.0*$elapse)}]
  # estimate time remaining from the average rate so far
  set eta  [expr {$sum > 0 ? int(($max-$sum)*double($elapse)/$sum) : 0}]
  set etam [expr {$eta/60}]
  set etas [expr {$eta-$etam*60}]
  set status [format {%3d%%[%-51s] %6.2fKbps  ETA %02d:%02d} \
   $percent "[string repeat = [expr {$percent/2}]]>" $kbps $etam $etas]
  puts -nonewline "$status\r"
  flush stdout
 }
}]
puts "Done"


#
# submit job to thread pool, work out the byte range for each thread
#
puts -nonewline "Submitting jobs to all threads ... "
set joblist {}
set seq 1
set size [urlSize $url]
tsv::set dap size $size
foreach { p0 p1 } [byteRanges $size $nthreads] {
 lappend joblist [tpool::post $tpool [list dl $seq $p0 $p1]] 
 incr seq 
}
puts "Done"


puts "Length: [comma $size] \[[urlType $url]\]"



#
# monitor thread pool til completion
#
while 1 {
 set f [tpool::wait $tpool $joblist]
 set joblist [lremove $joblist $f]
 if { [llength $joblist] == 0 } { break }
 after 100
}


#
# consolidation
#
puts "\n"
puts -nonewline "Download completed. Consolidating ... "
set fnameo [urlBasename $url]
set fpo [open $fnameo w]
fconfigure $fpo -translation binary
for { set seq 1 } { $seq <= $nthreads } { incr seq } {
 set fnamei [format {.%s-%d} $fnameo $seq]
 set fpi [open $fnamei r]
 fconfigure $fpi -translation binary
 fcopy $fpi $fpo -size [file size $fnamei]
 close $fpi
}
close $fpo
puts "Done"


#
# cleanup
#
foreach i [glob -nocomplain ".${fnameo}*"] {
 file delete -force $i
}


puts ""
puts "--[now]-- $fnameo ([file size $fnameo]/$size)"


[UNIX] Download Accelerator using curl

#! /bin/sh


PATH=/usr/bin:/bin:/usr/sbin:$HOME/bin
# intermediate files go to $TMPDIR; fall back to /tmp if it is unset
TMPDIR=${TMPDIR:-/tmp}


if [ $# -lt 1 -o $# -gt 2 ]; then
 echo "$0  [nthreads=4]"
 exit 1
fi
url=$1
thread=${2:-4}
 

size=`curl -s --head $url | awk '/Content-[Ll]ength:/ {printf("%d",$NF)}'`
count=1
end=-1
step=`expr $size / $thread`
jobs=""
while [ $count -le $thread ]
do
 start=`expr $end + 1`
 if [ $count -eq $thread ]; then
  end=$size
 else
  end=`expr $count \* $step`
  end=`expr $end - 1`
 fi
 pad=`echo $count | awk -v count=$count '{printf("%04d",count)}'`
 curl -s -r ${start}-${end} -o $TMPDIR/tmp$$.$pad $url &
 job=$!
 if [ $count -eq 1 ]; then
  jobs="$job"
 else
  jobs="$jobs $job"
 fi
 count=`expr $count + 1`
done

# wait for all background jobs to finish
while :
do
 running=0
 for j in $jobs
 do
  # kill -0 checks that the process still exists, without the risk of
  # matching the pid in some unrelated ps field
  kill -0 $j > /dev/null 2>&1
  if [ $? -eq 0 ]; then
   running=`expr $running + 1`
  fi
 done
 if [ $running -eq 0 ]; then
  break
 fi
 sleep 1
done


count=1
while [ $count -le $thread ]
do
 pad=`echo $count | awk -v count=$count '{printf("%04d",count)}'`
 cat $TMPDIR/tmp$$.$pad >> `basename $url`
 rm -f $TMPDIR/tmp$$.$pad
 count=`expr $count + 1`
done
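
The polling loop above checks ps once a second; since the curl transfers are the script's own background children, the shell's wait builtin can simply block on all of them. A minimal sketch of that variant (the URL and byte ranges are made up):

#! /bin/sh
# fetch two halves of a hypothetical 1000000-byte file in parallel,
# then let the shell wait for both background jobs to exit
url=http://www.example.com/file.bin
curl -s -r 0-499999      -o /tmp/part.0001 "$url" &
curl -s -r 500000-999999 -o /tmp/part.0002 "$url" &
wait
cat /tmp/part.0001 /tmp/part.0002 > `basename $url`
rm -f /tmp/part.0001 /tmp/part.0002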

[UNIX] cat equivalent

cat1()
{
 dd if=$1 2>/dev/null
}
cat2()
{
 sed -e 's/ / /' $1
}
cat3()
{
 awk '{print}' $1
}
cat4()
{
 tr 1 1 < $1
}
cat5()
{
 paste $1 /dev/null
}
cat6()
{
 diff $1 /dev/null | sed -e 's/^< //' | awk 'NR>1 {print}'
}
cat7()
{
 gzip - < $1 | gunzip -
}
cat8()
{
 bzip2 - < $1 | bunzip2 -
}
cat9()
{
 perl -lane 'print' $1
}
cat10()
{
 grep '' $1
}
cat11()
{
 cp $1 /dev/tty
}
cat12()
{
 unix2dos < $1 | dos2unix
}
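
A quick way to check that any of these really behaves like cat is to compare its output against the original file (cat11 writes to the terminal rather than stdout, so it will not pass a pipe test like this):

cat1() { dd if=$1 2>/dev/null; }
cat1 /etc/hosts | cmp - /etc/hosts && echo "same output as cat"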