| Revision 61,
1.4 KB
checked in by lenny, 8 years ago
(diff) |
|
|
| Line | |
|---|
#!/usr/local/bin/tclsh

# Fetches a random English Wikipedia article, pulls the wikitext out of the
# <textarea> on its edit page, runs it through aspell and prints every
# flagged word together with aspell's suggestions (if any).

package require http
package require htmlparse
package require struct
package require fileutil

# Get random url
# Special:Random is expected to answer with a redirect; the article URL is
# taken from the Location response header.
set q [::http::geturl "http://en.wikipedia.org/wiki/Special:Random"]
set url ""
# $q is the name of the http state array; its "meta" element is a flat
# name/value list of response headers. Header names are matched without
# regard to case because servers differ in how they capitalise them.
foreach {hdr_name hdr_value} [set [set q](meta)] {
    if {[string equal -nocase $hdr_name "Location"]} {
        set url $hdr_value
    }
}
::http::cleanup $q

if {$url eq ""} {
    puts stderr "no Location header in the Special:Random response"
    exit 1
}

# Modify page url
# The last path component of the redirect target is the page title; it is
# reused as-is in the edit-page URL.
if {![regexp {/([^/]+)$} $url m title]} {
    puts stderr "cannot extract a page title from \"$url\""
    exit 1
}
set url "http://en.wikipedia.org/w/index.php?title=$title&action=edit"

# Get and parse random page
set q [::http::geturl $url]
::struct::tree ht
::htmlparse::2tree [::http::data $q] ht
::http::cleanup $q

# ta_found is raised when a textarea node is visited and lowered again once
# the next PCDATA node (its text content) has been written to the temp file.
set ta_found 0
set ta_tmp [::fileutil::tempfile]

ht walk root -- nid {

    # Start from a clean array so attributes of a previously visited node
    # cannot leak into this one.
    array unset nd
    array set nd [ht getall $nid]

    switch -exact -- $nd(type) {

        textarea {
            set ta_found 1
        }

        PCDATA {
            if {$ta_found} {
                set fd [open $ta_tmp w]
                # In aspell's pipe mode a line starting with *, #, &, @, +,
                # -, ~, ! or % is a command, not text; wikitext list items
                # begin with * and #. A leading ^ tells aspell to spell-check
                # the remainder of the line unconditionally.
                foreach ta_line [split [::htmlparse::mapEscapes $nd(data)] "\n"] {
                    puts $fd "^$ta_line"
                }
                close $fd

                set ta_found 0
            }
        }
    }
}

ht destroy

# Run aspell in pipe mode over the extracted text. The temp file is removed
# whether or not aspell succeeds.
set aspell_failed [catch {exec aspell -d en -a < $ta_tmp 2> /dev/null} ta_spell]
file delete -force -- $ta_tmp

if {$aspell_failed} {
    puts stderr "aspell failed: $ta_spell"
    exit 1
}

# ta_rep maps each flagged word to its list of suggestions (empty list when
# aspell offers none).
array set ta_rep ""

foreach l [split $ta_spell "\n"] {

    if {[regexp {^[&] ([^ ]+) ([^:]+): (.+)$} $l m word counts suggestions]} {

        # "& word count offset: s1, s2, ..." - misspelling with suggestions.
        set ta_rep($word) [split [string map [list ", " "|"] $suggestions] "|"]

    } elseif {[regexp {^[#] ([^ ]+) } $l m word]} {

        # "# word offset" - misspelling without suggestions.
        set ta_rep($word) {}
    }
}

puts $url
puts ----

foreach {k v} [array get ta_rep] {

    # Right-align the word in 20 columns; cap the suggestion text at 56
    # characters so each report line stays within a fixed width.
    puts [format "%20s | %-56s" $k [string range $v 0 55]]
}

puts ----
Note: See
TracBrowser
for help on using the repository browser.