User:Crowley666/dump

維基詞典，自由的多語言詞典
bzcat zhwiktionary-20210701-pages-articles.xml.bz2 | awk '/^      <text/,/<[/]text>/'
# 查找标题行左右等号数不匹配的页面：第一条命令匹配左侧多一个等号的标题行，第二条匹配右侧多一个等号的标题行；同时保留 <title> 行，以便用 grep -B 1 显示所属页面标题
grep -E -e '^=(=+)[^=]+\1$' -e '<title>' zhwiktionary-20210820-pages-articles.xml | grep -B 1 '=='
grep -E -e '^(=+)[^=]+\1=$' -e '<title>' zhwiktionary-20210820-pages-articles.xml | grep -B 1 '=='
$ time grep '\[\[en:' zhwiktionary-20210701-pages-articles.xml > /tmp/tmp
real	0m0.678s
user	0m0.493s
sys	0m0.182s
$ time awk -F '>' 'match($0, /^    <title>([^<]*)/, a){title=a[1]; next}/\{\{m[|}]/{print title;print; next}' zhwiktionary-20210701-pages-articles.xml > /tmp/tmp
real	0m53.171s
user	0m52.119s
sys	0m0.368s
$ time bzip2 -cdk zhwiktionary-20210701-pages-articles.xml.bz2 | awk '/\[\[en:/' > /tmp/tmp
real	0m48.249s
user	0m59.468s
sys	0m1.473s
$ time bzgrep '\[\[en:' zhwiktionary-20210701-pages-articles.xml.bz2 > /tmp/tmp
real	0m50.168s
user	0m50.792s
sys	0m1.694s
$ time LANG=C bzgrep '\[\[en:' zhwiktionary-20210701-pages-articles.xml.bz2 > /tmp/tmp
real	1m27.231s
user	1m26.125s
sys	0m3.279s
$ time bzip2 -cdk zhwiktionary-20210701-pages-articles.xml.bz2 > /dev/null
real	0m45.721s
user	0m45.433s
sys	0m0.113s
$ time bzcat enwiktionary-20210701-pages-articles.xml.bz2 > /dev/null
real	5m14.347s
user	5m10.504s
sys	0m1.505s