May 19, 2004
Searching result scripts::[Search Engine]

Thanks to Chedong: I reworked the script he suggested to me into a new script that extracts the search keywords from access_log and produces some simple statistics:
The search results for this website are really funny, and we can find something interesting here: Search results of wespoke.com.
Here is the script:
#NAME="access_log.`date +%m-%d-%y`"
NAME=$1
grep baidu.com/baidu $NAME > baidu.search
cat baidu.search | gawk ' { print $7 } ' | sort | uniq > list.search
cat baidu.search | sed "s/\ /#/g" > baidu.tmp
mv baidu.tmp baidu.search
echo ""
echo "===============Baidu===============
"
for VAL in `cat list.search`
do
COUNT=`grep "#$VAL"# baidu.search | wc -l `
echo "[$COUNT] http://blog.wespoke.com/$VAL"
echo "
"
grep "#$VAL"# baidu.search | tr "#" ' '|awk '{print $11}' | perl -pe 's/\\x(\w+)/%\1/gi' |perl -p -e 's/%(..)/pack("c", hex($1))/eg' | perl -pe 's/(.*)?(word=(.*?))[&"].*/$3/gi' > keys.search
for VAR in `cat keys.search | sort | uniq `
do
KEYCOUNT=`grep $VAR keys.search | wc -l `
echo "[$KEYCOUNT]$VAR||"
done
echo "
"
done
rm baidu.search list.search keys.search
echo "===============Google===============
"
NAME="access_log.`date +%m-%d-%y`"
grep google.com $NAME > google.search
cat google.search | gawk ' { print $7 } ' | sort | uniq > list.search
cat google.search | sed "s/\ /#/g" > google.tmp
mv google.tmp google.search
for VAL in `cat list.search`
do
COUNT=`grep "#$VAL"# google.search |grep search | wc -l `
echo "[$COUNT] http://blog.wespoke.com/$VAL"
echo "
"
grep "#$VAL"# google.search | grep search | tr "#" ' ' | awk '{print $11}' | perl -pe 's/\\x(\w+)/%\1/gi' | perl -p -e 's/%(..)/pack("c", hex($1))/eg' | perl -pe 's/(.*)?(q=(.*?))[&"].*/$3/gi' | jv-convert --from utf-8 -to gb18030 > keys.search
for VAR in `cat keys.search | sort | uniq `
do
KEYCOUNT=`grep $VAR keys.search | wc -l `
echo "[$KEYCOUNT]$VAR||"
done
echo "
"
done
echo "===============3721===============
"
grep 3721.com $NAME > 3721.search
cat 3721.search | gawk ' { print $7 } ' | sort | uniq > list.search
cat 3721.search | sed "s/\ /#/g" > 3721.tmp
mv 3721.tmp 3721.search
echo ""
for VAL in `cat list.search`
do
COUNT=`grep "#$VAL"# 3721.search | wc -l `
echo "[$COUNT] http://blog.wespoke.com/$VAL"
echo "
"
grep "#$VAL"# 3721.search | tr "#" ' '|awk '{print $11}' | perl -pe 's/\\x(\w+)/%\1/gi' |perl -p -e 's/%(..)/pack("c",hex($1))/eg' | perl -pe 's/(.*)?((p|name)=(.*?))[&"].*/$4/gi' > keys.search
for VAR in `cat keys.search`
do
echo "$VAR||"
done
echo "
"
done
echo "===============Yahoo===============
"
grep search.yahoo.com $NAME > yahoo.search
cat yahoo.search | gawk ' { print $7 } ' | sort | uniq > list.search
cat yahoo.search | sed "s/\ /#/g" > yahoo.tmp
mv yahoo.tmp yahoo.search
echo ""
for VAL in `cat list.search`
do
COUNT=`grep "#$VAL"# yahoo.search | wc -l `
echo "[$COUNT] http://blog.wespoke.com/$VAL"
echo "
"
grep "#$VAL"# yahoo.search | tr "#" ' '|awk '{print $11}' | perl -pe 's/\\x(\w+)/%\1/gi' |perl -p -e 's/%(..)/pack("c", hex($1))/eg' | perl -pe 's/(.*)?(p=(.*?))[&"].*/$3/gi' > keys.search
for VAR in `cat keys.search | sort | uniq `
do
KEYCOUNT=`grep $VAR keys.search | wc -l `
echo "[$KEYCOUNT]$VAR||"
done
echo "
"
done
echo "===============Sina===============
"
grep sina.com.cn $NAME > sina.search
cat sina.search | gawk ' { print $7 } ' | sort | uniq > list.search
cat sina.search | sed "s/\ /#/g" > sina.tmp
mv sina.tmp sina.search
echo ""
for VAL in `cat list.search`
do
COUNT=`grep "#$VAL"# sina.search | wc -l `
echo "[$COUNT] http://blog.wespoke.com/$VAL"
echo "
"
grep "#$VAL"# sina.search | tr "#" ' '|awk '{print $11}' | perl -pe 's/\\x(\w+)/%\1/gi' | perl -p -e 's/%(..)/pack("c", hex($1))/eg' | perl -pe 's/(.*)?((_searchkey|word)=(.*?))[&"].*/$4/gi' > keys.search
for VAR in `cat keys.search | sort | uniq `
do
KEYCOUNT=`grep $VAR keys.search | wc -l `
echo "[$KEYCOUNT]$VAR||"
done
echo "
"
done
echo ""
rm google.search list.search keys.search
Trackback
You can ping this entry by using http://www.wespoke.com/cgi-bin/mt/mt-tb.cgi/463
