# Bash Examples
#xml-parser
#!/bin/bash
#sed -e '/, Artikel/d' test.xml > temp2.xml
#sed -e '/, Text/d' temp2.xml > temp.xml
#sed -e '/, Kontaktformular/d' temp.xml > temp2.xml
# rows_to_items [FILE...]
#
# Rewrites each input line (the FILE arguments, or stdin when none are given)
# so that a `<z:row ... />` attribute row becomes <item> markup, and writes
# the result to stdout:
#
#   <z:row                 ->  <item>
#   />                     ->  </description2></item>
#   ows_Title='            ->  <title>
#   ' ows_OvMetaDate='     ->  </title><pubDate>
#   ' ows_OvMetaText='     ->  </pubDate><description>
#   ' ows_Text1='          ->  </description><description2>
#
# Only the first occurrence per line is replaced (no `g` flag) and the
# expressions run in the order listed. Everything happens in one sed process;
# because each substitution is confined to a single line this yields the same
# temp2.xml as six chained passes, and the scratch file temp.xml is no longer
# needed or written.
#
# Returns sed's exit status.
rows_to_items() {
  sed \
    -e "s|<z:row|<item>|" \
    -e "s|/>|</description2></item>|" \
    -e "s|ows_Title='|<title>|" \
    -e "s|' ows_OvMetaDate='|</title><pubDate>|" \
    -e "s|' ows_OvMetaText='|</pubDate><description>|" \
    -e "s|' ows_Text1='|</description><description2>|" \
    "$@"
}

# As before, temp2.xml is (re)created even when test.xml cannot be read; the
# failure is reported but does not abort the rest of the file.
rows_to_items test.xml > temp2.xml ||
  printf '%s\n' "xml-parser: converting test.xml to temp2.xml failed" >&2
# filter_items [INPUT [OUTPUT]]
#
# Collects the <item> records found in INPUT (default: temp2.xml) and, when
# the closing "</xml>" line is reached, writes the records that carry a
# <description> element to OUTPUT (default: testfile) as one single line.
#
# A record starts at a line beginning with "<item>" and runs up to the next
# such line (or the "</xml>" line); its lines are joined without separators.
# A record is kept only if it contains both "</item>" and "<description>".
#
# Requires bash ([[ ]], local, +=).
#
# Deliberate differences from the former inline loop:
#   * The last record before </xml> is emitted too. It used to be dropped
#     because a record was only flushed when the *next* "<item>" line showed
#     up. Whatever follows its closing </item> (e.g. closing wrapper tags) is
#     cut off.
#   * All expansions are quoted: blank lines, or lines with a space among
#     their first six characters, no longer make `[` fail, and whitespace
#     runs or glob characters such as "*" in the text reach OUTPUT unchanged.
#   * `read -r` keeps backslashes, and a final line without a trailing
#     newline is processed. Leading/trailing blanks are still stripped by
#     read, so indented "<item>" lines are still recognised.
#   * Leftover debug output is gone: the per-record index of "<description>"
#     and the trailing `echo $temp`, which under bash could only print an
#     empty line because the loop ran in a pipeline subshell.
#   * The unused line counter and the expr/awk forks are gone.
#
# Returns 0 after writing OUTPUT; 1 if INPUT is unreadable or contains no
# "</xml>" line (OUTPUT is then left untouched).
filter_items() {
  local input=${1:-temp2.xml}
  local output=${2:-testfile}
  local line record="" kept="" closed=0

  if [[ ! -r $input ]]; then
    printf 'filter_items: cannot read %s\n' "$input" >&2
    return 1
  fi

  while read -r line || [[ -n $line ]]; do
    if [[ $line == "<item>"* ]]; then
      # A new record begins: decide the fate of the one collected so far.
      if [[ $record == *"</item>"* && $record == *"<description>"* ]]; then
        kept+=$record
      fi
      record=$line
    elif [[ $line == "</xml>"* ]]; then
      # End of document: flush the pending record, trimmed to its </item>.
      if [[ $record == *"</item>"* && $record == *"<description>"* ]]; then
        kept+="${record%"</item>"*}</item>"
      fi
      record=""
      printf '%s\n' "$kept" > "$output" || return 1
      closed=1
    else
      record+=$line
    fi
  done < "$input"

  if (( ! closed )); then
    printf 'filter_items: no </xml> line in %s, nothing written\n' "$input" >&2
    return 1
  fi
}

filter_items temp2.xml testfile
#html parser 2. version
#!/bin/sh
# extract_news
#
# Reads an HTML page on stdin and writes the news fragment to stdout:
#   1. awk keeps the lines from a line matching "News Area" up to the next
#      line matching "Promo Area", dropping every line that matches either
#      marker itself;
#   2. sed replaces everything up to and including the last "/div><div" on a
#      line (the leading `.*` is greedy) with "<div"; lines without that
#      sequence pass through unchanged.
extract_news() {
  awk '/News Area/,/Promo Area/ { if (/News Area/ || /Promo Area/) next; print }' |
    sed 's|.*/div><div|<div|'
}

# The URL is quoted because it contains "?", which the shell would otherwise
# treat as a glob character.
news_url='http://www.quickline.com/Pages/default.aspx?flash=off'

# Download into a private temp file instead of "default.aspx?flash=off" in
# the current directory: if a copy was left behind by an earlier run, wget
# would save the new page under a ".1" suffix and the stale copy would be
# parsed. temp.html is only written when the download succeeded.
news_page=$(mktemp) &&
  wget -O "$news_page" "$news_url" &&
  extract_news < "$news_page" > temp.html

# Remove the download whether or not the steps above succeeded.
if [ -n "$news_page" ]; then
  rm -f -- "$news_page"
fi
# publish_news SRC DEST
#
# Sanity-checks the extracted fragment SRC and, if it looks plausible, copies
# it to DEST. "Plausible" means more than 100 characters (wc -m) on fewer
# than 5 lines (wc -l). A missing or unreadable SRC counts as 0 characters
# and is rejected without the `cat: No such file` noise the inline version
# produced.
#
# Prints "news has been parsed!" on stdout when SRC is accepted, or
# "There was an error!" on stderr when it is rejected.
# Returns 0 if DEST was written, non-zero otherwise.
#
# The body is a subshell, so the helper variables stay local without relying
# on the non-POSIX `local`; `exit` therefore only leaves the function.
publish_news() (
  src=$1
  dest=$2
  chars=0
  lines=0

  if [ -r "$src" ]; then
    # $(( )) strips the padding some wc implementations put before the number.
    chars=$(( $(wc -m < "$src") ))
    lines=$(( $(wc -l < "$src") ))
  fi

  if [ "$chars" -gt 100 ] && [ "$lines" -lt 5 ]; then
    echo "news has been parsed!"
    cp -- "$src" "$dest" || exit 1
  else
    echo "There was an error!" >&2
    exit 1
  fi
)

publish_news temp.html news.html
# temp.html is a scratch file; drop it whatever the outcome was.
rm -f temp.html
