# clean.sh

 

##
## clean.sh
##
## Clean documentation
##
 
 
 
 #####################################################################
#
# Clean every *.html file found below the current directory, in place.
#
# Each document is copied into a private mktemp directory, run through
# a fixed sequence of sed passes there, and written back only when
# every pass succeeded.  A failing pass therefore leaves the document
# untouched instead of replacing it with an empty or partial file, and
# nothing is ever written to a shared "tmp.out" in the working
# directory.
#
# The order of the rules is significant (later rules rely on the
# lowercasing and attribute stripping done by earlier ones), so the
# passes below follow the original sequence of one-liners.
#
#####################################################################


#####################################################################
#
# Generic cleaning one-liners (reference recipes, not executed)
#
#   Add blank line after string:
#       sed '/string/a\
#       ' $i > tmp.out
#       mv tmp.out $i
#
#   Add line above string:
#       sed '/<blockquote>/i\
#       ' $i > tmp.out
#       mv tmp.out $i
#
#   Delete text between phrases:
#       sed '/Last update/,/-- Body text begins here --/d' $i > tmp.out
#       mv tmp.out $i
#
#   If line ends with phrase, join with line below:
#       sed -e :a -e "/phrase$/N;s/\n//;ta" $i > tmp.out
#       mv tmp.out $i
#
#####################################################################


##
## _clean_dot_rules PREFIX SUFFIX FROM TO
##
## Print one sed command per line of the form
##     s/PREFIX<n dots>SUFFIX//g
## for n = FROM..TO.  These reproduce the fixed-length "any n
## characters" deletions used to strip quoted attribute values.
## Meant to be called inside $(...), so its variables do not leak.
##
_clean_dot_rules() {
  _cdr_n=1
  _cdr_dots=
  while [ "$_cdr_n" -le "$4" ]; do
    _cdr_dots="${_cdr_dots}."
    if [ "$_cdr_n" -ge "$3" ]; then
      printf 's/%s%s%s//g\n' "$1" "$_cdr_dots" "$2"
    fi
    _cdr_n=$((_cdr_n + 1))
  done
}


##
## _clean_pass SED_ARG...
##
## Run one sed pass over the working copy ($_clean_work) and make its
## output the new working copy.  The working copy is only replaced
## when sed succeeded.  Returns non-zero (after a message on stderr)
## otherwise.  Uses $_clean_work, $_clean_scratch and $_clean_target
## set by clean_html_file.
##
_clean_pass() {
  if sed "$@" < "$_clean_work" > "$_clean_scratch"; then
    mv -f -- "$_clean_scratch" "$_clean_work"
  else
    printf 'clean.sh: sed pass failed for %s; file left unchanged\n' \
      "$_clean_target" >&2
    return 1
  fi
}


##
## clean_html_file FILE
##
## Apply the whole cleaning sequence to FILE and overwrite FILE with
## the result.  FILE is written exactly once, at the very end, by
## redirecting into it (so its permissions are kept).
## Returns 0 on success, non-zero if FILE is not a regular file or any
## step failed.
##
## The body is a subshell "( ... )" so that variables, traps and
## "exit" stay local to one call.
##
clean_html_file() (
  _clean_target=$1

  if [ ! -f "$_clean_target" ]; then
    printf 'clean.sh: %s: not a regular file\n' "$_clean_target" >&2
    exit 1
  fi

  _clean_tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/clean-html.XXXXXX") || exit 1
  trap 'rm -rf -- "$_clean_tmpdir"' EXIT
  trap 'exit 1' HUP INT TERM
  _clean_work=$_clean_tmpdir/work
  _clean_scratch=$_clean_tmpdir/scratch

  # A literal carriage return, generated here so that the script itself
  # never has to contain a raw control character.
  _clean_cr=$(printf '\r')

  cat < "$_clean_target" > "$_clean_work" || exit 1

  ###
  ### WebLogic stuff
  ###
  ### Strip single-character margin attributes, then drop whole lines
  ### that mention the floating-window script, image maps or a
  ### stylesheet.  Lines are dropped with sed "d" rather than
  ### "grep -v" so the step does not depend on grep exit status or on
  ### grep treating the input as binary.
  ###
  _clean_pass -e '
    s/ leftmargin="."//g
    s/ marginheight="."//g
    s/ marginwidth="."//g
    s/ topmargin="."//g
    /floatwin.js/d
    /map name=/d
    /area coords=/d
    /area alt=/d
    /dev2dev.bea.com\/images\/styles.css/d
    /<\/map>/d
    /\.css"/d' || exit 1

  ##
  ## Join a line with the following one when the following line starts
  ## with "--&gt;".  The script is single-quoted so that "$!" reaches
  ## sed as the "not the last line" address instead of being expanded
  ## by the shell.
  ##
  _clean_pass -e :a -e '$!N;s/\n--\&gt;/--\&gt;/;ta' -e 'P;D' || exit 1

  ##
  ## Turn styled paragraphs into headings: first append the closing
  ## tag to the line, then replace the opening <p class=...>.
  ## NOTE(review): "chaphead" is closed with </h1> but opened with
  ## <h2>; kept as found -- confirm which level is intended.
  ##
  _clean_pass -e '
    /<p class="chaphead">/s/$/<\/h1>/g
    /<p class="head1">/s/$/<\/h2>/g
    /<p class="head2">/s/$/<\/h3>/g
    /<p class="head3">/s/$/<\/h4>/g
    /<p class="head4">/s/$/<\/h5>/g
    s/<p class="chaphead">/<p>\&nbsp;<\/p><h2>/
    s/<p class="head1">/<h2>/
    s/<p class="head2">/<h3>/
    s/<p class="head3">/<h4>/
    s/<p class="head4">/<h5>/' || exit 1

  ##
  ## Delete page chrome: everything from the layout table up to the
  ## CONTENT marker, the back-to-top block and the footer.  The marker
  ## with a double space is normalised first so the range can end on it.
  ##
  ## Disabled in the original, kept for reference:
  ##   sed "s/<a name=\".*\"><\/a>//g"
  ##
  _clean_pass -e '
    s/-- CONTENT  --/-- CONTENT --/
    /<table cellpadding=/,/-- CONTENT --/d
    /-- BACK TO TOP --/,/-- footer --/d
    /-- FOOTER start --/,/-- FOOTER end --/d' || exit 1

  ##
  ## Lowercase
  ##
  ## NOTE(review): rules without the "g" flag only change the first
  ## occurrence on each line; the flags are kept exactly as found.
  ##
  _clean_pass -e '
    s/<\/UL>/<\/ul>/g
    s/<UL>/<ul>/g
    s/<\/OL>/<\/ol>/g
    s/<OL>/<ol>/g
    s/<HTML>/<html>/g
    s/<\/HTML>/<\/html>/g
    s/<HEAD>/<head>/g
    s/<\/HEAD>/<\/head>/g
    s/<TITLE>/<title>/g
    s:</TITLE>:</title>:
    s/<BODY/<body/g
    s:</BODY>:</body>:
    s/<TABLE/<table/
    s:</TABLE>:</table>:
    s/A HREF/a href/
    s/H1>/h1>/g
    s/H2>/h2>/g
    s/H3>/h3>/g
    s/H4>/h4>/g
    s/<A NAME/<a name/g
    s/<P>/<p>/g
    s:</P>:</p>:
    s/<TD/<td/
    s:</TD>:</td>:
    s/TR>/tr>/
    s/<TR/<tr/
    s/<CODE/<code/
    s:</CODE>:</code>:
    s/WIDTH/width/g
    s/HEIGHT/height/g
    s/BGCOLOR/bgcolor/
    s/<FONT/<font/
    s/<\/FONT>/<\/font>/
    s/SIZE/size/g
    s/CLASS/class/g
    s/CELLSPACING/cellspacing/g
    s/CELLPADDING/cellpadding/g' || exit 1

  ##
  ## Add blank line under ul tags and under ol tags
  ##
  ## The original ran the </ul> rule twice and never looked at </ol>;
  ## each closing list tag now gets exactly one blank line.  "G"
  ## appends a newline plus the (empty) hold space, i.e. a blank line.
  ##
  _clean_pass -e '/<\/ul>/G' -e '/<\/ol>/G' || exit 1

  ##
  ## Remove table borders
  ##
  _clean_pass -e '
    s/border="."/border=0/
    s/BORDER="."/border=0/' || exit 1

  ## Get rid of font sizes (quoted values of 1 to 6 characters)
  _clean_pass -e "$(_clean_dot_rules '<font size="' '">' 1 6)" || exit 1

  ##
  ## Clean out CLASS tags (quoted values of 1 to 14 characters)
  ##
  _clean_pass -e "$(_clean_dot_rules 'class="' '"' 1 14)" || exit 1

  ##
  ## Cellspacing / cellpadding
  ##
  _clean_pass -e '
    s/cellspacing=.//
    s/cellspacing=...//
    s/cellpadding="1"/cellpadding=10/' || exit 1

  ##
  ## Remove code tags
  ##
  _clean_pass -e '
    s:<code>::g
    s:</code>::g' || exit 1

  ##
  ## Add blank lines (spacer paragraphs before rules and headings)
  ##
  _clean_pass -e '
    s/<HR>/<p><hr>/g
    s:<h2>:<p>\&nbsp;</p><h2>:g
    s:<H2>:<p>\&nbsp;</p><h2>:g
    s:<H3>:<p>\&nbsp;</p><h3>:g
    s:<h3>:<p>\&nbsp;</p><h3>:g' || exit 1

  ##
  ## Remove WIDTH attribute (1 to 6 characters, then 1 again as in
  ## the original sequence)
  ##
  _clean_pass \
    -e "$(_clean_dot_rules ' width="' '"' 1 6)" \
    -e "$(_clean_dot_rules ' width="' '"' 1 1)" || exit 1

  ##
  ## Remove HEIGHT attribute (same lengths as WIDTH)
  ##
  _clean_pass \
    -e "$(_clean_dot_rules ' height="' '"' 1 6)" \
    -e "$(_clean_dot_rules ' height="' '"' 1 1)" || exit 1

  ##
  ## Remove windows line feeds (carriage returns), then collapse
  ## doubled non-breaking-space entities into two plain spaces.
  ##
  _clean_pass \
    -e "s/${_clean_cr}//g" \
    -e 's/\&nbsp;\&nbsp;/  /g' || exit 1

  ##
  ## Tables: add a spacer paragraph before each table, wrap tables in
  ## blockquotes, and top align the first plain <td> of each line.
  ## (The original wrote valign="top" inside a double-quoted script,
  ## so the shell removed the quotes; the emitted text is valign=top.)
  ##
  _clean_pass -e '
    s/<table/<p>\&nbsp;<\/p><table/g
    s:<table:<blockquote><table:g
    s:</table>:</table></blockquote>:g
    s/<td>/<td valign=top>/' || exit 1

  ##
  ## Add stylesheet
  ##
  _clean_pass -e 's:</head>:<meta content=text/css http-equiv=Content-Style-Type> <link href="http\://www.setgetweb.com/p/black.css" rel="stylesheet" type="text/css"></head>:' || exit 1

  ##
  ## Remove color tags (1 to 7 characters, 7 again, then 8 and 9, as
  ## in the original sequence)
  ##
  _clean_pass \
    -e "$(_clean_dot_rules ' bgcolor="' '"' 1 7)" \
    -e "$(_clean_dot_rules ' bgcolor="' '"' 7 9)" || exit 1

  ##
  ## Add font style and blockquotes, drop every closing font tag, and
  ## delete the META TAGS block.
  ##
  _clean_pass -e '
    s/<body>/<body><blockquote><blockquote><font class=grey2>/
    s/<\/font>//g
    /BEGIN META TAGS/,/END META TAGS/d' || exit 1

  ##
  ## Add blank line under <br> tags
  ## NOTE(review): the pattern matches the literal text "</br>", not
  ## "<br>"; kept as found.
  ##
  _clean_pass -e '/<\/br>/G' || exit 1

  ####
  ####  Convert dd/dt/dl tags into table tags
  ####
  _clean_pass -e '
    s/<dl>/<p><blockquote><table cellpadding=10>/g
    s/<\/dl>/<\/table><\/blockquote>/g
    s/<dt>/<tr><td valign=top>/g
    s/<\/dt>/<\/td>/g
    s/<dd>/<td valign=top>/g
    s/<\/dd>/<\/td><\/tr>/g' || exit 1

  ##
  ## Leftovers: trademark superscript, empty font tags, TARGET
  ## attributes, blockquoted <pre>, and div/strong wrappers.
  ##
  _clean_pass -e '
    s/<sup>TM<\/sup>//
    s/<font>//
    s/<font >//
    s/ TARGET=_blank//g
    s/<pre>/<blockquote><pre>/
    s/<\/pre>/<\/pre><\/blockquote>/
    s/<div>//
    s/<div align="CENTER">//
    s/<\/div>//
    s/<strong>//
    s/<\/strong>//' || exit 1

  # Every pass succeeded: publish the result.
  if ! cat < "$_clean_work" > "$_clean_target"; then
    printf 'clean.sh: cannot write %s\n' "$_clean_target" >&2
    exit 1
  fi
)


##
## clean_html_tree [DIR]
##
## Print the path of every *.html entry below DIR (default ".") that
## is not a directory, and clean each one with clean_html_file.
## Paths are read one per line, so names containing blanks are handled;
## names containing a newline are not.
## Returns non-zero if any file could not be cleaned.
##
clean_html_tree() {
  find "${1:-.}" -name '*.html' ! -type d -print | (
    _clean_rc=0
    while IFS= read -r _clean_path; do
      printf '%s\n' "$_clean_path"
      # stdin is the list of paths; keep the cleaner away from it.
      clean_html_file "$_clean_path" < /dev/null || _clean_rc=1
    done
    exit "$_clean_rc"
  )
}

clean_html_tree .