### cleanMSProject.sh

 

### cleanMSProject.sh (this header previously said cleanoffice.sh)
###
### Clean redundancies from HTML files generated by MS Office (MS Project)
###
###
### Technique: if xyz is found in the middle of a line,
### move it onto its own line:
###
###     sed "s/xyz/\\
###     xyz/g" $1 > $1.tmp
###     mv $1.tmp $1
###
###


### Usage: cleanMSProject.sh FILE
###
### Rewrites FILE in place:
###   - removes the <!--MSProjectTemplate_*--> placeholder comments
###   - removes BGCOLOR="......." attributes (seven-character values)
###   - removes carriage returns
###   - puts every <TH / <TD on its own line, indented four spaces
###   - removes <DIV STYLE="..."> openers whose value is 10-16 characters
###     long, and all </DIV>, </TH>, </TD>, </TR> closers
###   - lower-cases a handful of tags and attributes
###   - wraps the body in <blockquote> and appends a footer note
###
### Only the first argument is processed.  FILE is replaced only when every
### step succeeded; on any failure it is left untouched.
###
### Exit status: 0 on success, 1 when FILE cannot be processed, 2 when no
### FILE argument was given.


### clean_ms_project_file FILE
###
### Apply all substitutions to FILE and move the result over FILE.
### The body runs in a subshell so that its variables and its EXIT trap
### stay private to this function.
clean_ms_project_file() (
    file=$1

    printf 'Cleaning %s\n' "$file"

    if [ ! -f "$file" ] || [ ! -r "$file" ]; then
        printf 'cleanMSProject.sh: cannot read file: %s\n' "$file" >&2
        exit 1
    fi

    # Anchor relative names with ./ so that a name starting with '-' can
    # never be taken for an option by mktemp or mv.
    case $file in
        /* | ./* | ../*) path=$file ;;
        *) path=./$file ;;
    esac

    # The original pattern at this point was a raw control character that
    # had turned into a line break, which made sed reject the expression
    # and emptied the file.  Presumably it was a carriage return (^M), the
    # usual residue in Windows-generated HTML -- TODO confirm.
    cr=$(printf '\r')

    # Scratch directory next to FILE: the final mv is then a rename on the
    # same filesystem, and no existing FILE.tmp can be clobbered.
    workdir=$(mktemp -d "${path}.XXXXXX") || exit 1
    trap 'rm -rf "${workdir:?}"' EXIT

    # Stage 1: everything up to and including the substitutions that add
    # line breaks.
    sed \
        -e 's/<!--MSProjectTemplate_ProjectTitle-->//g' \
        -e 's/<!--MSProjectTemplate_Image-->//g' \
        -e 's/<!--MSProjectTemplate_StartDate-->//g' \
        -e 's/<!--MSProjectTemplate_FinishDate-->//g' \
        -e 's/<!--MSProjectTemplate_TaskTableTitle-->//g' \
        -e 's/<!--MSProjectTemplate_TaskTable-->//g' \
        -e 's/<!--MSProjectTemplate_ResourceTableTitle-->//g' \
        -e 's/<!--MSProjectTemplate_ResourceTable-->//g' \
        -e 's/<!--MSProjectTemplate_AssignmentTableTitle-->//g' \
        -e 's/<!--MSProjectTemplate_AssignmentTable-->//g' \
        -e 's/BGCOLOR="......."//g' \
        -e "s/${cr}//g" \
        -e 's/<TH/\
    <th/g' \
        -e 's/<TD/\
    <td/g' \
        < "$path" > "$workdir/stage1" || exit 1

    # Stage 2 is a separate sed run on purpose: it must see the line breaks
    # added above as real line boundaries, so that the '.' wildcards in the
    # DIV patterns can never match across them.  The seven DIV lengths stay
    # separate and in ascending order, shortest match first.
    sed \
        -e 's/<DIV STYLE=".\{10\}">//g' \
        -e 's/<DIV STYLE=".\{11\}">//g' \
        -e 's/<DIV STYLE=".\{12\}">//g' \
        -e 's/<DIV STYLE=".\{13\}">//g' \
        -e 's/<DIV STYLE=".\{14\}">//g' \
        -e 's/<DIV STYLE=".\{15\}">//g' \
        -e 's/<DIV STYLE=".\{16\}">//g' \
        -e 's/<TR >/<tr>/g' \
        -e 's,</TH>,,g' \
        -e 's,</TD>,,g' \
        -e 's,</TR>,,g' \
        -e 's,</DIV>,,g' \
        -e 's,<B>,<b>,g' \
        -e 's,</B>,</b>,g' \
        -e 's/NOWRAP/nowrap/g' \
        -e 's/ALIGN/align/g' \
        -e 's/<TR  align=right>/<tr align=right>/g' \
        -e 's/<BODY>/<body><blockquote>/g' \
        -e 's,</BODY>,<p>\&nbsp;</p><p>\&nbsp;</p>This document was cleaned using the cleanMSProject.sh script. <p>\&nbsp;</p><p>\&nbsp;</p><p>\&nbsp;</p></body>,g' \
        -e 's/<TABLE BORDER>/<table cellpadding=10 border=1>/g' \
        < "$workdir/stage1" > "$workdir/stage2" || exit 1

    # Only now, with a complete result in hand, replace the input.
    mv "$workdir/stage2" "$path"
)


### main [FILE]
###
### Entry point: validate the command line, then clean FILE.
### Uses 'return' rather than 'exit'; as the last command of the script its
### status becomes the script's exit status.
main() {
    if [ "$#" -lt 1 ] || [ -z "$1" ]; then
        printf 'Usage: cleanMSProject.sh FILE\n' >&2
        return 2
    fi
    clean_ms_project_file "$1"
}

main "$@"