Part of Programming Office PIA and OpenXML SDK
Our tasks:
1. Get the total word count of the file
2. Extract the content text from the doc/docx files
3. Modify the sentences in the doc/docx files, replacing each one with a sequence tag #ORDER#
Development environment:
Visual Studio 2010
Office 2007
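First, add a COM reference to Microsoft.Office.Interop.Word to your project.
Then set the reference's Embed Interop Types property to False; the code below uses the ApplicationClass coclass, which cannot be used when interop types are embedded.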
// Word automation sample: opens a document, reads its total word count,
// replaces every sentence with a "#<index>#" tag and collects the XML of
// each (already tagged) sentence into `content`.

// Placeholder passed by reference for every optional COM argument that is
// left at its default. This is a class-level field; the statements below
// belong inside a method of the same class.
static object Unknown = Type.Missing;

// Declared outside the try block so the results stay available afterwards.
int num = 0;                    // total word count of the document
string content = string.Empty;  // accumulated XML, one sentence per line

ApplicationClass app = new ApplicationClass();
try
{
    app.Visible = true;
    object filePath = "c:\\test1.docx";

    // Open the document. The result is kept as the Document interface, so
    // no cast to the DocumentClass coclass is needed.
    Document doc = app.Documents.Open(ref filePath, ref Unknown,
        ref Unknown, ref Unknown,
        ref Unknown, ref Unknown,
        ref Unknown, ref Unknown,
        ref Unknown, ref Unknown,
        ref Unknown, ref Unknown,
        ref Unknown, ref Unknown,
        ref Unknown, ref Unknown);

    // Get the total word count of the document.
    WdStatistic stat = WdStatistic.wdStatisticWords;
    num = doc.ComputeStatistics(stat, ref Unknown);

    // Walk the sentences from last to first (the collection is 1-based).
    // Presumably the reverse order keeps the positions of the sentences
    // still to be processed unaffected by the replacements made after them.
    Sentences sentences = doc.Sentences;
    for (int i = sentences.Count; i >= 1; i--)
    {
        // Select the sentence range. The end is pulled back by one
        // character - presumably to leave the sentence's trailing
        // space/paragraph mark in place (TODO confirm).
        Range sentence = sentences[i];
        object startLocation = sentence.Start;
        object endLocation = sentence.End - 1;
        Range rng = doc.Range(ref startLocation, ref endLocation);
        rng.Select();

        // Typing over the selection replaces the sentence with its tag.
        app.Selection.TypeText("#" + i + "#");

        // Add your words:
        // app.Selection.Font.Bold = -1;
        // app.Selection.TypeText("test");
        // sentences[i].Revisions.AcceptAll();

        // Get the XML of the sentence. It is read after the replacement,
        // so it describes the tagged sentence, not the original text.
        content += sentences[i].XML + "\r\n";

        // Get the text of the sentence:
        // sentences[i].Text;
    }
}
finally
{
    // Always shut Word down so a failure above does not leave a
    // WINWORD.EXE process running.
    // NOTE(review): the document has been modified and Quit is called
    // without a SaveChanges argument - confirm whether changes should be
    // saved or discarded explicitly.
    app.Quit();
}



