@@ -22,6 +22,8 @@ public class RtfToDocxConverter : ITextToDocxConverter
2222 private List < ( int R , int G , int B ) > colorTable = new ( ) ;
2323 private Encoding ? codePageEncoding ;
2424 private BorderType ? currentBorder ;
25+ private SectionProperties ? defaultSectPr ;
26+ private SectionProperties ? currentSectPr ;
2527
2628#if ! NETFRAMEWORK
2729 static RtfToDocxConverter ( )
@@ -57,6 +59,13 @@ static RtfToDocxConverter()
5759 /// <param name="targetDocument"></param>
5860 public void BuildDocx ( TextReader input , WordprocessingDocument targetDocument )
5961 {
62+ fontTable = new ( ) ;
63+ colorTable = new ( ) ;
64+ codePageEncoding = null ;
65+ currentBorder = null ;
66+ defaultSectPr = null ;
67+ currentSectPr = null ;
68+
6069 if ( targetDocument . MainDocumentPart == null )
6170 targetDocument . AddMainDocumentPart ( ) ;
6271
@@ -67,6 +76,23 @@ public void BuildDocx(TextReader input, WordprocessingDocument targetDocument)
6776
6877 var rtfDocument = RtfReader . ReadRtf ( input ) ;
6978 ConvertGroup ( rtfDocument . Root , targetDocument . MainDocumentPart . Document . Body , targetDocument . MainDocumentPart ) ;
79+
80+ if ( ! targetDocument . MainDocumentPart . Document . Body . Descendants < SectionProperties > ( ) . Any ( ) )
81+ {
82+ // If the document does not contain sections, add the default section properties as last body element,
83+ // so that it's applied by default in DOCX too.
84+ // This preserves page size and other properties if they are specified as document-level settings only (\paperw, \paperh, ...)
85+ // but no section is present.
86+ if ( defaultSectPr != null )
87+ targetDocument . MainDocumentPart . Document . Body . AppendChild ( defaultSectPr . CloneNode ( true ) ) ;
88+ }
89+ else
90+ {
91+ // If at least a section was created, add the last section properties (that was not added to a paragraph)
92+ // as last body element, so that it's applied by default to new DOCX sections.
93+ if ( currentSectPr != null )
94+ targetDocument . MainDocumentPart . Document . Body . AppendChild ( currentSectPr . CloneNode ( true ) ) ;
95+ }
7096 }
7197
7298 private void ConvertGroup ( RtfGroup group , OpenXmlElement parentElement , MainDocumentPart targetDocument )
@@ -103,7 +129,7 @@ private void ConvertGroup(RtfGroup group, OpenXmlElement parentElement, MainDocu
103129 var pPr = new ParagraphProperties ( ) ;
104130 Paragraph ? currentParagraph = null ;
105131 Run ? currentRun = null ;
106- ConvertGroupInner ( group , parentElement , targetDocument , fmtStack , pPr , ref currentParagraph , ref currentRun ) ;
132+ ConvertGroupInner ( group , parentElement , targetDocument , fmtStack , pPr , currentParagraph , currentRun ) ;
107133 }
108134
109135 private FormattingState TryPeek ( Stack < FormattingState > stack )
@@ -119,7 +145,7 @@ private void TryPop(Stack<FormattingState> stack)
119145 stack . Pop ( ) ;
120146 }
121147
122- private void ConvertGroupInner ( RtfGroup group , OpenXmlElement parentElement , MainDocumentPart targetDocument , Stack < FormattingState > fmtStack , ParagraphProperties pPr , ref Paragraph ? currentParagraph , ref Run ? currentRun )
148+ private void ConvertGroupInner ( RtfGroup group , OpenXmlElement parentElement , MainDocumentPart targetDocument , Stack < FormattingState > fmtStack , ParagraphProperties pPr , Paragraph ? currentParagraph , Run ? currentRun )
123149 {
124150 // push a clone for this group's local modifications
125151 fmtStack . Push ( TryPeek ( fmtStack ) . Clone ( ) ) ;
@@ -133,7 +159,8 @@ private void ConvertGroupInner(RtfGroup group, OpenXmlElement parentElement, Mai
133159 {
134160 if ( destination . IsIgnorable )
135161 {
136- // This subgroup is an ignorable destination (starts with *), skip it for now
162+ // This subgroup is an ignorable destination (starts with *), skip it for now.
163+ // In the future, we should support at least listtable and listoverridetable.
137164 continue ;
138165 }
139166 else
@@ -150,12 +177,36 @@ private void ConvertGroupInner(RtfGroup group, OpenXmlElement parentElement, Mai
150177 ParseColorTable ( destination ) ;
151178 continue ;
152179 }
180+ else if ( dname == "header" )
181+ {
182+ }
183+ else if ( dname == "headerf" )
184+ {
185+ }
186+ else if ( dname == "headerl" )
187+ {
188+ }
189+ else if ( dname == "headerr" )
190+ {
191+ }
192+ else if ( dname == "footer" )
193+ {
194+ }
195+ else if ( dname == "footerf" )
196+ {
197+ }
198+ else if ( dname == "footerl" )
199+ {
200+ }
201+ else if ( dname == "footerr" )
202+ {
203+ }
153204 else if ( dname == "upr" )
154205 {
155206 // Process the Unicode group only, ignore the ANSI equivalent
156207 var udGroup = group . Tokens . OfType < RtfDestination > ( ) . FirstOrDefault ( d => d . Name == "ud" ) ;
157208 if ( udGroup != null )
158- ConvertGroupInner ( udGroup , parentElement , targetDocument , fmtStack , pPr , ref currentParagraph , ref currentRun ) ;
209+ ConvertGroupInner ( udGroup , parentElement , targetDocument , fmtStack , pPr , currentParagraph , currentRun ) ;
159210
160211 continue ;
161212 }
@@ -172,7 +223,7 @@ private void ConvertGroupInner(RtfGroup group, OpenXmlElement parentElement, Mai
172223 }
173224
174225 // Recurse
175- ConvertGroupInner ( subGroup , parentElement , targetDocument , fmtStack , pPr , ref currentParagraph , ref currentRun ) ;
226+ ConvertGroupInner ( subGroup , parentElement , targetDocument , fmtStack , pPr , currentParagraph , currentRun ) ;
176227 break ;
177228 case RtfControlWord cw :
178229 HandleControlWord ( cw , ref currentParagraph , ref currentRun , parentElement , TryPeek ( fmtStack ) , pPr ) ;
@@ -181,18 +232,18 @@ private void ConvertGroupInner(RtfGroup group, OpenXmlElement parentElement, Mai
181232 // Ensure paragraph and run exist
182233 var encoding = codePageEncoding ?? Encoding . GetEncoding ( CultureInfo . CurrentCulture . TextInfo . ANSICodePage ) ;
183234 string s = encoding . GetString ( [ ch . CharCode ] ) ;
184- HandleText ( s , ref currentParagraph , ref currentRun , parentElement , TryPeek ( fmtStack ) , pPr ) ;
235+ HandleText ( s , currentParagraph , currentRun , parentElement , TryPeek ( fmtStack ) , pPr ) ;
185236 break ;
186237 case RtfText text :
187- HandleText ( text . Text , ref currentParagraph , ref currentRun , parentElement , TryPeek ( fmtStack ) , pPr ) ;
238+ HandleText ( text . Text , currentParagraph , currentRun , parentElement , TryPeek ( fmtStack ) , pPr ) ;
188239 break ;
189240 }
190241 }
191242 // restore parent formatting state
192243 TryPop ( fmtStack ) ;
193244 }
194245
195- private void HandleText ( string text , ref Paragraph ? currentParagraph , ref Run ? currentRun , OpenXmlElement parentElement , FormattingState runState , ParagraphProperties pPr )
246+ private void HandleText ( string text , Paragraph ? currentParagraph , Run ? currentRun , OpenXmlElement parentElement , FormattingState runState , ParagraphProperties pPr )
196247 {
197248 text ??= string . Empty ;
198249
@@ -232,17 +283,23 @@ private void HandleControlWord(RtfControlWord cw, ref Paragraph? currentParagrap
232283 switch ( name )
233284 {
234285 case "sect" :
235- // end current section
236- // TODO
286+ // End current section
287+ if ( parentElement is Body body )
288+ {
289+ EnsureParagraph ( ref currentParagraph , ref currentRun , parentElement , pPr ) ;
290+ currentParagraph ! . ParagraphProperties ??= new ParagraphProperties ( ) ;
291+ currentSectPr ??= new SectionProperties ( ) ;
292+ currentParagraph . ParagraphProperties . SectionProperties = ( SectionProperties ) currentSectPr . CloneNode ( true ) ;
293+ }
237294 break ;
238295 case "par" :
239- // end current paragraph
296+ // End current paragraph
240297 currentParagraph = null ;
241298 currentRun = null ;
242299 break ;
243300
244301 case "sectd" : // reset section formatting
245- // sectionState.Clear ();
302+ ResetSectionProperties ( ) ;
246303 break ;
247304 case "pard" : // reset paragraph formatting
248305 pPr . RemoveAllChildren ( ) ;
@@ -253,6 +310,7 @@ private void HandleControlWord(RtfControlWord cw, ref Paragraph? currentParagrap
253310 runState . Clear ( ) ;
254311 break ;
255312
313+ // RTF header
256314 case "ansi" :
257315 // If ANSI is specified, use the system ANSI code page,
258316 // unless the DefaultCodePage value is set to a different value.
@@ -293,6 +351,123 @@ private void HandleControlWord(RtfControlWord cw, ref Paragraph? currentParagrap
293351 }
294352 break ;
295353
354+ // Document settings
case "paperw":
    // \paperwN: document-level page width. It is stored on the default section
    // properties so it can be applied when no explicit section overrides it.
    // Negative parameters are ignored: casting them to uint would wrap around
    // to an enormous, invalid page width.
    if (cw.HasValue && cw.Value!.Value >= 0)
    {
        defaultSectPr ??= new SectionProperties();
        var pageSize = defaultSectPr.GetFirstChild<PageSize>() ?? defaultSectPr.AppendChild(new PageSize());
        pageSize.Width = (uint)cw.Value!.Value;
    }
    break;
case "paperh":
    // \paperhN: document-level page height, handled like \paperw above.
    if (cw.HasValue && cw.Value!.Value >= 0)
    {
        defaultSectPr ??= new SectionProperties();
        var pageSize = defaultSectPr.GetFirstChild<PageSize>() ?? defaultSectPr.AppendChild(new PageSize());
        pageSize.Height = (uint)cw.Value!.Value;
    }
    break;
371+
372+ // Section properties
case "lndscpsxn":
{
    // \lndscpsxn is a flag: it carries no numeric parameter, so it must not be
    // gated on cw.HasValue (doing so would silently drop the landscape setting).
    currentSectPr ??= new SectionProperties();
    var pageSize = currentSectPr.GetFirstChild<PageSize>() ?? currentSectPr.AppendChild(new PageSize());
    pageSize.Orient = PageOrientationValues.Landscape;
    break;
}
case "margbsxn":
    // Section bottom margin (signed value in the target model).
    if (cw.HasValue)
    {
        currentSectPr ??= new SectionProperties();
        var pageMargin = currentSectPr.GetFirstChild<PageMargin>() ?? currentSectPr.AppendChild(new PageMargin());
        pageMargin.Bottom = cw.Value!.Value;
    }
    break;
case "marglsxn":
    // Section left margin. The target property is unsigned, so negative
    // parameters are ignored instead of wrapping around through the uint cast.
    if (cw.HasValue && cw.Value!.Value >= 0)
    {
        currentSectPr ??= new SectionProperties();
        var pageMargin = currentSectPr.GetFirstChild<PageMargin>() ?? currentSectPr.AppendChild(new PageMargin());
        pageMargin.Left = (uint)cw.Value!.Value;
    }
    break;
case "margrsxn":
    // Section right margin (unsigned, see "marglsxn").
    if (cw.HasValue && cw.Value!.Value >= 0)
    {
        currentSectPr ??= new SectionProperties();
        var pageMargin = currentSectPr.GetFirstChild<PageMargin>() ?? currentSectPr.AppendChild(new PageMargin());
        pageMargin.Right = (uint)cw.Value!.Value;
    }
    break;
case "margtsxn":
    // Section top margin (signed value in the target model).
    if (cw.HasValue)
    {
        currentSectPr ??= new SectionProperties();
        var pageMargin = currentSectPr.GetFirstChild<PageMargin>() ?? currentSectPr.AppendChild(new PageMargin());
        pageMargin.Top = cw.Value!.Value;
    }
    break;
case "pgwsxn":
    // Section page width; negative parameters are ignored (unsigned target).
    if (cw.HasValue && cw.Value!.Value >= 0)
    {
        currentSectPr ??= new SectionProperties();
        var pageSize = currentSectPr.GetFirstChild<PageSize>() ?? currentSectPr.AppendChild(new PageSize());
        pageSize.Width = (uint)cw.Value!.Value;
    }
    break;
case "pghsxn":
    // Section page height; negative parameters are ignored (unsigned target).
    if (cw.HasValue && cw.Value!.Value >= 0)
    {
        currentSectPr ??= new SectionProperties();
        var pageSize = currentSectPr.GetFirstChild<PageSize>() ?? currentSectPr.AppendChild(new PageSize());
        pageSize.Height = (uint)cw.Value!.Value;
    }
    break;
case "sbknone":
case "sbkcol":
case "sbkodd":
case "sbkeven":
case "sbkpage":
{
    // Section break type. These control words are flags without a numeric
    // parameter, so they are applied unconditionally (no cw.HasValue check).
    currentSectPr ??= new SectionProperties();
    var sectionType = currentSectPr.GetFirstChild<SectionType>() ?? currentSectPr.AppendChild(new SectionType());
    sectionType.Val = name switch
    {
        "sbknone" => SectionMarkValues.Continuous,
        "sbkcol" => SectionMarkValues.NextColumn,
        "sbkodd" => SectionMarkValues.OddPage,
        "sbkeven" => SectionMarkValues.EvenPage,
        _ => SectionMarkValues.NextPage, // "sbkpage"
    };
    break;
}
469+
470+ // Breaks
296471 case "line" :
297472 // text-wrapping line break. Avoid emitting duplicate breaks when previous token
298473 // already produced a text-wrapping break (some RTF producers emit both \line and \lbr).
@@ -341,6 +516,7 @@ private void HandleControlWord(RtfControlWord cw, ref Paragraph? currentParagrap
341516 }
342517 break ;
343518
519+ // Special characters
344520 // TODO: use the current culture specified in RTF for the fallback string of chdate and chtime
345521 case "chdate" :
346522 CreateField ( "date" , DateTime . Now . ToShortDateString ( ) , ref currentParagraph , ref currentRun , parentElement , runState , pPr ) ;
@@ -360,7 +536,6 @@ private void HandleControlWord(RtfControlWord cw, ref Paragraph? currentParagrap
360536 case "sectnum" : // TODO: keep track of the current section number and write it as fallback
361537 CreateSimpleField ( " SECTION \\ * MERGEFORMAT " , "1" , ref currentParagraph , ref currentRun , parentElement , runState , pPr ) ;
362538 break ;
363-
364539 // TODO: create comments and footnotes/endnotes (followed by the content group)
365540 // case "chatn":
366541 // break;
@@ -412,7 +587,7 @@ private void HandleControlWord(RtfControlWord cw, ref Paragraph? currentParagrap
412587 charCode += 65536 ;
413588 }
414589 string s = char . ConvertFromUtf32 ( charCode ) ;
415- HandleText ( s , ref currentParagraph , ref currentRun , parentElement , runState , pPr ) ;
590+ HandleText ( s , currentParagraph , currentRun , parentElement , runState , pPr ) ;
416591 // After emitting the Unicode character, the RTF specification says that
417592 // the following "uc" ANSI characters should be ignored. Track how many
418593 // to skip on the formatting state so subsequent text tokens can consume them.
@@ -711,6 +886,7 @@ private void HandleControlWord(RtfControlWord cw, ref Paragraph? currentParagrap
711886 pPr . TextAlignment = new TextAlignment ( ) { Val = VerticalTextAlignmentValues . Baseline } ;
712887 break ;
713888 case "favar" :
889+ case "fafixed" :
714890 pPr . TextAlignment = new TextAlignment ( ) { Val = VerticalTextAlignmentValues . Bottom } ;
715891 break ;
716892 case "facenter" :
@@ -968,18 +1144,37 @@ private void HandleControlWord(RtfControlWord cw, ref Paragraph? currentParagrap
9681144 }
9691145 }
9701146
971- private void EnsureRun ( ref Paragraph ? currentParagraph , ref Run ? currentRun , OpenXmlElement parentElement , FormattingState runState , ParagraphProperties pPr )
/// <summary>
/// Resets the section properties being built.
/// When document-level defaults exist, the current section properties become a deep copy of them;
/// otherwise the current section properties object is created if missing and emptied in place.
/// </summary>
private void ResetSectionProperties()
{
    if (defaultSectPr is null)
    {
        // No defaults recorded: keep (or create) the current instance and strip its content.
        if (currentSectPr is null)
        {
            currentSectPr = new SectionProperties();
        }
        currentSectPr.RemoveAllChildren();
        currentSectPr.ClearAllAttributes();
        return;
    }

    // Start again from a deep copy of the document-level defaults.
    currentSectPr = (SectionProperties)defaultSectPr.CloneNode(true);
}
1160+
/// <summary>
/// Makes sure a current paragraph exists. If there is none, a new paragraph is created from
/// <paramref name="pPr"/>, appended to <paramref name="parentElement"/>, and the current run is cleared.
/// </summary>
/// <param name="currentParagraph">Current paragraph; assigned when a new one is created.</param>
/// <param name="currentRun">Current run; set to null when a new paragraph is created.</param>
/// <param name="parentElement">Element that receives the newly created paragraph.</param>
/// <param name="pPr">Paragraph properties passed to the paragraph factory.</param>
private void EnsureParagraph(ref Paragraph? currentParagraph, ref Run? currentRun, OpenXmlElement parentElement, ParagraphProperties pPr)
{
    // Nothing to do when a paragraph is already open.
    if (currentParagraph is not null)
    {
        return;
    }

    currentParagraph = CreateParagraphWithProperties(pPr);
    parentElement.Append(currentParagraph);

    // A run from a previous paragraph must not be reused in the new one.
    currentRun = null;
}
1170+
/// <summary>
/// Makes sure both a current paragraph and a current run exist. The paragraph is ensured first;
/// if there is no current run, one is created from <paramref name="runState"/> and appended to the paragraph.
/// </summary>
/// <param name="currentParagraph">Current paragraph; created on demand.</param>
/// <param name="currentRun">Current run; created on demand.</param>
/// <param name="parentElement">Element that receives a newly created paragraph.</param>
/// <param name="runState">Formatting state passed to the run factory.</param>
/// <param name="pPr">Paragraph properties used if a paragraph has to be created.</param>
private void EnsureRun(ref Paragraph? currentParagraph, ref Run? currentRun, OpenXmlElement parentElement, FormattingState runState, ParagraphProperties pPr)
{
    EnsureParagraph(ref currentParagraph, ref currentRun, parentElement, pPr);

    // A run is already open: keep using it.
    if (currentRun is not null)
    {
        return;
    }

    currentRun = CreateRunWithProperties(runState);
    // EnsureParagraph guarantees the paragraph is non-null at this point.
    currentParagraph!.Append(currentRun);
}
9851180
0 commit comments