Using HtmlWorker to parse HTML snippets and convert to PDF

The HTMLWorker class of iTextSharp supports a subset of HTML tags and CSS properties; combined with a StyleSheet, it can apply additional styling. So we can use it to parse HTML snippets and convert them to PDF.
From the HTMLWorker source code, we know that it supports the following HTML tags:

ol ul li a pre font span br p div body table td th tr i b u sub sup em strong s strike h1 h2 h3 h4 h5 h6 img

Here is a sample converted from the book 'iText in Action'.

/* chapter14/ParsingHtmlSnippets.java */
using System;
using iTextSharp.text;
using iTextSharp.text.pdf;
using iTextSharp.text.html.simpleparser;
using System.Text;
using System.IO;

namespace chapter14
{
	/// <summary>
	/// Parses the same HTML snippet into a PDF three times, each pass using a
	/// <see cref="StyleSheet"/> with more styles loaded than the previous one.
	/// This example was written by Bruno Lowagie. It is part of the book 'iText in
	/// Action' by Manning Publications.
	/// ISBN: 1932394796
	/// http://itext.ugent.be/itext-in-action/
	/// http://www.manning.com/lowagie/
	/// </summary>
	public class ParsingHtmlSnippets
	{
		/// <summary>HTML snippet that is parsed on every pass.</summary>
		private const string HtmlSourcePath = "../resources/list.htm";

		/// <summary>PDF file that receives the parsed content.</summary>
		private const string PdfOutputPath = "html3.pdf";

		/// <summary>
		/// Generates a PDF file from an HTML snippet, adding the parsed content
		/// once per style configuration.
		/// </summary>
		/// <param name="args">no arguments needed here</param>
		[STAThread]
		public static void  Main(System.String[] args)
		{
			System.Console.Out.WriteLine("Chapter 14: HTML parse example");
			System.Console.Out.WriteLine("-> Parses an HTML file into PDF.");
			System.Console.Out.WriteLine("-> assembly needed: iTextSharp.dll");
			System.Console.Out.WriteLine("-> resource needed: list.htm");
			System.Console.Out.WriteLine("-> file generated in the current directory:");
			System.Console.Out.WriteLine("   html3.pdf");

			Document document = new Document();
			try
			{
				// The using block guarantees the output handle is released even
				// when opening the document or closing it fails.
				using (FileStream output = new FileStream(PdfOutputPath, FileMode.Create))
				{
					PdfWriter.GetInstance(document, output);
					document.Open();
					try
					{
						// Pass 1: only a leading style for ordered lists.
						StyleSheet styles = new StyleSheet();
						styles.LoadTagStyle("ol", "leading", "16,0");
						AddParsedHtml(document, styles);

						// Pass 2: additionally a registered font face for list
						// items and a font size for spans.
						FontFactory.Register("c:\\windows\\fonts\\gara.ttf");
						styles.LoadTagStyle("li", "face", "garamond");
						styles.LoadTagStyle("span", "size", "8px");
						AddParsedHtml(document, styles);

						// Pass 3: additionally styles for the "sf" and "classic" classes.
						styles.LoadStyle("sf", "color", "blue");
						styles.LoadStyle("sf", "b", "");
						styles.LoadStyle("classic", "color", "red");
						styles.LoadStyle("classic", "i", "");
						AddParsedHtml(document, styles);
					}
					catch (System.Exception e)
					{
						// Report here rather than in the outer handler so that a
						// failure while closing cannot hide the original error.
						ReportError(e);
					}

					// Close while the output stream is still open; whatever was
					// added before a failure is still flushed to the file.
					document.Close();
				}
			}
			catch (System.Exception e)
			{
				// Reached when the output file or document cannot be opened, or
				// when closing the document fails.
				ReportError(e);
			}
		}

		/// <summary>
		/// Parses the HTML snippet with the given styles and appends every
		/// resulting element to the document.
		/// </summary>
		/// <param name="document">an opened document that receives the elements</param>
		/// <param name="styles">the styles applied while parsing</param>
		private static void AddParsedHtml(Document document, StyleSheet styles)
		{
			// A fresh reader per pass: each parse consumes the reader, and the
			// using block closes the file handle afterwards.
			using (StreamReader reader = new StreamReader(HtmlSourcePath, Encoding.Default))
			{
				System.Collections.ArrayList elements = HTMLWorker.ParseToList(reader, styles);
				foreach (IElement element in elements)
				{
					document.Add(element);
				}
			}
		}

		/// <summary>
		/// Writes the stack trace and message of an exception to standard error.
		/// </summary>
		/// <param name="e">the exception to report</param>
		private static void ReportError(System.Exception e)
		{
			System.Console.Error.WriteLine(e.StackTrace);

			System.Console.Error.WriteLine(e.Message);
		}
	}
}

list.htm

Using HtmlWorker to parse HTML snippets and convert to PDF

1 thought on “Using HtmlWorker to parse HTML snippets and convert to PDF”

Leave a Reply