Archive for the 'iText in Action' Category

Using HtmlWorker to parse HTML snippets and convert to PDF

The HTMLWorker class of iTextSharp supports a subset of HTML tags and CSS properties; combined with a StyleSheet, it can support more CSS styling. So we can use it to parse HTML snippets and convert them to PDF.
From the HTMLWorker source code, we know it supports the following HTML tags:

ol ul li a pre font span br p div body table td th tr i b u sub sup em strong s strike h1 h2 h3 h4 h5 h6 img

And here is an example, converted to C# from the one in iText in Action.

/* C# port of chapter14/ParsingHtmlSnippets.java */
using System;
using iTextSharp.text;
using iTextSharp.text.pdf;
using iTextSharp.text.html.simpleparser;
using System.Text;
using System.IO;

namespace chapter14
{
	/// <summary>
	/// Parses the same HTML snippet into PDF content three times, adding more
	/// style rules before each pass, and writes the result to <c>html3.pdf</c>.
	/// </summary>
	/// <remarks>
	/// This example was written by Bruno Lowagie. It is part of the book 'iText in
	/// Action' by Manning Publications.
	/// ISBN: 1932394796
	/// http://itext.ugent.be/itext-in-action/
	/// http://www.manning.com/lowagie/
	/// </remarks>
	public class ParsingHtmlSnippets
	{
		/// <summary>
		/// Generates <c>html3.pdf</c> from <c>../resources/list.htm</c>.
		/// </summary>
		/// <param name="args">no arguments needed here</param>
		[STAThread]
		public static void Main(System.String[] args)
		{
			System.Console.Out.WriteLine("Chapter 14: HTML parse example");
			System.Console.Out.WriteLine("-> Parses an HTML file into PDF.");
			System.Console.Out.WriteLine("-> jars needed: iText.jar");
			System.Console.Out.WriteLine("-> resource needed: list.html");
			System.Console.Out.WriteLine("-> files generated in /results subdirectory:");
			System.Console.Out.WriteLine("   html3.pdf");

			Document document = new Document();
			try
			{
				StyleSheet styles = new StyleSheet();
				styles.LoadTagStyle("ol", "leading", "16,0");

				PdfWriter.GetInstance(document, new FileStream("html3.pdf", FileMode.Create));
				document.Open();

				// Pass 1: only the "ol" leading rule is active.
				AddParsedHtml(document, styles);

				// Pass 2: additionally style <li> and <span> tags.
				FontFactory.Register("c:\\windows\\fonts\\gara.ttf");
				styles.LoadTagStyle("li", "face", "garamond");
				styles.LoadTagStyle("span", "size", "8px");
				AddParsedHtml(document, styles);

				// Pass 3: additionally register the "sf" and "classic" styles.
				styles.LoadStyle("sf", "color", "blue");
				styles.LoadStyle("sf", "b", "");
				styles.LoadStyle("classic", "color", "red");
				styles.LoadStyle("classic", "i", "");
				AddParsedHtml(document, styles);
			}
			catch (System.Exception e)
			{
				System.Console.Error.WriteLine(e.StackTrace);

				System.Console.Error.WriteLine(e.Message);
			}
			finally
			{
				// Close the document on every exit path, as the original did after the catch.
				document.Close();
			}
		}

		/// <summary>
		/// Parses <c>../resources/list.htm</c> with the given styles and appends every
		/// resulting element to the document.
		/// </summary>
		/// <param name="document">the open document that receives the parsed elements</param>
		/// <param name="styles">the style rules handed to the HTML parser</param>
		private static void AddParsedHtml(Document document, StyleSheet styles)
		{
			// Dispose the reader after each pass so the file handle is released.
			using (StreamReader reader = new StreamReader("../resources/list.htm", Encoding.Default))
			{
				System.Collections.ArrayList objects = HTMLWorker.ParseToList(reader, styles);
				foreach (IElement element in objects)
				{
					document.Add(element);
				}
			}
		}
	}
}

list.htm

Share and Enjoy:
  • Digg
  • del.icio.us
  • Netvouz
  • DZone
  • ThisNext
  • MisterWong
  • Wists
  • BlinkList
  • blogmarks
  • blogtercimlap
  • connotea
  • DotNetKicks
  • Fark
  • Fleck
  • Gwar
  • Haohao
  • IndianPad
  • Internetmedia
  • LinkaGoGo
  • MyShare
  • Netscape
  • NewsVine
  • Rec6
  • Reddit
  • Scoopeo
  • Slashdot
  • StumbleUpon
  • Technorati
  • Webride

iText and iTextSharp support PDF/A-1 now

From the iTextSharp mailing list, I got some great news: iText and iTextSharp now support PDF/A-1.

I’ve added support for PDF/A-1, and it is available in the iText SVN and the iTextSharp CVS. An example:

Example for Java:
// Create an A4 document and a writer set to PDF/A-1b conformance.
Document pdfDoc = new Document(PageSize.A4);
FileOutputStream target = new FileOutputStream("C:\\hello_A1-b.pdf");
PdfWriter pdfWriter = PdfWriter.getInstance(pdfDoc, target);
pdfWriter.setPDFXConformance(PdfWriter.PDFA1B);
pdfDoc.open();

// Build the output-intent dictionary; the same sRGB label is used for the
// condition identifier and the info entry.
String srgbLabel = "sRGB IEC61966-2.1";
PdfDictionary outputIntent = new PdfDictionary(PdfName.OUTPUTINTENT);
outputIntent.put(PdfName.OUTPUTCONDITIONIDENTIFIER, new PdfString(srgbLabel));
outputIntent.put(PdfName.INFO, new PdfString(srgbLabel));
outputIntent.put(PdfName.S, PdfName.GTS_PDFA1);

// Load the ICC profile from disk, drop its ALTERNATE entry, add it to the
// PDF body and reference it from the output intent.
FileInputStream profileInput = new FileInputStream("c:\\srgb.profile");
ICC_Profile srgbProfile = ICC_Profile.getInstance(profileInput);
PdfICCBased iccStream = new PdfICCBased(srgbProfile);
iccStream.remove(PdfName.ALTERNATE);
outputIntent.put(PdfName.DESTOUTPUTPROFILE, pdfWriter.addToBody(iccStream).getIndirectReference());
pdfWriter.getExtraCatalog().put(PdfName.OUTPUTINTENTS, new PdfArray(outputIntent));

// Write the text with a font created with the embed flag set to true.
BaseFont arialBase = BaseFont.createFont("c:\\windows\\fonts\\arial.ttf", BaseFont.WINANSI, true);
Font arial12 = new Font(arialBase, 12);
pdfDoc.add(new Paragraph("hello", arial12));

// Generate the XMP metadata, then close the document.
pdfWriter.createXmpMetadata();
pdfDoc.close();

And for C#:

// Create an A4 document and a writer set to PDF/A-1b conformance.
Document pdfDoc = new Document(PageSize.A4);
FileStream target = new FileStream("C:\\hello_A1-b_cs.pdf", FileMode.Create);
PdfWriter pdfWriter = PdfWriter.GetInstance(pdfDoc, target);
pdfWriter.PDFXConformance = PdfWriter.PDFA1B;
pdfDoc.Open();

// Build the output-intent dictionary; the same sRGB label is used for the
// condition identifier and the info entry.
string srgbLabel = "sRGB IEC61966-2.1";
PdfDictionary outputIntent = new PdfDictionary(PdfName.OUTPUTINTENT);
outputIntent.Put(PdfName.OUTPUTCONDITIONIDENTIFIER, new PdfString(srgbLabel));
outputIntent.Put(PdfName.INFO, new PdfString(srgbLabel));
outputIntent.Put(PdfName.S, PdfName.GTS_PDFA1);

// Load the ICC profile from disk, drop its ALTERNATE entry, add it to the
// PDF body and reference it from the output intent.
ICC_Profile srgbProfile = ICC_Profile.GetInstance("c:\\srgb.profile");
PdfICCBased iccStream = new PdfICCBased(srgbProfile);
iccStream.Remove(PdfName.ALTERNATE);
outputIntent.Put(PdfName.DESTOUTPUTPROFILE, pdfWriter.AddToBody(iccStream).IndirectReference);
pdfWriter.ExtraCatalog.Put(PdfName.OUTPUTINTENTS, new PdfArray(outputIntent));

// Write the text with a font created with the embed flag set to true.
BaseFont arialBase = BaseFont.CreateFont("c:\\windows\\fonts\\arial.ttf", BaseFont.WINANSI, true);
Font arial12 = new Font(arialBase, 12);
pdfDoc.Add(new Paragraph("hello", arial12));

// Generate the XMP metadata, then close the document.
pdfWriter.CreateXmpMetadata();
pdfDoc.Close();

related files:
srgb.profile (4K) Download Attachment
hello_A1-b.pdf (23K) Download Attachment

Share and Enjoy:
  • Digg
  • del.icio.us
  • Netvouz
  • DZone
  • ThisNext
  • MisterWong
  • Wists
  • BlinkList
  • blogmarks
  • blogtercimlap
  • connotea
  • DotNetKicks
  • Fark
  • Fleck
  • Gwar
  • Haohao
  • IndianPad
  • Internetmedia
  • LinkaGoGo
  • MyShare
  • Netscape
  • NewsVine
  • Rec6
  • Reddit
  • Scoopeo
  • Slashdot
  • StumbleUpon
  • Technorati
  • Webride

Freely fill PDF form with the help of iText or iTextSharp

iText (the Java version) and iTextSharp (the .NET version) are very powerful libraries for creating and manipulating PDF files, but this time I just want to talk about how to fill in a PDF form with the help of iText or iTextSharp.

For the Java version, please visit http://itextdocs.lowagie.com/tutorial/#part5, where there are many examples of filling in and creating PDF forms.

For the .NET version (C#),
the simplest example is:
// Read the template form and stamp a filled-in copy to "registered.pdf".
PdfReader template = new PdfReader("SimpleRegistrationForm.pdf");
FileStream output = new FileStream("registered.pdf",FileMode.Create);
PdfStamper stamper = new PdfStamper(template, output);
AcroFields fields = stamper.AcroFields;

// Set each form field by name.
fields.SetField("name", "Steven");
fields.SetField("address", "http://blog.rubypdf.com");
fields.SetField("postal_code", "200051");
fields.SetField("email", "rocsky@gmail.com");

// Close the stamper first, then the reader.
stamper.Close();
template.Close();

And if you want to fill CJK (Chinese, Japanese and Korean) characters into a PDF form, have a look at the following code:

// Register the Asian font and CMap resource assemblies before creating the font.
BaseFont.AddToResourceSearch("iTextAsian-1.0.dll");
BaseFont.AddToResourceSearch("iTextAsianCmaps-1.0.dll");
BaseFont cjkFont = BaseFont.CreateFont("STSong-Light", "UniGB-UCS2-H", BaseFont.EMBEDDED);
// Alternative, if you want to use a TrueType font instead:
//BaseFont cjkFont = BaseFont.CreateFont("c:\\windows\\fonts\\STSONG.TTF", BaseFont.IDENTITY_H, BaseFont.EMBEDDED);

// Read the template form and stamp a filled-in copy to "registered.pdf".
PdfReader template = new PdfReader("SimpleRegistrationForm.pdf");
FileStream output = new FileStream("registered.pdf",FileMode.Create);
PdfStamper stamper = new PdfStamper(template, output);

AcroFields fields = stamper.AcroFields;

// If a field will receive CJK characters but its current font does not
// support them, replace the field's font before filling it.
fields.SetFieldProperty("name","textfont",cjkFont,null);

// Now fill in the form.
fields.SetField("name", "小李");
fields.SetField("address", "http://blog.rubypdf.com");
fields.SetField("postal_code", "200051");
fields.SetField("email", "rocsky@gmail.com");

// Close the stamper first, then the reader.
stamper.Close();
template.Close();

Related resources from my Chinese blog and my free ASP.NET hosting:

Share and Enjoy:
  • Digg
  • del.icio.us
  • Netvouz
  • DZone
  • ThisNext
  • MisterWong
  • Wists
  • BlinkList
  • blogmarks
  • blogtercimlap
  • connotea
  • DotNetKicks
  • Fark
  • Fleck
  • Gwar
  • Haohao
  • IndianPad
  • Internetmedia
  • LinkaGoGo
  • MyShare
  • Netscape
  • NewsVine
  • Rec6
  • Reddit
  • Scoopeo
  • Slashdot
  • StumbleUpon
  • Technorati
  • Webride