» Archive for October, 2007

Word to PDF Converter in PHP using OpenOffice (Word2PDF)

Wednesday, October 10th, 2007 by rubypdf

Lots of people want to convert Word documents into PDFs (Word2PDF). Here’s a Windows PHP converter of Word documents to PDF that uses OpenOffice via a COM wrapper of the OpenOffice APIs. It’s a long description for something fairly simple.

<?php
// Remove PHP's script execution time limit (0 means no limit).
set_time_limit(0);
/**
 * Builds a com.sun.star.beans.PropertyValue struct carrying one name/value pair.
 *
 * @param string $name  value stored in the struct's Name field
 * @param mixed  $value value stored in the struct's Value field
 * @param object $osm   object exposing Bridge_GetStruct(), used to create the struct
 * @return object the populated struct
 */
function MakePropertyValue($name,$value,$osm){
    $property = $osm->Bridge_GetStruct("com.sun.star.beans.PropertyValue");
    $property->Name  = $name;
    $property->Value = $value;
    return $property;
}
/**
 * Converts a document to PDF by driving OpenOffice.org through its COM bridge.
 *
 * @param string $doc_url    URL of the document to load (the caller passes a file:/// URL)
 * @param string $output_url URL the exported PDF is stored to
 * @throws RuntimeException when OpenOffice.org does not return a document for $doc_url
 * @throws Exception        any error raised while exporting is rethrown after the
 *                          document has been closed
 */
function word2pdf($doc_url, $output_url){
    //Invoke the OpenOffice.org service manager
    $osm = new COM("com.sun.star.ServiceManager") or die ("Please be sure that OpenOffice.org is installed.\n");
    //Set the application to remain hidden to avoid flashing the document onscreen
    $args = array(MakePropertyValue("Hidden",true,$osm));
    //Launch the desktop
    $oDesktop = $osm->createInstance("com.sun.star.frame.Desktop");
    //Load the .doc file, and pass in the "Hidden" property from above
    $oWriterDoc = $oDesktop->loadComponentFromURL($doc_url,"_blank", 0, $args);
    //A failed load yields no document object; fail with a clear message instead of
    //a fatal "call to a member function on a non-object" further down
    if (!is_object($oWriterDoc)) {
        throw new RuntimeException("Could not load document: " . $doc_url);
    }
    //Set up the arguments for the PDF output
    $export_args = array(MakePropertyValue("FilterName","writer_pdf_Export",$osm));
    //Write out the PDF; close the document even when the export fails so the
    //hidden document does not stay open
    try {
        $oWriterDoc->storeToURL($output_url,$export_args);
    } catch (Exception $e) {
        $oWriterDoc->close(true);
        throw $e;
    }
    $oWriterDoc->close(true);
}
// Word document to convert.
$doc_file = "C:/dev/openofficeintegration/docconverter/DpmR5Reqv1.20.doc";
// Directory and file name of the PDF to produce.
$output_dir = "C:/dev/openofficeintegration/docconverter/";
$pdf_file = "DpmR5Reqv1.20.pdf";
// Both locations are handed to word2pdf() as file:/// URLs.
word2pdf("file:///" . $doc_file, "file:///" . $output_dir . $pdf_file);
?>

from http://www.wynia.org/wordpress/word-to-pdf-converter-in-php-word2pdf/

Using HtmlWorker to parse HTML snippets and convert to PDF

Wednesday, October 10th, 2007 by rubypdf

The HtmlWorker class of iTextSharp supports a subset of HTML tags and CSS properties; with a StyleSheet it can support more CSS properties. So we can use it to parse HTML snippets and convert them to PDF.
From the HtmlWorker source code, we know it supports the following HTML tags:

ol ul li a pre font span br p div body table td th tr i b u sub sup em strong s strike h1 h2 h3 h4 h5 h6 img

Here is an example, converted from a sample in iText in Action.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/* chapter14/ParsingHtmlSnippets.java */
using System;
using iTextSharp.text;
using iTextSharp.text.pdf;
using iTextSharp.text.html.simpleparser;
using System.Text;
using System.IO;
 
namespace chapter14
{
 
	/// <summary> This example was written by Bruno Lowagie. It is part of the book 'iText in
	/// Action' by Manning Publications. 
	/// ISBN: 1932394796
	/// http://itext.ugent.be/itext-in-action/ 
	/// http://www.manning.com/lowagie/
	/// </summary>
 
	public class ParsingHtmlSnippets
	{
		/// <summary> Path of the HTML snippet that is parsed on every pass. </summary>
		private const string HtmlResource = "../resources/list.htm";

		/// <summary> Parses the HTML snippet three times, loading additional styles into
		/// the style sheet before the second and third pass, and appends the parsed
		/// elements of every pass to html3.pdf.
		/// </summary>
		/// <param name="args">
		/// no arguments needed here
		/// </param>
		[STAThread]
		public static void  Main(System.String[] args)
		{
			System.Console.Out.WriteLine("Chapter 14: HTML parse example");
			System.Console.Out.WriteLine("-> Parses an HTML file into PDF.");
			System.Console.Out.WriteLine("-> jars needed: iText.jar");
			System.Console.Out.WriteLine("-> resource needed: list.html");
			System.Console.Out.WriteLine("-> files generated in /results subdirectory:");
			System.Console.Out.WriteLine("   html3.pdf");

			Document document = new Document();
			try
			{
				StyleSheet styles = new StyleSheet();
				styles.LoadTagStyle("ol", "leading", "16,0");

				PdfWriter.GetInstance(document, new FileStream("html3.pdf", FileMode.Create));
				document.Open();

				// Pass 1: only the "ol" tag style is loaded.
				AddParsedHtml(document, styles);

				// Pass 2: additionally style "li" and "span" tags.
				FontFactory.Register("c:\\windows\\fonts\\gara.ttf");
				styles.LoadTagStyle("li", "face", "garamond");
				styles.LoadTagStyle("span", "size", "8px");
				AddParsedHtml(document, styles);

				// Pass 3: additionally load the "sf" and "classic" styles.
				styles.LoadStyle("sf", "color", "blue");
				styles.LoadStyle("sf", "b", "");
				styles.LoadStyle("classic", "color", "red");
				styles.LoadStyle("classic", "i", "");
				AddParsedHtml(document, styles);
			}
			catch (System.Exception e)
			{
				System.Console.Error.WriteLine(e.StackTrace);

				System.Console.Error.WriteLine(e.Message);
			}
			finally
			{
				// Close the document on every path, including when the catch block itself fails.
				document.Close();
			}
		}

		/// <summary> Parses the HTML snippet with the given styles and adds every
		/// resulting element to the document. The reader on the snippet is disposed
		/// when parsing is done, so repeated passes do not leak file handles.
		/// </summary>
		/// <param name="document">the opened document the elements are added to</param>
		/// <param name="styles">the style sheet handed to the HTML parser</param>
		private static void AddParsedHtml(Document document, StyleSheet styles)
		{
			using (StreamReader reader = new StreamReader(HtmlResource, Encoding.Default))
			{
				System.Collections.ArrayList objects = HTMLWorker.ParseToList(reader, styles);
				for (int k = 0; k < objects.Count; ++k)
				{
					document.Add((IElement) objects[k]);
				}
			}
		}
	}
}

list.htm

iText 2.0.6 supports JPEG2000 and base64 encoded images in XFA forms

Monday, October 8th, 2007 by rubypdf

iText 2.0.6 was released on 2007-10-05 09:51; the announcement says it supports JPEG2000 (both jp2 and j2k) and base64-encoded images in XFA forms.