/**
 * Command-line entry point. It currently does nothing: the only code in
 * the body is a disabled test that would build a PjWrapper from the first
 * argument and call getAllText() on it.
 *
 * @param args command-line arguments; not read while the test code is disabled
 */
public static void main(String[] args) throws IOException, PjException {
    // Disabled test code, kept for reference:
    // PjWrapper testWrapper = new PjWrapper(args[0]);
    // LinkedList textList = testWrapper.getAllText();
}
/**
* Returns as much text as we can extract from the PDF.
* This currently includes:
*
* NOTE: Pj does not support LZW, so some text in some PDFs may not
* be indexable.
*/
public LinkedList getAllText() throws PjException {
LinkedList stringList = new LinkedList();
Iterator streamIter = getAllContentsStreams().iterator();
PjStream stream;
String streamData;
String streamText;
boolean moreData;
int textStart, textEnd;
//System.out.println("Going through streams...");
while(streamIter.hasNext()) {
//System.out.println("Getting next stream");
stream = (PjStream) streamIter.next();
//System.out.println("Adding text from stream with filter: " + getFilterString(stream));
stream = stream.flateDecompress();
//System.out.println("Adding text from stream with filter afterdecompress: " + getFilterString(stream));
streamData = new String(stream.getBuffer());
/**
 * Collects the contents streams of every page in the document.
 * The page list comes from getAllPages() and is handed unchanged to
 * getContentsStreams(LinkedList).
 *
 * NOTE(review): the earlier comment described this as a post-order
 * traversal of the pages tree from the root node; that traversal would
 * live in getAllPages(), which is not visible here -- confirm.
 *
 * @return a list of all the contents of all the pages
 * @throws InvalidPdfObjectException declared here; can only originate in
 *         the two delegated calls
 */
public LinkedList getAllContentsStreams() throws InvalidPdfObjectException {
    final LinkedList allPages = getAllPages();
    return getContentsStreams(allPages);
}
/**
* Gets the contents streams from the given list of PjPage objects.
* @param pages the list of PjPage objects whose contents are collected
* @return a list of all the contents of the pages
*/
public LinkedList getContentsStreams(LinkedList pages) throws
InvalidPdfObjectException {
public class GetPDFInfo {
public static void main (String args[]) {
try {
Pdf pdf = new Pdf(args[0]);
System.out.println("# of pages is " + pdf.getPageCount());
int y = pdf.getMaxObjectNumber();
for (int x=1; x <= y; x++) {
PjObject obj = pdf.getObject(x);
if (obj instanceof PjInfo) {
System.out.println("Author: " + ((PjInfo)
obj).getAuthor());
System.out.println("Creator: " + ((PjInfo)
obj).getCreator());
System.out.println("Subject: " + ((PjInfo)
obj).getSubject());
System.out.println("Keywords: " + ((PjInfo)
obj).getKeywords());