/* * AppleCommander - An Apple ][ image utility. * Copyright (C) 2002-2022 by Robert Greene * robgreene at users.sourceforge.net * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package com.webcodepro.applecommander.storage.filters; import java.io.ByteArrayOutputStream; import java.io.PrintWriter; import java.util.StringTokenizer; import com.webcodepro.applecommander.storage.FileEntry; import com.webcodepro.applecommander.storage.FileFilter; import com.webcodepro.applecommander.ui.AppleCommander; /** * Extract the contents of an ancient word processor file (might be Word * Perfect) and convert to a text format. Currently supported formats are plain * text, HTML, or RTF. These are not exact duplicates, but they are close * approximations. RTF format is suitable for conversion to other word * processors. *
* To choose export format, use the appropriately named select method. *
* Date created: Dec 18, 2008 9:09:21 AM
*
* @author David Schmidt
*/
public class GutenbergFileFilter implements FileFilter {
/*
 * Rendering options, encoded as plain int codes that are private to this
 * class. Because the internal representation may change in the future,
 * other code should pick a format through the select*Rendering() methods
 * and query it through the is*Rendering() methods rather than rely on
 * these values.
 */
private static final int RENDER_AS_TEXT = 0; // plain text output
private static final int RENDER_AS_HTML = 1; // HTML output
private static final int RENDER_AS_RTF = 2; // RTF output
// Currently selected rendering; a newly created filter starts out as RTF.
private int rendering = RENDER_AS_RTF;
/**
* Constructor for GutenbergFileFilter.
*/
public GutenbergFileFilter() {
super();
}
/**
 * Process the given FileEntry and return a byte array with filtered data.
 * The high bit of every data byte is cleared, the result is decoded as
 * 7-bit ASCII and trimmed, and the text is then handed to
 * handleTranslation together with the currently selected rendering.
 *
 * @param fileEntry the entry whose file data is to be converted
 * @return the bytes written by handleTranslation for the current rendering
 * @see com.webcodepro.applecommander.storage.FileFilter#filter(FileEntry)
 */
public byte[] filter(FileEntry fileEntry) {
	byte[] fileData = fileEntry.getFileData();
	// Strip the high bit into a private copy so that the array handed out
	// by the FileEntry is never modified by this filter.
	byte[] stripped = new byte[fileData.length];
	for (int i = 0; i < fileData.length; i++) {
		stripped[i] = (byte) (fileData[i] & 0x7f);
	}
	// Every byte is now in the range 0..127, so decode explicitly as ASCII
	// instead of depending on the platform default charset.
	String preprocess = new String(stripped,
			java.nio.charset.StandardCharsets.US_ASCII).trim();
	ByteArrayOutputStream byteArray = new ByteArrayOutputStream(
			stripped.length);
	PrintWriter printWriter = new PrintWriter(byteArray, true);
	handleTranslation(preprocess, printWriter, rendering);
	// Make sure everything buffered by the writer reaches the byte array.
	printWriter.flush();
	return byteArray.toByteArray();
}
/**
* Transform the raw text into the requested destination format (text, HTML, or RTF).
*/
protected void handleTranslation(String raw, PrintWriter output, int rendering) {
boolean ignoreBr = false;
boolean inHeader = false;
boolean inItalics = false;
boolean inBold = false;
boolean inCenter = false;
boolean inUnderline = false;
boolean inSuperscript = false;
String cooked = raw.replaceAll("\\x00", ""); //$NON-NLS-1$ $NON-NLS-2$ Remove nulls
cooked=cooked.replaceAll("<[a|A]1>", ""); //$NON-NLS-1$ $NON-NLS-2$ File start
cooked=cooked.replaceAll(" "); //$NON-NLS-1$ $NON-NLS-2$ Tab level 1
cooked=cooked.replaceAll("<[t|T]2>(.*)", " "); //$NON-NLS-1$
break;
case RENDER_AS_RTF:
output.print("\\par \\par \\li0 "); //$NON-NLS-1$
break;
default:
break;
}
ignoreBr = true;
}
else if (t.equalsIgnoreCase("UL") && (!inUnderline)) //$NON-NLS-1$
{
switch (rendering)
{
// Underline on
case RENDER_AS_HTML:
output.print(""); //$NON-NLS-1$
break;
case RENDER_AS_RTF:
output.print("\\ul "); //$NON-NLS-1$
break;
default:
break;
}
inUnderline = true;
}
else if ((t.equalsIgnoreCase("KU") || t.equalsIgnoreCase("KL") || t.equalsIgnoreCase("UK")) && (inUnderline)) //$NON-NLS-1$ $NON-NLS-2$ $NON-NLS-3$
{
// Underline off
switch (rendering)
{
case RENDER_AS_HTML:
output.print(""); //$NON-NLS-1$
break;
case RENDER_AS_RTF:
output.print("\\ulnone "); //$NON-NLS-1$
break;
default:
break;
}
inUnderline = false;
}
else if ((t.equalsIgnoreCase("BO") || t.equalsIgnoreCase("b1")) && (!inBold)) //$NON-NLS-1$ $NON-NLS-2$
{
// Bold on
switch (rendering)
{
case RENDER_AS_HTML:
output.print(""); //$NON-NLS-1$
break;
case RENDER_AS_RTF:
output.print("\\b "); //$NON-NLS-1$
break;
default:
break;
}
inBold = true;
}
else if (t.equalsIgnoreCase("KB")) //$NON-NLS-1$
{
// Bold off
switch (rendering)
{
case RENDER_AS_HTML:
output.print(""); //$NON-NLS-1$
break;
case RENDER_AS_RTF:
output.print("\\b0 "); //$NON-NLS-1$
break;
default:
break;
}
inUnderline = false;
}
else if ((t.equalsIgnoreCase("UFA") || t.equalsIgnoreCase("UFP") || t.equals("UFY") || t.equalsIgnoreCase("f1")) && (inSuperscript == false)) //$NON-NLS-1$ $NON-NLS-2$ $NON-NLS-3$ $NON-NLS-4$
{
// Superscript on
switch (rendering)
{
case RENDER_AS_HTML:
output.print(""); //$NON-NLS-1$
break;
case RENDER_AS_RTF:
output.print("\\super "); //$NON-NLS-1$
break;
default:
break;
}
inSuperscript = true;
}
else if ((t.equalsIgnoreCase("UFM") || t.equalsIgnoreCase("f2")) && (inSuperscript == true)) //$NON-NLS-1$ $NON-NLS-2$
{
// Superscript off
switch (rendering)
{
case RENDER_AS_HTML:
output.print(""); //$NON-NLS-1$
break;
case RENDER_AS_RTF:
output.print("\\nosupersub "); //$NON-NLS-1$
break;
default:
break;
}
inSuperscript = false;
}
else if (t.equalsIgnoreCase("co") && (inCenter == false)) //$NON-NLS-1$
{
switch (rendering)
{
case RENDER_AS_HTML:
output.print("$1
"); //$NON-NLS-1$ $NON-NLS-2$ Bound a h1 heading
cooked=cooked.replaceAll("<[h|H]2>(.*)", "$1
"); //$NON-NLS-1$ $NON-NLS-2$ Bound a h2 heading
cooked=cooked.replaceAll("<[h|H]3>(.*)", "$1
"); //$NON-NLS-1$ $NON-NLS-2$ Bound a h3 heading
cooked=cooked.replaceAll("<[h|H]4>(.*)", "$1
"); //$NON-NLS-1$ $NON-NLS-2$ Bound a h4 heading
cooked=cooked.replaceAll("<[n|N]1>(.*)", "$1
"); //$NON-NLS-1$ $NON-NLS-2$ Another kind of heading? Give it boundaries
cooked=cooked.replaceAll("<[n|N]2>(.*)", "$1
"); //$NON-NLS-1$ $NON-NLS-2$ Another kind of heading? Give it boundaries
cooked=cooked.replaceAll("<[n|N]3>(.*)", "$1
"); //$NON-NLS-1$ $NON-NLS-2$ Another kind of heading? Give it boundaries
cooked=cooked.replaceAll("<[t|T]1>", ""); //$NON-NLS-1$
else if (t.equalsIgnoreCase("h2")) //$NON-NLS-1$
output.print("
"); //$NON-NLS-1$
else if (t.equalsIgnoreCase("/h2")) //$NON-NLS-1$
output.print(""); //$NON-NLS-1$
else if (t.equalsIgnoreCase("/h3")) //$NON-NLS-1$
output.print(""); //$NON-NLS-1$
else if (t.equalsIgnoreCase("/h4")) //$NON-NLS-1$
output.print(""); //$NON-NLS-1$
break;
case RENDER_AS_RTF:
output.print("\\b0\\par\\fs24 "); //$NON-NLS-1$
break;
default:
output.println();
break;
}
}
else if (t.startsWith("bq")) // Indent $NON-NLS-1$
{
switch (rendering)
{
case RENDER_AS_HTML:
if (t.equals("bq1")) //$NON-NLS-1$
output.print(""); //$NON-NLS-1$
else if (t.equalsIgnoreCase("h3")) //$NON-NLS-1$
output.print("
"); //$NON-NLS-1$
else if (t.equalsIgnoreCase("h4")) //$NON-NLS-1$
output.print("
"); //$NON-NLS-1$
break;
case RENDER_AS_RTF:
if (t.equalsIgnoreCase("h1")) //$NON-NLS-1$
output.print("\\par\\par\\pard\\s1\\b\\fs48 "); //$NON-NLS-1$
else if (t.equalsIgnoreCase("h2")) //$NON-NLS-1$
output.print("\\par\\par\\pard\\s2\\b\\fs36 "); //$NON-NLS-1$
else if (t.equalsIgnoreCase("h3")) //$NON-NLS-1$
output.print("\\par\\par\\pard\\s3\\b\\fs27 "); //$NON-NLS-1$
else if (t.equalsIgnoreCase("h4")) //$NON-NLS-1$
output.print("\\par\\par\\pard\\s4\\b\\fs24 "); //$NON-NLS-1$
break;
default:
output.println();
break;
}
}
else if ((t.startsWith("/h")) && (inHeader))
{
ignoreBr = true;
inHeader = false;
switch (rendering)
{
case RENDER_AS_HTML:
if (t.equalsIgnoreCase("/h1")) //$NON-NLS-1$
output.print("
"); //$NON-NLS-1$
else if (t.equals("bq2")) //$NON-NLS-1$
output.print("
"); //$NON-NLS-1$
break;
case RENDER_AS_RTF:
//output.println("\\par\\li0 "); //$NON-NLS-1$
break;
default:
break;
}
ignoreBr = true;
}
// else
// System.err.println("Ignored command: <"+t+">");
}
else
{
// System.out.println("Data: ["+t+"]");
output.print(t);
}
}
if (!ignoreBr)
handleReturn(output);
ignoreBr = false;
switch (rendering)
{
// turn off many types of formatting stuff at the end of lines
case RENDER_AS_HTML:
if (inItalics)
output.print(""); //$NON-NLS-1$
if (inBold)
output.print(""); //$NON-NLS-1$
if (inUnderline)
output.print(""); //$NON-NLS-1$
if (inSuperscript)
output.print(""); //$NON-NLS-1$
break;
case RENDER_AS_RTF:
if (inItalics)
output.print("\\i0 "); //$NON-NLS-1$
if (inBold)
output.print("\\b0 "); //$NON-NLS-1$
if (inUnderline)
output.print("\\ulnone "); //$NON-NLS-1$
if (inSuperscript)
output.print("\\nosupersub "); //$NON-NLS-1$
break;
default:
break;
}
inItalics = false;
inBold = false;
inUnderline = false;
inSuperscript = false;
inHeader = false;
}
// Put the finishing touches on the document
switch (rendering)
{
case RENDER_AS_HTML:
output.println(""); //$NON-NLS-1$
break;
case RENDER_AS_RTF:
output.println("}"); //$NON-NLS-1$
break;
default:
break;
}
return;
}
/**
* Handle a carriage return by writing a line break suited to the current rendering.
*/
protected void handleReturn(PrintWriter printWriter) {
if (isHtmlRendering())
printWriter.println(""); //$NON-NLS-1$
break;
case RENDER_AS_RTF:
if (t.equals("bq1")) //$NON-NLS-1$
output.print("\\pard\\li720 "); //$NON-NLS-1$
else if (t.equals("bq2")) //$NON-NLS-1$
output.print("\\pard\\li1440 "); //$NON-NLS-1$
break;
default:
output.print(" "); //$NON-NLS-1$
break;
}
ignoreBr = true;
}
else if (t.startsWith("/bq")) // Outdent $NON-NLS-1$
{
switch (rendering)
{
case RENDER_AS_HTML:
if (t.equals("/bq1")) //$NON-NLS-1$
output.print("
"); //$NON-NLS-1$
else if (t.equals("/bq2")) //$NON-NLS-1$
output.print("
"); //$NON-NLS-1$
else if (isRtfRendering())
printWriter.println("\\par"); //$NON-NLS-1$
else
printWriter.println();
}
/**
 * Suggest a file name for the exported data: the entry's trimmed file name,
 * with the extension for the current rendering (.html, .rtf, otherwise
 * .txt) appended unless the lower-cased name already ends with it.
 *
 * @param fileEntry the entry being exported
 * @return the suggested file name
 * @see com.webcodepro.applecommander.storage.FileFilter#getSuggestedFileName(FileEntry)
 */
public String getSuggestedFileName(FileEntry fileEntry) {
	String baseName = fileEntry.getFilename().trim();
	final String suffix;
	if (isHtmlRendering()) {
		suffix = ".html"; //$NON-NLS-1$
	} else if (isRtfRendering()) {
		suffix = ".rtf"; //$NON-NLS-1$
	} else {
		suffix = ".txt"; //$NON-NLS-1$
	}
	boolean alreadySuffixed = baseName.toLowerCase().endsWith(suffix);
	return alreadySuffixed ? baseName : baseName + suffix;
}
/**
 * Set the rendering method.
 * The value is stored as given, with no validation. The is*Rendering()
 * methods compare it against the private RENDER_AS_* codes, so a value
 * matching none of them makes all three return false.
 *
 * @param rendering the rendering code to store
 */
protected void setRendering(int rendering) {
this.rendering = rendering;
}
/**
 * Reports whether plain text is the currently selected rendering.
 *
 * @return true when the rendering is text
 */
public boolean isTextRendering() {
	return RENDER_AS_TEXT == this.rendering;
}
/**
 * Reports whether HTML is the currently selected rendering.
 *
 * @return true when the rendering is HTML
 */
public boolean isHtmlRendering() {
	return RENDER_AS_HTML == this.rendering;
}
/**
 * Reports whether RTF is the currently selected rendering.
 *
 * @return true when the rendering is RTF
 */
public boolean isRtfRendering() {
	return RENDER_AS_RTF == this.rendering;
}
/**
 * Switches this filter to the plain text rendering.
 */
public void selectTextRendering() {
	this.rendering = RENDER_AS_TEXT;
}
/**
 * Switches this filter to the HTML rendering.
 */
public void selectHtmlRendering() {
	this.rendering = RENDER_AS_HTML;
}
/**
 * Switches this filter to the RTF rendering.
 */
public void selectRtfRendering() {
	this.rendering = RENDER_AS_RTF;
}
}