diff -u -r trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFText2HTML.java src/main/java/org/apache/pdfbox/util/PDFText2HTML.java
--- trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFText2HTML.java 2012-01-23 11:50:35.234525938 +0100
+++ src/main/java/org/apache/pdfbox/util/PDFText2HTML.java 2012-01-23 11:39:29.578528524 +0100
@@ -64,14 +64,12 @@
protected void writeHeader() throws IOException
{
StringBuffer buf = new StringBuffer(INITIAL_PDF_TO_HTML_BYTES);
- buf.append("\n");
buf.append("
");
buf.append("" + escape(getTitle()) + "\n");
if(outputEncoding != null)
{
buf.append("\n");
+ + this.outputEncoding + "\" />\n");
}
buf.append("\n");
buf.append("\n");
@@ -232,4 +230,23 @@
}
return builder.toString();
}
+
+ /**
+  * Opens the inline element that carries the font size of the text that follows.
+  * Every call must be balanced by a later call to {@link #endFontBlock()}.
+  *
+  * @param fontsize the font size of the following text, in points
+  * @throws IOException if writing to the output fails
+  */
+ protected void startFontBlock(int fontsize) throws IOException
+ {
+     // NOTE(review): the literal in the submitted patch was empty and fontsize was unused,
+     // so no markup was produced. A styled span is used here because the size is given
+     // in points, which the 1-7 scale of <font size> cannot express -- confirm the markup.
+     super.writeString("<span style=\"font-size: " + fontsize + "pt\">");
+ }
+
+ /**
+  * Closes the element opened by {@link #startFontBlock(int)}.
+  *
+  * @throws IOException if writing to the output fails
+  */
+ protected void endFontBlock() throws IOException
+ {
+     super.writeString("</span>");
+ }
+
+ /**
+  * Writes the closing tag of an inline style element, e.g. {@code </b>} for {@code 'b'}.
+  *
+  * @param type the single-character tag name, as passed to {@link #startStyle(char)}
+  * @throws IOException if writing to the output fails
+  */
+ protected void endStyle(char type) throws IOException
+ {
+     // a closing tag needs the "</" prefix; writing only type + ">" leaves the element open
+     super.writeString("</" + type + ">");
+ }
+
+ /**
+  * Writes the opening tag of an inline style element, e.g. {@code <b>} for {@code 'b'}.
+  *
+  * @param type the single-character tag name
+  * @throws IOException if writing to the output fails
+  */
+ protected void startStyle(char type) throws IOException
+ {
+     final String openingTag = new StringBuilder(3).append('<').append(type).append('>').toString();
+     super.writeString(openingTag);
+ }
}
diff -u -r trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java src/main/java/org/apache/pdfbox/util/PDFTextStripper.java
--- trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java 2012-01-23 11:50:35.234525938 +0100
+++ src/main/java/org/apache/pdfbox/util/PDFTextStripper.java 2012-01-23 11:32:02.426523898 +0100
@@ -517,6 +517,26 @@
//default is to do nothing
}
+ /**
+  * Hook called when the first text position of a new line is processed, before that
+  * line is written. This implementation writes nothing; subclasses may override it
+  * to emit markup.
+  *
+  * @param fontsize the font size of that text position, truncated to whole points
+  * @throws IOException declared so that overriding implementations may write output
+  */
+ protected void startFontBlock(int fontsize) throws IOException
+ {
+     // default is to do nothing
+ }
+
+ /**
+  * Hook called after a line of text has been written, to close what
+  * {@link #startFontBlock(int)} opened. This implementation writes nothing;
+  * subclasses may override it to emit markup.
+  *
+  * @throws IOException declared so that overriding implementations may write output
+  */
+ protected void endFontBlock() throws IOException
+ {
+     // default is to do nothing
+ }
+
+ /**
+  * Hook called after a line of text has been written, once for each style that
+  * {@link #startStyle(char)} opened for it. This implementation writes nothing;
+  * subclasses may override it to emit markup.
+  *
+  * @param type the style being closed: {@code 'b'} for bold or {@code 'i'} for italic
+  * @throws IOException declared so that overriding implementations may write output
+  */
+ protected void endStyle(char type) throws IOException
+ {
+     // default is to do nothing
+ }
+
+ /**
+  * Hook called at the start of a line when the base font name of its first text
+  * position contains "bold" or "italic". This implementation writes nothing;
+  * subclasses may override it to emit markup.
+  *
+  * @param type the style being opened: {@code 'b'} for bold or {@code 'i'} for italic
+  * @throws IOException declared so that overriding implementations may write output
+  */
+ protected void startStyle(char type) throws IOException
+ {
+     // default is to do nothing
+ }
+
private static final float ENDOFLASTTEXTX_RESET_VALUE = -1;
private static final float MAXYFORLINE_RESET_VALUE = -Float.MAX_VALUE;
private static final float EXPECTEDSTARTOFNEXTWORDX_RESET_VALUE = -Float.MAX_VALUE;
@@ -539,6 +559,10 @@
float endOfLastTextX = ENDOFLASTTEXTX_RESET_VALUE;
float lastWordSpacing = LASTWORDSPACING_RESET_VALUE;
float maxHeightForLine = MAXHEIGHTFORLINE_RESET_VALUE;
+ int currentFont = 12;
+ boolean activeFontBlock = false;
+ boolean isBold = false;
+ boolean isItalic = false;
PositionWrapper lastPosition = null;
PositionWrapper lastLineStartPosition = null;
@@ -734,6 +758,18 @@
if(!overlap(positionY, positionHeight, maxYForLine, maxHeightForLine))
{
writeLine(normalize(line,isRtlDominant,hasRtl),isRtlDominant);
+
+ // Close the markup opened for the line just written, innermost element first
+ // (it is opened in the order bold, italic, font block). activeFontBlock is still
+ // false when no block has been started yet, so no unmatched end is emitted then.
+ if (activeFontBlock) {
+     endFontBlock();
+     activeFontBlock = false;
+ }
+ if (isItalic) {
+     endStyle('i');
+     isItalic = false;
+ }
+ if (isBold) {
+     endStyle('b');
+     isBold = false;
+ }
+
line.clear();
lastLineStartPosition =
@@ -775,6 +811,23 @@
}
line.add(position);
}
+
+ // despite its name, currentFont holds the font size, truncated to whole points
+ currentFont = (int)position.getFontSizeInPt();
+
+ // open the style markup once per line, based on the first position seen for it
+ if (!activeFontBlock) {
+     // NOTE(review): getBaseFont() presumably returns null for fonts that carry no
+     // base font name -- confirm; such text is treated as neither bold nor italic
+     String baseFont = null;
+     if (position.getFont() != null) {
+         baseFont = position.getFont().getBaseFont();
+     }
+     if (baseFont != null) {
+         // fixed locale: the default locale can change how 'I' is lower-cased
+         // (e.g. Turkish), which would make the "italic" match fail
+         String lowerCaseName = baseFont.toLowerCase(java.util.Locale.ENGLISH);
+         if (lowerCaseName.contains("bold")) {
+             isBold = true;
+             startStyle('b');
+         }
+         if (lowerCaseName.contains("italic")) {
+             isItalic = true;
+             startStyle('i');
+         }
+     }
+     startFontBlock(currentFont);
+     activeFontBlock = true;
+ }
+
maxHeightForLine = Math.max( maxHeightForLine, positionHeight );
minYTopForLine = Math.min(minYTopForLine,positionY - positionHeight);
lastPosition = current;
@@ -792,7 +845,20 @@
// print the final line
if (line.size() > 0)
{
+ if (!activeFontBlock) {
+ startFontBlock(currentFont);
+ }
writeLine(normalize(line,isRtlDominant,hasRtl),isRtlDominant);
+
+ // Close the markup of the final line, innermost element first: the font block,
+ // then italic, then bold (the reverse of the order in which they are opened).
+ endFontBlock();
+ activeFontBlock = false;
+ if (isItalic) {
+     endStyle('i');
+     isItalic = false;
+ }
+ if (isBold) {
+     endStyle('b');
+     isBold = false;
+ }
writeParagraphEnd();
}