This represents a string and a position on the screen of those characters.
Method from org.apache.pdfbox.util.TextPosition Detail: |
public boolean contains(TextPosition tp2) {
double thisXstart = getXDirAdj();
double thisXend = getXDirAdj() + getWidthDirAdj();
double tp2Xstart = tp2.getXDirAdj();
double tp2Xend = tp2.getXDirAdj() + tp2.getWidthDirAdj();
/*
* No X overlap at all so return as soon as possible.
*/
if(tp2Xend < = thisXstart || tp2Xstart >= thisXend)
{
return false;
}
/*
* No Y overlap at all so return as soon as possible.
* Note: 0.0 is in the upper left and y-coordinate is
* top of TextPosition
*/
if((tp2.getYDirAdj() + tp2.getHeightDir() < getYDirAdj()) ||
(tp2.getYDirAdj() > getYDirAdj() + getHeightDir()))
{
return false;
}
/* We're going to calculate the percentage of overlap. If its less
* than a 15% x-coordinate overlap then we'll return false because its negligible.
* .15 was determined by trial and error in the regression test files.
*/
else if((tp2Xstart > thisXstart) && (tp2Xend > thisXend))
{
double overlap = thisXend - tp2Xstart;
double overlapPercent = overlap/getWidthDirAdj();
return (overlapPercent > .15);
}
else if((tp2Xstart < thisXstart) && (tp2Xend < thisXend))
{
double overlap = tp2Xend - thisXstart;
double overlapPercent = overlap/getWidthDirAdj();
return (overlapPercent > .15);
}
return true;
}
Determine if this TextPosition logically contains
another (i.e. they overlap and should be rendered on top
of each other). |
/**
 * Return the string of characters stored in this object.
 *
 * @return the value of the {@code str} field
 */
public String getCharacter()
{
    return this.str;
}
Return the string of characters stored in this object. |
/**
 * Return the direction/orientation of the string in this object
 * based on its text matrix.
 *
 * @return 0, 90, 180 or 270; 0 is also returned when the matrix entries match
 *         none of the tested patterns
 */
public float getDir()
{
    final float m00 = textPos.getValue(0, 0);
    final float m01 = textPos.getValue(0, 1);
    final float m10 = textPos.getValue(1, 0);
    final float m11 = textPos.getValue(1, 1);

    // Pattern:  12   0     left to right
    //            0  12
    if ((m00 > 0) && (Math.abs(m01) < m11) && (Math.abs(m10) < m00) && (m11 > 0))
    {
        return 0;
    }

    // Pattern: -12   0     right to left (upside down)
    //            0 -12
    if ((m00 < 0) && (Math.abs(m01) < Math.abs(m11))
            && (Math.abs(m10) < Math.abs(m00)) && (m11 < 0))
    {
        return 180;
    }

    // Pattern:   0  12     up
    //          -12   0
    if ((Math.abs(m00) < Math.abs(m10)) && (m01 > 0)
            && (m10 < 0) && (Math.abs(m11) < m01))
    {
        return 90;
    }

    // Pattern:   0 -12     down
    //           12   0
    if ((Math.abs(m00) < m10) && (m01 < 0)
            && (m10 > 0) && (Math.abs(m11) < Math.abs(m01)))
    {
        return 270;
    }

    // No pattern matched: fall back to 0.
    return 0;
}
Return the direction/orientation of the string in this object
based on its text matrix. |
/**
 * This will get the font for the text being drawn.
 *
 * @return the value of the {@code font} field
 */
public PDFont getFont()
{
    return this.font;
}
This will get the font for the text being drawn. |
/**
 * This will get the font size that this object is supposed to be drawn at.
 *
 * @return the value of the {@code fontSize} field
 */
public float getFontSize()
{
    return this.fontSize;
}
This will get the font size that this object is
supposed to be drawn at. |
/**
 * This will get the font size in pt.
 *
 * @return the value of the {@code fontSizePt} field
 */
public float getFontSizeInPt()
{
    return this.fontSizePt;
}
This will get the font size in pt.
To get this size we have to multiply the PDF font size by the scaling from the text matrix. |
/**
 * This will get the maximum height of all characters in this string.
 *
 * @return the value of the {@code maxTextHeight} field
 */
public float getHeight()
{
    return this.maxTextHeight;
}
This will get the maximum height of all characters in this string. |
/**
 * This will get the maximum height of all characters in this string.
 *
 * The value is not rotation dependent; this accessor exists for symmetry
 * with the other direction-adjusted getters.
 *
 * @return the value of the {@code maxTextHeight} field
 */
public float getHeightDir()
{
    return this.maxTextHeight;
}
This will get the maximum height of all characters in this string. |
/**
 * Get the widths of each individual character.
 *
 * @return the {@code widths} array itself (no copy is made)
 */
public float[] getIndividualWidths()
{
    return this.widths;
}
Get the widths of each individual character. |
/**
 * Return the Matrix textPos stored in this object.
 *
 * @return the value of the {@code textPos} field
 */
public Matrix getTextPos()
{
    return this.textPos;
}
Return the Matrix textPos stored in this object. |
/**
 * This will get the width of the string when page rotation adjusted
 * coordinates are used.
 *
 * @return the result of {@code getWidthRot} for the {@code rot} field
 */
public float getWidth()
{
    return this.getWidthRot(this.rot);
}
This will get the width of the string when page rotation adjusted coordinates are used. |
/**
 * This will get the width of the string when text direction adjusted
 * coordinates are used.
 *
 * @return the result of {@code getWidthRot} for the direction given by {@code getDir()}
 */
public float getWidthDirAdj()
{
    final float direction = getDir();
    return getWidthRot(direction);
}
This will get the width of the string when text direction adjusted coordinates are used. |
/**
 * This will get the width of a space character. This is useful for some
 * algorithms such as the text stripper, that need to know the width of a
 * space character.
 *
 * @return the value of the {@code widthOfSpace} field
 */
public float getWidthOfSpace()
{
    return this.widthOfSpace;
}
This will get the width of a space character. This is useful for some
algorithms such as the text stripper, that need to know the width of a
space character. |
/**
 * This will get the current word spacing.
 *
 * @return the value of the {@code wordSpacing} field
 */
public float getWordSpacing()
{
    return this.wordSpacing;
}
This will get the current word spacing. |
/**
 * This will get the page rotation adjusted x position of the character.
 * This is adjusted based on page rotation so that the upper left is 0,0.
 *
 * The value is computed on first use: {@code Float.NEGATIVE_INFINITY} in the
 * {@code x} field marks "not yet computed", after which the stored value is reused.
 *
 * @return the x position obtained from {@code getXRot(rot)}
 */
public float getX()
{
    if (x != Float.NEGATIVE_INFINITY)
    {
        // Already computed on an earlier call.
        return x;
    }
    x = getXRot(rot);
    return x;
}
This will get the page rotation adjusted x position of the character.
This is adjusted based on page rotation so that the upper
left is 0,0. |
/**
 * This will get the text direction adjusted x position of the character.
 * This is adjusted based on text direction so that the first character
 * in that direction is in the upper left at 0,0.
 *
 * @return the result of {@code getXRot} for the direction given by {@code getDir()}
 */
public float getXDirAdj()
{
    final float direction = getDir();
    return getXRot(direction);
}
This will get the text direction adjusted x position of the character.
This is adjusted based on text direction so that the first character
in that direction is in the upper left at 0,0. |
/**
 * Return the x scale reported by the text matrix of this object.
 *
 * @return the result of {@code textPos.getXScale()}
 */
public float getXScale()
{
    return this.textPos.getXScale();
}
|
/**
 * This will get the y position of the text, adjusted so that 0,0 is upper left
 * and it is adjusted based on the page rotation.
 *
 * The value is computed on first use: {@code Float.NEGATIVE_INFINITY} in the
 * {@code y} field marks "not yet computed", after which the stored value is reused.
 *
 * @return the page dimension (height for rotation 0 or 180, width otherwise)
 *         minus {@code getYLowerLeftRot(rot)}
 */
public float getY()
{
    if (y != Float.NEGATIVE_INFINITY)
    {
        // Already computed on an earlier call.
        return y;
    }

    final boolean measureAgainstHeight = (rot == 0) || (rot == 180);
    if (measureAgainstHeight)
    {
        y = pageHeight - getYLowerLeftRot(rot);
    }
    else
    {
        y = pageWidth - getYLowerLeftRot(rot);
    }
    return y;
}
This will get the y position of the text, adjusted so that 0,0 is upper left and
it is adjusted based on the page rotation. |
/**
 * This will get the y position of the text, adjusted so that 0,0 is upper left
 * and it is adjusted based on the text direction.
 *
 * @return the page dimension (height for direction 0 or 180, width otherwise)
 *         minus {@code getYLowerLeftRot} for that direction
 */
public float getYDirAdj()
{
    final float direction = getDir();
    final boolean measureAgainstHeight = (direction == 0) || (direction == 180);

    // Some PDFBox code assumes that the 0,0 point is in the upper left,
    // not the lower left, hence the subtraction from the page dimension.
    if (measureAgainstHeight)
    {
        return pageHeight - getYLowerLeftRot(direction);
    }
    return pageWidth - getYLowerLeftRot(direction);
}
This will get the y position of the text, adjusted so that 0,0 is upper left and
it is adjusted based on the text direction. |
/**
 * Return the y scale reported by the text matrix of this object.
 *
 * @return the result of {@code textPos.getYScale()}
 */
public float getYScale()
{
    return this.textPos.getYScale();
}
|
/**
 * Tell whether this object holds a single diacritic character.
 *
 * @return true if the stored string is exactly one char long and that char's
 *         Unicode general category is NON_SPACING_MARK, MODIFIER_SYMBOL or
 *         MODIFIER_LETTER; false otherwise
 */
public boolean isDiacritic()
{
    final String text = this.getCharacter();
    if (text.length() != 1)
    {
        return false;
    }

    final int category = Character.getType(text.charAt(0));
    return category == Character.NON_SPACING_MARK
            || category == Character.MODIFIER_SYMBOL
            || category == Character.MODIFIER_LETTER;
}
|
public void mergeDiacritic(TextPosition diacritic,
TextNormalize normalize) {
if (diacritic.getCharacter().length() > 1)
{
return;
}
float diacXStart = diacritic.getXDirAdj();
float diacXEnd = diacXStart + diacritic.widths[0];
float currCharXStart = getXDirAdj();
int strLen = str.length();
boolean wasAdded = false;
for (int i = 0; i < strLen && !wasAdded; i++)
{
float currCharXEnd = currCharXStart + widths[i];
/*
* This is the case where there is an overlap of the diacritic character with
* the current character and the previous character. If no previous character,
* just append the diacritic after the current one.
*/
if(diacXStart < currCharXStart && diacXEnd < = currCharXEnd)
{
if(i == 0)
{
insertDiacritic(i, diacritic, normalize);
}
else
{
float distanceOverlapping1 = diacXEnd - currCharXStart;
float percentage1 = distanceOverlapping1/widths[i];
float distanceOverlapping2 = currCharXStart - diacXStart;
float percentage2 = distanceOverlapping2/widths[i-1];
if(percentage1 >= percentage2)
{
insertDiacritic(i, diacritic, normalize);
}
else
{
insertDiacritic(i-1, diacritic, normalize);
}
}
wasAdded = true;
}
//diacritic completely covers this character and therefore we assume that
//this is the character the diacritic belongs to
else if(diacXStart < currCharXStart && diacXEnd > currCharXEnd)
{
insertDiacritic(i, diacritic, normalize);
wasAdded = true;
}
//Otherwise, The diacritic modifies this character because its completely
//contained by the character width
else if(diacXStart >= currCharXStart && diacXEnd < = currCharXEnd)
{
insertDiacritic(i, diacritic, normalize);
wasAdded = true;
}
/*
* Last character in the TextPosition so we add diacritic to the end
*/
else if(diacXStart >= currCharXStart && diacXEnd > currCharXEnd && i == (strLen - 1))
{
insertDiacritic(i, diacritic, normalize);
wasAdded = true;
}
/*
* Couldn't find anything useful so we go to the next character in the
* TextPosition
*/
currCharXStart += widths[i];
}
}
Merge a single character TextPosition into the current object.
This is to be used only for cases where we have a diacritic that
overlaps an existing TextPosition. In a graphical display, we could
overlay them, but for text extraction we need to merge them. Use the
contains() method to test if two objects overlap. |
/**
 * Show the string data for this text position.
 *
 * @return the same string that {@code getCharacter()} returns
 */
public String toString()
{
    return this.getCharacter();
}
Show the string data for this text position. |