/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#include <vcl/fontcharmap.hxx>
#include <fontinstance.hxx>
#include <impfontcharmap.hxx>
#include <sal/log.hxx>
#include <vector>
#include <set>
CmapResult::CmapResult( bool bSymbolic,
const sal_UCS4* pRangeCodes, int nRangeCount )
: mpRangeCodes( pRangeCodes)
, mpStartGlyphs( nullptr)
, mpGlyphIds( nullptr)
, mnRangeCount( nRangeCount)
, mbSymbolic( bSymbolic)
, mbRecoded( false)
{}
// Holds the map most recently created by ImplFontCharMap::getDefaultMap().
static ImplFontCharMapRef g_pDefaultImplFontCharMap;
// Default code ranges, stored as consecutive (first, end) pairs. The code in
// this file treats the second value of each pair as one past the last code
// point (char counts are computed as end - first).
static const sal_UCS4 aDefaultUnicodeRanges[] = {0x0020,0xD800, 0xE000,0xFFF0};
// Default ranges used when a symbol map is requested (bSymbols == true).
static const sal_UCS4 aDefaultSymbolRanges[] = {0x0020,0x0100, 0xF020,0xF100};
// Frees the range and glyph tables unless this is a default map.
ImplFontCharMap::~ImplFontCharMap()
{
    // default maps point at the static range tables, which must not be freed
    if( isDefaultMap() )
        return;

    delete[] mpRangeCodes;
    delete[] mpStartGlyphs;
    delete[] mpGlyphIds;
}
// Takes over the tables referenced by rCR and pre-computes the total number
// of characters covered by all ranges.
ImplFontCharMap::ImplFontCharMap( const CmapResult& rCR )
    : mpRangeCodes( rCR.mpRangeCodes )
    , mpStartGlyphs( rCR.mpStartGlyphs )
    , mpGlyphIds( rCR.mpGlyphIds )
    , mnRangeCount( rCR.mnRangeCount )
    , mnCharCount( 0 )
{
    // every range occupies two consecutive entries: its first code and its end
    for( int nRange = 0; nRange < mnRangeCount; ++nRange )
    {
        const sal_UCS4 cBegin = mpRangeCodes[ 2*nRange ];
        const sal_UCS4 cEnd = mpRangeCodes[ 2*nRange + 1 ];
        mnCharCount += cEnd - cBegin;
    }
}
// Creates a fresh default map (symbol or unicode ranges), stores it in the
// file-level g_pDefaultImplFontCharMap and returns a reference to that global.
ImplFontCharMapRef const & ImplFontCharMap::getDefaultMap( bool bSymbols )
{
    const sal_UCS4* const pDefaultRanges
        = bSymbols ? aDefaultSymbolRanges : aDefaultUnicodeRanges;
    const int nDefaultCodes = bSymbols
        ? SAL_N_ELEMENTS(aDefaultSymbolRanges)
        : SAL_N_ELEMENTS(aDefaultUnicodeRanges);

    // two table entries describe one range
    CmapResult aDefaultCR( bSymbols, pDefaultRanges, nDefaultCodes / 2 );
    g_pDefaultImplFontCharMap = ImplFontCharMapRef( new ImplFontCharMap( aDefaultCR ) );
    return g_pDefaultImplFontCharMap;
}
// A map is a default map when its ranges are one of the two static tables.
bool ImplFontCharMap::isDefaultMap() const
{
    if( mpRangeCodes == aDefaultUnicodeRanges )
        return true;
    return mpRangeCodes == aDefaultSymbolRanges;
}
// Reads a big-endian 32-bit unsigned value from the four bytes starting at p.
static unsigned GetUInt( const unsigned char* p )
{
    // Widen every byte to unsigned before shifting: shifting the promoted
    // (signed) int would move a top byte >= 0x80 into the sign bit.
    return (static_cast<unsigned>(p[0]) << 24)
         | (static_cast<unsigned>(p[1]) << 16)
         | (static_cast<unsigned>(p[2]) << 8)
         |  static_cast<unsigned>(p[3]);
}
// Reads a big-endian 16-bit unsigned value from the two bytes starting at p.
static unsigned Getsal_uInt16( const unsigned char* p )
{
    const int nHighPart = p[0] << 8;
    const int nLowPart = p[1];
    return nHighPart | nLowPart;
}
static int GetSShort( const unsigned char* p ){ return static_cast<sal_Int16>((p[0]<<8)|p[1]);}
// TODO: move CMAP parsing directly into the ImplFontCharMap class
/** Parses a font 'cmap' table into sorted code ranges plus glyph lookup data.

    The subtable with the highest preference value is chosen among the known
    platform/encoding combinations; only subtable formats 4 and 12 are read.
    Code ranges of non-unicode subtables are converted to unicode ranges, in
    which case no glyph mapping is provided.

    @param pCmap    start of the raw cmap table (may be null)
    @param nLength  number of valid bytes at pCmap
    @param rResult  reset on entry; on success receives arrays allocated with
                    new[] (range codes as (first, end) pairs, start glyphs,
                    glyph ids). Ownership passes to the caller; an
                    ImplFontCharMap constructed from the result deletes them
                    in its destructor.
    @return true if usable ranges were found or the symbol-font fallback
            ranges were produced, false otherwise
*/
bool ParseCMAP( const unsigned char* pCmap, int nLength, CmapResult& rResult )
{
    rResult.mpRangeCodes = nullptr;
    rResult.mpStartGlyphs= nullptr;
    rResult.mpGlyphIds = nullptr;
    rResult.mnRangeCount = 0;
    rResult.mbRecoded = false;
    rResult.mbSymbolic = false;

    // parse the table header and check for validity
    if( !pCmap || (nLength < 24) )
        return false;

    if( Getsal_uInt16( pCmap ) != 0x0000 ) // simple check for CMAP corruption
        return false;

    int nSubTables = Getsal_uInt16( pCmap + 2 );
    if( (nSubTables <= 0) || (nLength < (24 + 8*nSubTables)) )
        return false;

    const unsigned char* pEndValidArea = pCmap + nLength;

    // find the most interesting subtable in the CMAP
    rtl_TextEncoding eRecodeFrom = RTL_TEXTENCODING_UNICODE;
    int nOffset = 0;
    int nFormat = -1;
    int nBestVal = 0;
    for( const unsigned char* p = pCmap + 4; --nSubTables >= 0; p += 8 )
    {
        int nPlatform = Getsal_uInt16( p );
        int nEncoding = Getsal_uInt16( p+2 );
        int nPlatformEncoding = (nPlatform << 8) + nEncoding;

        int nValue;
        rtl_TextEncoding eTmpEncoding = RTL_TEXTENCODING_UNICODE;
        switch( nPlatformEncoding )
        {
            case 0x000: nValue = 20; break; // Unicode 1.0
            case 0x001: nValue = 21; break; // Unicode 1.1
            case 0x002: nValue = 22; break; // iso10646_1993
            case 0x003: nValue = 23; break; // UCS-2
            case 0x004: nValue = 24; break; // UCS-4
            case 0x100: nValue = 22; break; // Mac Unicode<2.0
            case 0x103: nValue = 23; break; // Mac Unicode>2.0
            case 0x300: nValue = 5; rResult.mbSymbolic = true; break; // Win Symbol
            case 0x301: nValue = 28; break; // Win UCS-2
            case 0x30A: nValue = 29; break; // Win-UCS-4
            case 0x302: nValue = 11; eTmpEncoding = RTL_TEXTENCODING_SHIFT_JIS; break;
            case 0x303: nValue = 12; eTmpEncoding = RTL_TEXTENCODING_GB_18030; break;
            case 0x304: nValue = 11; eTmpEncoding = RTL_TEXTENCODING_BIG5; break;
            case 0x305: nValue = 11; eTmpEncoding = RTL_TEXTENCODING_MS_949; break;
            case 0x306: nValue = 11; eTmpEncoding = RTL_TEXTENCODING_MS_1361; break;
            default: nValue = 0; break;
        }

        if( nValue <= 0 ) // ignore unknown encodings
            continue;

        int nTmpOffset = GetUInt( p+4 );
        // the offset comes straight from the font file: the two byte format
        // field it points to must lie completely inside the table
        if( (nTmpOffset < 0) || (nTmpOffset > nLength - 2) )
        {
            SAL_WARN("vcl.gdi", "CMAP subtable offset points outside of the table");
            continue;
        }

        int nTmpFormat = Getsal_uInt16( pCmap + nTmpOffset );
        if( nTmpFormat == 12 ) // 32bit code -> glyph map format
            nValue += 3;
        else if( nTmpFormat != 4 ) // 16bit code -> glyph map format
            continue; // ignore other formats

        if( nBestVal < nValue )
        {
            nBestVal = nValue;
            nOffset = nTmpOffset;
            nFormat = nTmpFormat;
            eRecodeFrom = eTmpEncoding;
        }
    }

    // parse the best CMAP subtable
    int nRangeCount = 0;
    sal_UCS4* pCodePairs = nullptr;
    int* pStartGlyphs = nullptr;

    std::vector<sal_uInt16> aGlyphIdArray;
    aGlyphIdArray.reserve( 0x1000 );
    aGlyphIdArray.push_back( 0 );

    // format 4, the most common 16bit char mapping table
    if( (nFormat == 4) && ((nOffset+16) < nLength) )
    {
        const int nSegCountX2 = Getsal_uInt16( pCmap + nOffset + 6 );

        // The four parallel segment arrays (end codes, start codes, deltas,
        // range offsets) follow the 14 byte header and the 2 byte padding, so
        // 4*nSegCountX2 bytes are needed behind nOffset+16. A count below 2
        // would also yield a negative number of ranges.
        const int nAvailable = nLength - (nOffset + 16);
        const bool bSegmentsFit = (nSegCountX2 >= 2) && (4 * nSegCountX2 <= nAvailable);
        if( !bSegmentsFit )
            SAL_WARN("vcl.gdi", "Format 4 segment arrays do not fit into the cmap table");
        nRangeCount = bSegmentsFit ? (nSegCountX2/2 - 1) : 0;

        pCodePairs = new sal_UCS4[ nRangeCount * 2 ];
        pStartGlyphs = new int[ nRangeCount ];

        // none of these is dereferenced when nRangeCount is 0
        const unsigned char* pLimitBase = pCmap + nOffset + 14;
        const unsigned char* pBeginBase = pLimitBase + nSegCountX2 + 2;
        const unsigned char* pDeltaBase = pBeginBase + nSegCountX2;
        const unsigned char* pOffsetBase = pDeltaBase + nSegCountX2;

        sal_UCS4* pCP = pCodePairs;
        for( int i = 0; i < nRangeCount; ++i )
        {
            const sal_UCS4 cMinChar = Getsal_uInt16( pBeginBase + 2*i );
            const sal_UCS4 cMaxChar = Getsal_uInt16( pLimitBase + 2*i );
            const int nGlyphDelta = GetSShort( pDeltaBase + 2*i );
            const int nRangeOffset = Getsal_uInt16( pOffsetBase + 2*i );
            if( cMinChar > cMaxChar ) { // no sane font should trigger this
                SAL_WARN("vcl.gdi", "Min char should never be more than the max char!");
                break;
            }
            if( cMaxChar == 0xFFFF ) {
                SAL_WARN("vcl.gdi", "Format 4 char should not be 0xFFFF");
                break;
            }
            if( !nRangeOffset ) {
                // glyphid can be calculated directly
                pStartGlyphs[i] = (cMinChar + nGlyphDelta) & 0xFFFF;
            } else {
                // update the glyphid-array with the glyphs in this range
                pStartGlyphs[i] = -static_cast<int>(aGlyphIdArray.size());

                // The glyph ids start nRangeOffset bytes behind this range
                // offset entry. Compare against the bytes that are left before
                // forming the pointer, so that an offset beyond the table end
                // cannot wrap the unsigned remaining size.
                const int nBytesFromEntry = static_cast<int>(pEndValidArea - (pOffsetBase + 2*i));
                const size_t nRemainingSize = (nRangeOffset < nBytesFromEntry)
                    ? static_cast<size_t>(nBytesFromEntry - nRangeOffset) : 0;
                const size_t nMaxPossibleRecords = nRemainingSize/2;
                if (nMaxPossibleRecords == 0) { // no sane font should trigger this
                    SAL_WARN("vcl.gdi", "More indexes claimed that space available in font!");
                    break;
                }
                const size_t nMaxLegalChar = cMinChar + nMaxPossibleRecords-1;
                if (cMaxChar > nMaxLegalChar) { // no sane font should trigger this
                    SAL_WARN("vcl.gdi", "More indexes claimed that space available in font!");
                    break;
                }
                const unsigned char* pGlyphIdPtr = pOffsetBase + 2*i + nRangeOffset;
                for( sal_UCS4 c = cMinChar; c <= cMaxChar; ++c, pGlyphIdPtr+=2 ) {
                    const int nGlyphIndex = Getsal_uInt16( pGlyphIdPtr ) + nGlyphDelta;
                    aGlyphIdArray.push_back( static_cast<sal_uInt16>(nGlyphIndex) );
                }
            }
            *(pCP++) = cMinChar;
            *(pCP++) = cMaxChar + 1;
        }
        // a break above leaves only the ranges stored so far
        nRangeCount = (pCP - pCodePairs) / 2;
    }
    // format 12, the most common 32bit char mapping table
    else if( (nFormat == 12) && ((nOffset+16) < nLength) )
    {
        nRangeCount = GetUInt( pCmap + nOffset + 12 );

        // every group takes 12 bytes behind the 16 byte header; never trust
        // the file to claim more groups than the table can hold
        const int nMaxPossibleGroups = (nLength - (nOffset + 16)) / 12;
        if( (nRangeCount < 0) || (nRangeCount > nMaxPossibleGroups) )
        {
            SAL_WARN("vcl.gdi", "Format 12 claimed to have "
                     << static_cast<unsigned>(nRangeCount) << " groups, but only "
                     << nMaxPossibleGroups << " possible");
            nRangeCount = nMaxPossibleGroups;
        }

        pCodePairs = new sal_UCS4[ nRangeCount * 2 ];
        pStartGlyphs = new int[ nRangeCount ];

        const unsigned char* pGroup = pCmap + nOffset + 16;
        sal_UCS4* pCP = pCodePairs;
        for( int i = 0; i < nRangeCount; ++i )
        {
            sal_UCS4 cMinChar = GetUInt( pGroup + 0 );
            sal_UCS4 cMaxChar = GetUInt( pGroup + 4 );
            int nGlyphId = GetUInt( pGroup + 8 );
            pGroup += 12;

            if( cMinChar > cMaxChar ) { // no sane font should trigger this
                SAL_WARN("vcl.gdi", "Min char should never be more than the max char!");
                break;
            }

            *(pCP++) = cMinChar;
            *(pCP++) = cMaxChar + 1;
            pStartGlyphs[i] = nGlyphId;
        }
        nRangeCount = (pCP - pCodePairs) / 2;
    }

    // check if any subtable resulted in something usable
    if( nRangeCount <= 0 )
    {
        delete[] pCodePairs;
        delete[] pStartGlyphs;

        // even when no CMAP is available we know it for symbol fonts
        if( rResult.mbSymbolic )
        {
            pCodePairs = new sal_UCS4[4];
            pCodePairs[0] = 0x0020; // aliased symbols
            pCodePairs[1] = 0x0100;
            pCodePairs[2] = 0xF020; // original symbols
            pCodePairs[3] = 0xF100;
            rResult.mpRangeCodes = pCodePairs;
            rResult.mnRangeCount = 2;
            return true;
        }

        return false;
    }

    // recode the code ranges to their unicode encoded ranges if needed
    rtl_TextToUnicodeConverter aConverter = nullptr;
    rtl_TextToUnicodeContext aCvtContext = nullptr;

    rResult.mbRecoded = ( eRecodeFrom != RTL_TEXTENCODING_UNICODE );
    if( rResult.mbRecoded )
    {
        aConverter = rtl_createTextToUnicodeConverter( eRecodeFrom );
        aCvtContext = rtl_createTextToUnicodeContext( aConverter );
    }

    if( aConverter && aCvtContext )
    {
        // determine the set of supported code points from encoded ranges
        std::set<sal_UCS4> aSupportedCodePoints;

        static const int NINSIZE = 64;
        static const int NOUTSIZE = 64;
        sal_Char cCharsInp[ NINSIZE ];
        sal_Unicode cCharsOut[ NOUTSIZE ];
        sal_UCS4* pCP = pCodePairs;
        for( int i = 0; i < nRangeCount; ++i )
        {
            sal_UCS4 cMin = *(pCP++);
            sal_UCS4 cEnd = *(pCP++);
            while( cMin < cEnd )
            {
                int j = 0;
                // one code can emit two bytes, so stop while there is still
                // room for both of them in the input buffer
                for(; (cMin < cEnd) && (j < NINSIZE - 1); ++cMin )
                {
                    if( cMin >= 0x0100 )
                        cCharsInp[ j++ ] = static_cast<sal_Char>(cMin >> 8);
                    if( (cMin >= 0x0100) || (cMin < 0x00A0) )
                        cCharsInp[ j++ ] = static_cast<sal_Char>(cMin);
                }

                sal_uInt32 nCvtInfo;
                sal_Size nSrcCvtBytes;
                int nOutLen = rtl_convertTextToUnicode(
                    aConverter, aCvtContext,
                    cCharsInp, j, cCharsOut, NOUTSIZE,
                    RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE
                    | RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE,
                    &nCvtInfo, &nSrcCvtBytes );

                for( j = 0; j < nOutLen; ++j )
                    aSupportedCodePoints.insert( cCharsOut[j] );
            }
        }

        // the context has its own destroy function; the converter must be
        // released after the context that was created from it
        rtl_destroyTextToUnicodeContext( aConverter, aCvtContext );
        rtl_destroyTextToUnicodeConverter( aConverter );

        // convert the set of supported code points to ranges
        std::vector<sal_UCS4> aSupportedRanges;

        for (auto const& supportedPoint : aSupportedCodePoints)
        {
            if( aSupportedRanges.empty()
            || (aSupportedRanges.back() != supportedPoint) )
            {
                // add new range beginning with current unicode
                aSupportedRanges.push_back(supportedPoint);
                aSupportedRanges.push_back( 0 );
            }

            // extend existing range to include current unicode
            aSupportedRanges.back() = supportedPoint + 1;
        }

        // glyph mapping for non-unicode fonts not implemented
        delete[] pStartGlyphs;
        pStartGlyphs = nullptr;
        aGlyphIdArray.clear();

        // make a pCodePairs array using the vector from above
        delete[] pCodePairs;
        nRangeCount = aSupportedRanges.size() / 2;
        if( nRangeCount <= 0 )
            return false;
        pCodePairs = new sal_UCS4[ nRangeCount * 2 ];
        // restart at the new array: pCP still refers to the one deleted above
        pCP = pCodePairs;
        for (auto const& supportedRange : aSupportedRanges)
            *(pCP++) = supportedRange;
    }

    // prepare the glyphid-array if needed
    // TODO: merge ranges if they are close enough?
    sal_uInt16* pGlyphIds = nullptr;
    if( !aGlyphIdArray.empty())
    {
        pGlyphIds = new sal_uInt16[ aGlyphIdArray.size() ];
        sal_uInt16* pOut = pGlyphIds;
        for (auto const& glyphId : aGlyphIdArray)
            *(pOut++) = glyphId;
    }

    // update the result struct
    rResult.mpRangeCodes = pCodePairs;
    rResult.mpStartGlyphs = pStartGlyphs;
    rResult.mnRangeCount = nRangeCount;
    rResult.mpGlyphIds = pGlyphIds;
    return true;
}
FontCharMap::FontCharMap()
: mpImplFontCharMap( ImplFontCharMap::getDefaultMap() )
{
}
FontCharMap::FontCharMap( ImplFontCharMapRef const & pIFCMap )
: mpImplFontCharMap( pIFCMap )
{
}
FontCharMap::FontCharMap( const CmapResult& rCR )
: mpImplFontCharMap(new ImplFontCharMap(rCR))
{
}
// Resets the implementation handle to null. Presumably this drops the
// reference held on the ImplFontCharMap; that depends on the semantics of
// ImplFontCharMapRef, which is declared outside this file.
FontCharMap::~FontCharMap()
{
mpImplFontCharMap = nullptr;
}
// Returns a new FontCharMap wrapping the default implementation map for
// either symbol fonts or unicode fonts.
FontCharMapRef FontCharMap::GetDefaultMap( bool bSymbol )
{
    ImplFontCharMapRef const & rDefaultImpl = ImplFontCharMap::getDefaultMap( bSymbol );
    return FontCharMapRef( new FontCharMap( rDefaultImpl ) );
}
// Forwards to the implementation object's isDefaultMap().
bool FontCharMap::IsDefaultMap() const
{
    const bool bDefault = mpImplFontCharMap->isDefaultMap();
    return bDefault;
}
// Returns the character count stored in the implementation object.
int FontCharMap::GetCharCount() const
{
    const int nChars = mpImplFontCharMap->mnCharCount;
    return nChars;
}
// Counts the mapped characters between cMin and cMax (both inclusive).
// Whole ranges are summed up and the parts of the first and last range that
// lie outside [cMin, cMax] are subtracted beforehand.
int FontCharMap::CountCharsInRange( sal_UCS4 cMin, sal_UCS4 cMax ) const
{
    const sal_UCS4* const pCodes = mpImplFontCharMap->mpRangeCodes;
    int nTotal = 0;

    // lower bound: an odd index means cMin lies in a gap, so start at the
    // next range; otherwise discount the chars of that range below cMin
    int nFirstIdx = findRangeIndex( cMin );
    if( (nFirstIdx & 1) != 0 )
        ++nFirstIdx;
    else if( pCodes[ nFirstIdx ] < cMin )
        nTotal -= cMin - pCodes[ nFirstIdx ];

    // upper bound: an odd index means cMax lies in a gap, so end at the
    // previous range; otherwise discount the chars of that range above cMax
    int nLastIdx = findRangeIndex( cMax );
    if( (nLastIdx & 1) != 0 )
        --nLastIdx;
    else
        nTotal -= pCodes[ nLastIdx + 1 ] - cMax - 1;

    // add up the complete ranges in between
    int nPairIdx = nFirstIdx;
    while( nPairIdx <= nLastIdx )
    {
        nTotal += pCodes[ nPairIdx + 1 ] - pCodes[ nPairIdx ];
        nPairIdx += 2;
    }

    return nTotal;
}
// Tells whether cChar is covered by this map.
bool FontCharMap::HasChar( sal_UCS4 cChar ) const
{
    // glyph mapping is available: anything but the notdef-glyph counts
    if( mpImplFontCharMap->mpStartGlyphs != nullptr )
        return GetGlyphIndex( cChar ) != 0;

    // only the char-ranges are known
    const int nRange = findRangeIndex( cChar );
    if( (nRange == 0) && (cChar < mpImplFontCharMap->mpRangeCodes[0]) )
        return false;

    // an even index means the char is inside a range
    return (nRange & 1) == 0;
}
// Returns the first code of the first range.
sal_UCS4 FontCharMap::GetFirstChar() const
{
    const sal_UCS4* const pCodes = mpImplFontCharMap->mpRangeCodes;
    return pCodes[0];
}
// Returns the last code of the last range (its stored end value minus one).
sal_UCS4 FontCharMap::GetLastChar() const
{
    const int nLastEndIdx = 2*mpImplFontCharMap->mnRangeCount - 1;
    const sal_UCS4* const pCodes = mpImplFontCharMap->mpRangeCodes;
    return (pCodes[ nLastEndIdx ] - 1);
}
// Returns the mapped char following cChar, clamped to the first and last
// char of the map.
sal_UCS4 FontCharMap::GetNextChar( sal_UCS4 cChar ) const
{
    const sal_UCS4 cFirst = GetFirstChar();
    if( cChar < cFirst )
        return cFirst;

    const sal_UCS4 cLast = GetLastChar();
    if( cChar >= cLast )
        return cLast;

    const sal_UCS4 cCandidate = cChar + 1;
    const int nRange = findRangeIndex( cCandidate );
    const bool bInGap = (nRange & 1) != 0; // outside of range?
    // in a gap the next char is the first one of the following range
    return bInGap ? mpImplFontCharMap->mpRangeCodes[ nRange + 1 ] : cCandidate;
}
// Returns the mapped char preceding cChar, clamped to the first and last
// char of the map.
sal_UCS4 FontCharMap::GetPrevChar( sal_UCS4 cChar ) const
{
    const sal_UCS4 cFirst = GetFirstChar();
    if( cChar <= cFirst )
        return cFirst;

    const sal_UCS4 cLast = GetLastChar();
    if( cChar > cLast )
        return cLast;

    const sal_UCS4 cCandidate = cChar - 1;
    const int nRange = findRangeIndex( cCandidate );
    const bool bInGap = (nRange & 1) != 0; // outside a range?
    // in a gap the previous char is the last one of the preceding range
    return bInGap ? (mpImplFontCharMap->mpRangeCodes[ nRange ] - 1) : cCandidate;
}
// Returns the position of cChar when all mapped chars are numbered in range
// order, or -1 if cChar is not inside any range.
int FontCharMap::GetIndexFromChar( sal_UCS4 cChar ) const
{
    // TODO: improve linear walk?
    const sal_UCS4* const pCodes = mpImplFontCharMap->mpRangeCodes;
    const int nRanges = mpImplFontCharMap->mnRangeCount;

    int nCharsBefore = 0;
    for( int nRange = 0; nRange < nRanges; ++nRange )
    {
        const sal_UCS4 cBegin = pCodes[ 2*nRange ];
        const sal_UCS4 cEnd = pCodes[ 2*nRange + 1 ];

        if( cEnd <= cChar )
        {
            // the char lies behind this range: skip all of its chars
            nCharsBefore += cEnd - cBegin;
            continue;
        }
        if( cChar < cBegin )
            break; // the char falls into the gap before this range
        return nCharsBefore + (cChar - cBegin);
    }

    return -1;
}
// Returns the char at position nIndex when all mapped chars are numbered in
// range order; falls back to the first code of the first range when the
// index is not reached by any range.
sal_UCS4 FontCharMap::GetCharFromIndex( int nIndex ) const
{
    // TODO: improve linear walk?
    const sal_UCS4* const pCodes = mpImplFontCharMap->mpRangeCodes;
    const int nRanges = mpImplFontCharMap->mnRangeCount;

    int nRemaining = nIndex;
    for( int nRange = 0; nRange < nRanges; ++nRange )
    {
        const sal_UCS4 cBegin = pCodes[ 2*nRange ];
        const sal_UCS4 cEnd = pCodes[ 2*nRange + 1 ];
        nRemaining -= cEnd - cBegin;
        // a negative remainder is the distance back from the range end
        if( nRemaining < 0 )
            return (cEnd + nRemaining);
    }

    // we can only get here with an out-of-bounds charindex
    return pCodes[0];
}
// Binary search over the flat array of range codes: returns the index the
// search settles on, where callers treat an even result as "inside a range"
// and an odd one as "in the gap behind a range".
int FontCharMap::findRangeIndex( sal_UCS4 cChar ) const
{
    const sal_UCS4* const pBounds = mpImplFontCharMap->mpRangeCodes;
    const int nRanges = mpImplFontCharMap->mnRangeCount;

    int nLow = 0;
    int nHigh = 2 * nRanges - 1;
    int nProbe = nRanges;
    while( nLow < nHigh )
    {
        if( pBounds[ nProbe ] <= cChar )
            nLow = nProbe;
        else
            nHigh = nProbe - 1;
        nProbe = (nLow + nHigh + 1) / 2;
    }

    return nProbe;
}
// Looks up the glyph index for cChar.
// Returns -1 if the object doesn't know the glyph ids and 0 if the unicode
// doesn't have a matching glyph.
int FontCharMap::GetGlyphIndex( sal_UCS4 cChar ) const
{
    if( mpImplFontCharMap->mpStartGlyphs == nullptr )
        return -1;

    const sal_UCS4* const pCodes = mpImplFontCharMap->mpRangeCodes;

    int nRange = findRangeIndex( cChar );
    // below the very first range?
    if( (nRange == 0) && (cChar < pCodes[0]) )
    {
        // symbol aliasing gives symbol fonts a second chance
        const bool bAliasable = (cChar <= 0xFF)
            && (pCodes[0] >= 0xF000)
            && (pCodes[1] <= 0xF0FF);
        if( !bAliasable )
            return 0;

        // check for symbol aliasing (U+F0xx -> U+00xx)
        cChar |= 0xF000;
        nRange = findRangeIndex( cChar );
        if( (nRange == 0) && (cChar < pCodes[0]) )
            return 0;
    }

    // an odd index means the char lies between two ranges
    if( nRange & 1 )
        return 0;

    const int nOffsetInRange = cChar - pCodes[ nRange ];
    const int nStartIndex = mpImplFontCharMap->mpStartGlyphs[ nRange/2 ];

    // a negative start is the negated position in the glyphid array
    if( nStartIndex < 0 )
        return mpImplFontCharMap->mpGlyphIds[ nOffsetInRange - nStartIndex ];

    // otherwise the glyph index can be calculated
    return nOffsetInRange + nStartIndex;
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
↑ V557 Array overrun is possible. The value of 'j ++' index could reach 64.