|
tesseract
3.03
|
#include <pango_font_info.h>
Public Types | |
| enum | FontTypeEnum { UNKNOWN, SERIF, SANS_SERIF, DECORATIVE } |
Public Member Functions | |
| PangoFontInfo () | |
| PangoFontInfo (const string &name) | |
| bool | ParseFontDescriptionName (const string &name) |
| bool | CoversUTF8Text (const char *utf8_text, int byte_length) const |
| int | DropUncoveredChars (string *utf8_text) const |
| bool | CanRenderString (const char *utf8_word, int len, vector< string > *graphemes) const |
| bool | CanRenderString (const char *utf8_word, int len) const |
| bool | GetSpacingProperties (const string &utf8_char, int *x_bearing, int *x_advance) const |
| string | DescriptionName () const |
| const string & | family_name () const |
| const int | font_size () const |
| const bool | is_bold () const |
| const bool | is_italic () const |
| const bool | is_smallcaps () const |
| const bool | is_monospace () const |
| const bool | is_fraktur () const |
| const FontTypeEnum | font_type () const |
| const int | resolution () const |
| void | set_resolution (const int resolution) |
Friends | |
| class | FontUtils |
Definition at line 38 of file pango_font_info.h.
Definition at line 40 of file pango_font_info.h.
{
// Broad typeface category of a font; this is the value type returned by
// font_type().
UNKNOWN,
SERIF,
SANS_SERIF,
DECORATIVE,
};
Definition at line 79 of file pango_font_info.cpp.
: desc_(NULL),
  resolution_(kDefaultResolution) {
  // Start with no Pango description and the default resolution, then let
  // Clear() put the object into its cleared state.
  Clear();
}
| tesseract::PangoFontInfo::PangoFontInfo | ( | const string & | name | ) | [explicit] |
Definition at line 83 of file pango_font_info.cpp.
: desc_(NULL),
  resolution_(kDefaultResolution) {
  // Try to populate this object from the supplied font description string.
  // If it cannot be parsed, report the offending string and fall back to the
  // cleared state.
  const bool parsed_ok = ParseFontDescriptionName(desc);
  if (parsed_ok) {
    return;
  }
  tprintf("ERROR: Could not parse %s\n", desc.c_str());
  Clear();
}
| bool tesseract::PangoFontInfo::CanRenderString | ( | const char * | utf8_word, |
| int | len, | ||
| vector< string > * | graphemes | ||
| ) | const |
Definition at line 341 of file pango_font_info.cpp.
{
// Lays out utf8_word (len bytes) with Pango using this font and reports
// whether it can be rendered: returns false if the font does not cover the
// text, or if any non-whitespace cluster contains an unknown glyph or the
// font's dotted-circle glyph; returns true otherwise.
// If graphemes is non-NULL it is cleared on entry, receives the UTF-8 text of
// every cluster visited (whitespace clusters included), and is cleared again
// when a bad glyph is found.
if (graphemes) graphemes->clear();
// We check for font coverage of the text first, as otherwise Pango could
// (undesirably) fall back to another font that does have the required
// coverage.
if (!CoversUTF8Text(utf8_word, len)) {
return false;
}
// U+25CC dotted circle character that often (but not always) gets rendered
// when there is an illegal grapheme sequence.
const char32 kDottedCircleGlyph = 9676;
bool bad_glyph = false;
// Build a layout on a fresh context bound to the default cairo font map.
PangoFontMap* font_map = pango_cairo_font_map_get_default();
PangoContext* context = pango_context_new();
pango_context_set_font_map(context, font_map);
PangoLayout* layout = pango_layout_new(context);
// Use the stored description when there is one; otherwise build a temporary
// description from DescriptionName() and free it once the layout has it.
if (desc_) {
pango_layout_set_font_description(layout, desc_);
} else {
PangoFontDescription *desc = pango_font_description_from_string(
DescriptionName().c_str());
pango_layout_set_font_description(layout, desc);
pango_font_description_free(desc);
}
pango_layout_set_text(layout, utf8_word, len);
PangoLayoutIter* run_iter = NULL;
{ // Fontconfig caches some information here that is not freed before exit.
DISABLE_HEAP_LEAK_CHECK;
run_iter = pango_layout_get_iter(layout);
}
// Walk every run of the layout, stopping early once a bad glyph is seen.
do {
PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
if (!run) {
tlog(2, "Found end of line NULL run marker\n");
// `continue` in a do/while jumps to the loop condition, which advances to
// the next run (or ends the loop).
continue;
}
// Look up the glyph id that the font actually chosen for this run uses for
// the dotted circle, so that it can be recognised in the glyph string.
PangoGlyph dotted_circle_glyph;
PangoFont* font = run->item->analysis.font;
dotted_circle_glyph = pango_fc_font_get_glyph(
reinterpret_cast<PangoFcFont*>(font), kDottedCircleGlyph);
if (TLOG_IS_ON(2)) {
PangoFontDescription* desc = pango_font_describe(font);
char* desc_str = pango_font_description_to_string(desc);
tlog(2, "Desc of font in run: %s\n", desc_str);
g_free(desc_str);
pango_font_description_free(desc);
}
// Iterate over the clusters of this run; byte indices are offsets into
// utf8_word.
PangoGlyphItemIter cluster_iter;
gboolean have_cluster;
for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter,
run, utf8_word);
have_cluster && !bad_glyph;
have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) {
const int start_byte_index = cluster_iter.start_index;
const int end_byte_index = cluster_iter.end_index;
int start_glyph_index = cluster_iter.start_glyph;
int end_glyph_index = cluster_iter.end_glyph;
string cluster_text = string(utf8_word + start_byte_index,
end_byte_index - start_byte_index);
// The cluster text is recorded before the whitespace test, so whitespace
// clusters are reported to the caller but their glyphs are not inspected.
if (graphemes) graphemes->push_back(cluster_text);
if (IsUTF8Whitespace(cluster_text.c_str())) {
tlog(2, "Skipping whitespace\n");
continue;
}
if (TLOG_IS_ON(2)) {
printf("start_byte=%d end_byte=%d start_glyph=%d end_glyph=%d ",
start_byte_index, end_byte_index,
start_glyph_index, end_glyph_index);
}
// The glyph range may run in either direction, hence the signed step
// (presumably descending for right-to-left runs -- TODO confirm).
for (int i = start_glyph_index,
step = (end_glyph_index > start_glyph_index) ? 1 : -1;
!bad_glyph && i != end_glyph_index; i+= step) {
const bool unknown_glyph =
(cluster_iter.glyph_item->glyphs->glyphs[i].glyph &
PANGO_GLYPH_UNKNOWN_FLAG);
const bool illegal_glyph =
(cluster_iter.glyph_item->glyphs->glyphs[i].glyph ==
dotted_circle_glyph);
bad_glyph = unknown_glyph || illegal_glyph;
if (TLOG_IS_ON(2)) {
printf("(%d=%d)", cluster_iter.glyph_item->glyphs->glyphs[i].glyph,
bad_glyph ? 1 : 0);
}
}
if (TLOG_IS_ON(2)) {
printf(" '%s'\n", cluster_text.c_str());
}
if (bad_glyph)
tlog(1, "Found illegal glyph!\n");
}
} while (!bad_glyph && pango_layout_iter_next_run(run_iter));
// Release the iterator and the Pango objects created above.
pango_layout_iter_free(run_iter);
g_object_unref(context);
g_object_unref(layout);
// Do not hand back a partial grapheme list for an unrenderable string.
if (bad_glyph && graphemes) graphemes->clear();
return !bad_glyph;
}
| bool tesseract::PangoFontInfo::CanRenderString | ( | const char * | utf8_word, |
| int | len | ||
| ) | const |
Definition at line 336 of file pango_font_info.cpp.
{
  // Convenience overload for callers that only need the yes/no answer.
  // The three-argument overload null-checks `graphemes` at every use, so
  // passing NULL yields the same result without building (and copying each
  // cluster string into) a vector that would be thrown away immediately.
  return CanRenderString(utf8_word, len, static_cast<vector<string>*>(NULL));
}
| bool tesseract::PangoFontInfo::CoversUTF8Text | ( | const char * | utf8_text, |
| int | byte_length | ||
| ) | const |
Definition at line 248 of file pango_font_info.cpp.
{
  // Returns true when every code point in utf8_text (byte_length bytes) that
  // is neither whitespace nor zero-width has PANGO_COVERAGE_EXACT coverage in
  // this font; stops and returns false at the first code point that does not.
  PangoFont* font = ToPangoFont();
  // pango_font_get_coverage() returns a coverage map owned by the caller, so
  // it has to be released on every exit path (it was previously leaked).
  PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
  bool all_covered = true;
  const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_text, byte_length);
  for (UNICHAR::const_iterator it = UNICHAR::begin(utf8_text, byte_length);
       it != it_end; ++it) {
    if (IsWhitespace(*it) || pango_is_zero_width(*it))
      continue;
    if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) {
      char tmp[5];
      int len = it.get_utf8(tmp);
      tmp[len] = '\0';
      tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it);
      all_covered = false;
      break;
    }
  }
  if (coverage) pango_coverage_unref(coverage);
  // NOTE(review): `font` is not released here; whether ToPangoFont() returns
  // an owned reference is not visible from this function -- confirm and
  // g_object_unref() it if it does.
  return all_covered;
}
| string tesseract::PangoFontInfo::DescriptionName | ( | ) | const |
Definition at line 105 of file pango_font_info.cpp.
{
  // With no Pango description attached there is nothing to serialise.
  if (desc_ == NULL) {
    return "";
  }
  // pango_font_description_to_string() returns a g_malloc'ed C string; copy
  // it into a string and free the original before returning.
  char* raw_name = pango_font_description_to_string(desc_);
  string result(raw_name);
  g_free(raw_name);
  return result;
}
| int tesseract::PangoFontInfo::DropUncoveredChars | ( | string * | utf8_text | ) | const |
Definition at line 267 of file pango_font_info.cpp.
{
  // Removes from *utf8_text, in place, every code point that is neither
  // whitespace nor zero-width and lacks PANGO_COVERAGE_EXACT coverage in this
  // font. Returns the number of code points removed.
  PangoFont* font = ToPangoFont();
  // pango_font_get_coverage() returns a coverage map owned by the caller; it
  // is released before returning (it was previously leaked).
  PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
  int num_dropped_chars = 0;
  // Maintain two iterators that point into the string. For space efficiency, we
  // will repeatedly copy one covered UTF8 character from one to the other, and
  // at the end resize the string to the right length.
  char* out = const_cast<char*>(utf8_text->c_str());
  const UNICHAR::const_iterator it_begin =
      UNICHAR::begin(utf8_text->c_str(), utf8_text->length());
  const UNICHAR::const_iterator it_end =
      UNICHAR::end(utf8_text->c_str(), utf8_text->length());
  for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
    if (!IsWhitespace(*it) && !pango_is_zero_width(*it) &&
        pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) {
      if (TLOG_IS_ON(2)) {
        char tmp[5];
        int len = it.get_utf8(tmp);
        tmp[len] = '\0';
        tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it);
      }
      ++num_dropped_chars;
      continue;
    }
    // Source and destination live in the same buffer and can overlap (or be
    // identical while nothing has been dropped yet), which strncpy() does not
    // permit; memmove() is defined for overlapping ranges.
    memmove(out, it.utf8_data(), it.utf8_len());
    out += it.utf8_len();
  }
  utf8_text->resize(out - utf8_text->c_str());
  if (coverage) pango_coverage_unref(coverage);
  // NOTE(review): `font` is not released here; whether ToPangoFont() returns
  // an owned reference is not visible from this function -- confirm and
  // g_object_unref() it if it does.
  return num_dropped_chars;
}
| const string& tesseract::PangoFontInfo::family_name | ( | ) | const [inline] |
Definition at line 89 of file pango_font_info.h.
{
  // Hands back the stored family name by const reference (no copy).
  return family_name_;
}
| const int tesseract::PangoFontInfo::font_size | ( | ) | const [inline] |
Definition at line 91 of file pango_font_info.h.
{
  // Read-only access to the stored font_size_ value.
  return font_size_;
}
| const FontTypeEnum tesseract::PangoFontInfo::font_type | ( | ) | const [inline] |
Definition at line 97 of file pango_font_info.h.
{
  // Read-only access to the stored FontTypeEnum value.
  return font_type_;
}
| bool tesseract::PangoFontInfo::GetSpacingProperties | ( | const string & | utf8_char, |
| int * | x_bearing, | ||
| int * | x_advance | ||
| ) | const |
Definition at line 298 of file pango_font_info.cpp.
{
  // Computes horizontal spacing for utf8_char, which may hold several code
  // points: *x_bearing receives the left-most ink bearing and *x_advance the
  // accumulated advance, as if the code points were rendered one after the
  // other. Returns false, leaving the outputs untouched, as soon as a code
  // point has no glyph in the font.
  PangoFont* pango_font = ToPangoFont();
  PangoFcFont* fc_font = reinterpret_cast<PangoFcFont*>(pango_font);

  int advance_so_far = 0;
  int leftmost_bearing = 0;
  bool is_first = true;
  const UNICHAR::const_iterator stop =
      UNICHAR::end(utf8_char.c_str(), utf8_char.length());
  for (UNICHAR::const_iterator cur =
           UNICHAR::begin(utf8_char.c_str(), utf8_char.length());
       cur != stop; ++cur) {
    // Map the code point to its glyph index; 0 means the font has no glyph.
    const PangoGlyph glyph = pango_fc_font_get_glyph(fc_font, *cur);
    if (glyph == 0) {
      return false;
    }
    // Fetch ink and logical extents and convert both to pixels.
    PangoRectangle ink_box, logical_box;
    pango_font_get_glyph_extents(pango_font, glyph, &ink_box, &logical_box);
    pango_extents_to_pixels(&ink_box, NULL);
    pango_extents_to_pixels(&logical_box, NULL);
    // The bearing of this glyph is measured from the start of the string.
    const int candidate = advance_so_far + PANGO_LBEARING(ink_box);
    if (is_first || candidate < leftmost_bearing) {
      leftmost_bearing = candidate;
    }
    is_first = false;
    advance_so_far += PANGO_RBEARING(logical_box);
  }
  *x_bearing = leftmost_bearing;
  *x_advance = advance_so_far;
  return true;
}
| const bool tesseract::PangoFontInfo::is_bold | ( | ) | const [inline] |
Definition at line 92 of file pango_font_info.h.
{
  // Read-only access to the stored is_bold_ flag.
  return is_bold_;
}
| const bool tesseract::PangoFontInfo::is_fraktur | ( | ) | const [inline] |
Definition at line 96 of file pango_font_info.h.
{
  // Read-only access to the stored is_fraktur_ flag.
  return is_fraktur_;
}
| const bool tesseract::PangoFontInfo::is_italic | ( | ) | const [inline] |
Definition at line 93 of file pango_font_info.h.
{
  // Read-only access to the stored is_italic_ flag.
  return is_italic_;
}
| const bool tesseract::PangoFontInfo::is_monospace | ( | ) | const [inline] |
Definition at line 95 of file pango_font_info.h.
{
  // Read-only access to the stored is_monospace_ flag.
  return is_monospace_;
}
| const bool tesseract::PangoFontInfo::is_smallcaps | ( | ) | const [inline] |
Definition at line 94 of file pango_font_info.h.
{
  // Read-only access to the stored is_smallcaps_ flag.
  return is_smallcaps_;
}
| bool tesseract::PangoFontInfo::ParseFontDescriptionName | ( | const string & | name | ) |
Definition at line 223 of file pango_font_info.cpp.
{
  // Turn the description string into a temporary PangoFontDescription, let
  // ParseFontDescription() consume it, and free the temporary afterwards.
  // The return value is whatever ParseFontDescription() reported.
  PangoFontDescription* parsed =
      pango_font_description_from_string(name.c_str());
  const bool ok = ParseFontDescription(parsed);
  pango_font_description_free(parsed);
  return ok;
}
| const int tesseract::PangoFontInfo::resolution | ( | ) | const [inline] |
Definition at line 99 of file pango_font_info.h.
{
  // Read-only access to the stored resolution_ value.
  return resolution_;
}
| void tesseract::PangoFontInfo::set_resolution | ( | const int | resolution | ) | [inline] |
Definition at line 100 of file pango_font_info.h.
{
  // Overwrite the stored resolution with the caller-supplied value.
  this->resolution_ = resolution;
}
friend class FontUtils [friend] |
Definition at line 105 of file pango_font_info.h.