|
tesseract
3.03
|
#include <lm_pain_points.h>
Public Member Functions | |
| LMPainPoints (int max, float rat, bool fp, const Dict *d, int deb) | |
| ~LMPainPoints () | |
| bool | HasPainPoints (LMPainPointsType pp_type) const |
| LMPainPointsType | Deque (MATRIX_COORD *pp, float *priority) |
| void | Clear () |
| void | GenerateInitial (WERD_RES *word_res) |
| void | GenerateFromPath (float rating_cert_scale, ViterbiStateEntry *vse, WERD_RES *word_res) |
| void | GenerateFromAmbigs (const DANGERR &fixpt, ViterbiStateEntry *vse, WERD_RES *word_res) |
| bool | GenerateForBlamer (double max_char_wh_ratio, WERD_RES *word_res, int col, int row) |
| bool | GeneratePainPoint (int col, int row, LMPainPointsType pp_type, float special_priority, bool ok_to_extend, float max_char_wh_ratio, WERD_RES *word_res) |
| void | RemapForSplit (int index) |
Static Public Member Functions | |
| static const char * | PainPointDescription (LMPainPointsType type) |
Static Public Attributes | |
| static const float | kDefaultPainPointPriorityAdjustment = 2.0f |
| static const float | kLooseMaxCharWhRatio = 2.5f |
Definition at line 53 of file lm_pain_points.h.
| tesseract::LMPainPoints::LMPainPoints | ( | int | max, |
| float | rat, | ||
| bool | fp, | ||
| const Dict * | d, | ||
| int | deb | ||
| ) | [inline] |
Definition at line 66 of file lm_pain_points.h.
:
// Member initializer list: each constructor argument is stored unchanged in
// the corresponding member; the constructor body itself is empty.
max_heap_size_(max), max_char_wh_ratio_(rat), fixed_pitch_(fp),
dict_(d), debug_level_(deb) {}
| tesseract::LMPainPoints::~LMPainPoints | ( | ) | [inline] |
Definition at line 69 of file lm_pain_points.h.
{}  // Empty body: the destructor performs no explicit work.
| void tesseract::LMPainPoints::Clear | ( | ) | [inline] |
Definition at line 82 of file lm_pain_points.h.
{
  // Empty every per-type pain point heap (one heap per index below
  // LM_PPTYPE_NUM).
  int heap_index = 0;
  while (heap_index < LM_PPTYPE_NUM) {
    pain_points_heaps_[heap_index].clear();
    ++heap_index;
  }
}
| LMPainPointsType tesseract::LMPainPoints::Deque | ( | MATRIX_COORD * | pp, |
| float * | priority | ||
| ) |
Definition at line 37 of file lm_pain_points.cpp.
{
  // Locate the first non-empty heap, scanning in increasing type index
  // order, so lower-indexed types are always served before higher ones.
  int type_index = 0;
  while (type_index < LM_PPTYPE_NUM &&
         pain_points_heaps_[type_index].empty()) {
    ++type_index;
  }
  // Every heap is empty: LM_PPTYPE_NUM signals that nothing was dequeued,
  // and *pp / *priority are left untouched.
  if (type_index == LM_PPTYPE_NUM) return LM_PPTYPE_NUM;
  // Copy the top entry out to the caller before removing it from the heap.
  *priority = pain_points_heaps_[type_index].PeekTop().key;
  *pp = pain_points_heaps_[type_index].PeekTop().data;
  pain_points_heaps_[type_index].Pop(NULL);
  return static_cast<LMPainPointsType>(type_index);
}
| bool tesseract::LMPainPoints::GenerateForBlamer | ( | double | max_char_wh_ratio, |
| WERD_RES * | word_res, | ||
| int | col, | ||
| int | row | ||
| ) | [inline] |
Definition at line 99 of file lm_pain_points.h.
{
  // Blamer pain points are requested with a zero special priority and with
  // extension disabled, so GeneratePainPoint() works on the (col, row) cell
  // exactly as given. Returns whatever GeneratePainPoint() returns.
  const float no_special_priority = 0.0;
  const bool ok_to_extend = false;
  return GeneratePainPoint(col, row, LM_PPTYPE_BLAMER, no_special_priority,
                           ok_to_extend, max_char_wh_ratio, word_res);
}
| void tesseract::LMPainPoints::GenerateFromAmbigs | ( | const DANGERR & | fixpt, |
| ViterbiStateEntry * | vse, | ||
| WERD_RES * | word_res | ||
| ) |
Definition at line 130 of file lm_pain_points.cpp.
{
  // The begin/end fields of each DANGERR entry hold blob indices in the
  // coordinate system of the ratings matrix.
  for (int i = 0; i < fixpt.size(); ++i) {
    const DANGERR_INFO &ambig = fixpt[i];
    // Entries that are not flagged as dangerous never produce a pain point.
    if (!ambig.dangerous) continue;
    // Request an AMBIG pain point at (col = begin, row = end - 1), using the
    // loose width-to-height limit and allowing extension to the right.
    GeneratePainPoint(ambig.begin, ambig.end - 1, LM_PPTYPE_AMBIG,
                      vse->cost, true, kLooseMaxCharWhRatio, word_res);
  }
}
| void tesseract::LMPainPoints::GenerateFromPath | ( | float | rating_cert_scale, |
| ViterbiStateEntry * | vse, | ||
| WERD_RES * | word_res | ||
| ) |
Definition at line 68 of file lm_pain_points.cpp.
{
  // Pain point generation along a path favours exploring paths whose known
  // part has a low average rating, without trusting the ratings of the
  // pieces that would be combined.
  //
  // For every pair of neighbouring blobs on the path described by vse, a
  // pain point proposing to join the pair is generated. Its priority is the
  // average rating (per unit of outline length) of the whole path, excluding
  // the two blobs to be joined.
  // The ratings of those two blobs are deliberately left out: their
  // magnitude does not tell whether joining helps. Chopped junk blobs
  // (/ | - ') can have very good (low) ratings although joining them is
  // beneficial, while blobs with high ratings may be over-joined pieces of
  // characters, blobs from an unseen font, or chopped pieces of complex
  // characters.
  for (ViterbiStateEntry *curr_vse = vse; curr_vse->parent_vse != NULL;
       curr_vse = curr_vse->parent_vse) {
    ViterbiStateEntry *parent_vse = curr_vse->parent_vse;
    BLOB_CHOICE *curr_b = curr_vse->curr_b;
    BLOB_CHOICE *parent_b = parent_vse->curr_b;
    const MATRIX_COORD &curr_cell = curr_b->matrix_cell();
    const MATRIX_COORD &parent_cell = parent_b->matrix_cell();
    // The joined blob starts at the parent's column and ends at the current
    // blob's row.
    MATRIX_COORD pain_coord(parent_cell.col, curr_cell.row);
    const bool already_classified =
        pain_coord.Valid(*word_res->ratings) &&
        word_res->ratings->Classified(parent_cell.col, curr_cell.row,
                                      dict_->WildcardID());
    if (already_classified) {
      // Nothing to generate; at high debug levels dump the existing choices.
      if (debug_level_ > 3) {
        tprintf("NO pain point (Classified) for col=%d row=%d type=%s\n",
                pain_coord.col, pain_coord.row,
                LMPainPointsTypeName[LM_PPTYPE_PATH]);
        BLOB_CHOICE_IT b_it(word_res->ratings->get(pain_coord.col,
                                                   pain_coord.row));
        for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
          BLOB_CHOICE *choice = b_it.data();
          choice->print_full();
        }
      }
      continue;
    }
    // rat_subtr: summed ratings of the two adjacent blobs to be merged. It
    // is removed from the path's ratings sum because the rating of the
    // merged blob is not known yet.
    float rat_subtr = curr_b->rating() + parent_b->rating();
    // ol_subtr: outline length of the two blobs that would be joined.
    float ol_subtr =
        AssociateUtils::ComputeOutlineLength(rating_cert_scale, *curr_b) +
        AssociateUtils::ComputeOutlineLength(rating_cert_scale, *parent_b);
    // ol_dif: outline length of the path without the two blobs.
    float ol_dif = vse->outline_length - ol_subtr;
    // Average rating per unit of outline over the remainder of the path;
    // zero when no outline is left to average over.
    float priority = ol_dif > 0 ? (vse->ratings_sum-rat_subtr)/ol_dif : 0.0;
    GeneratePainPoint(pain_coord.col, pain_coord.row, LM_PPTYPE_PATH,
                      priority, true, max_char_wh_ratio_, word_res);
  }
}
| void tesseract::LMPainPoints::GenerateInitial | ( | WERD_RES * | word_res | ) |
Definition at line 48 of file lm_pain_points.cpp.
{
  // Seeds the pain point heaps for word_res: walks the cells above the
  // diagonal of the ratings matrix, up to bandwidth() rows past each column,
  // and requests a SHAPE pain point for every cell that is not classified
  // yet but has a classified neighbour.
  MATRIX *ratings = word_res->ratings;
  for (int col = 0; col < ratings->dimension(); ++col) {
    // Rows are limited both by the matrix dimension and by the bandwidth.
    int row_end = MIN(ratings->dimension(), col + ratings->bandwidth() + 1);
    for (int row = col + 1; row < row_end; ++row) {
      MATRIX_COORD coord(col, row);
      // Skip cells that are valid and already hold a classification result.
      if (coord.Valid(*ratings) &&
          ratings->get(col, row) != NOT_CLASSIFIED) continue;
      // Add an initial pain point if either neighbour, (col, row - 1) or
      // (col + 1, row), is classified. The second neighbour is only probed
      // when col + 1 is still inside the matrix.
      if (ratings->Classified(col, row - 1, dict_->WildcardID()) ||
          (col + 1 < ratings->dimension() &&
           ratings->Classified(col + 1, row, dict_->WildcardID()))) {
        GeneratePainPoint(col, row, LM_PPTYPE_SHAPE, 0.0,
                          true, max_char_wh_ratio_, word_res);
      }
    }
  }
}
| bool tesseract::LMPainPoints::GeneratePainPoint | ( | int | col, |
| int | row, | ||
| LMPainPointsType | pp_type, | ||
| float | special_priority, | ||
| bool | ok_to_extend, | ||
| float | max_char_wh_ratio, | ||
| WERD_RES * | word_res | ||
| ) |
Definition at line 146 of file lm_pain_points.cpp.
{
  // Tries to queue a pain point of type pp_type at (col, row). Returns true
  // only when a pain point was pushed onto the heap for pp_type.

  // A valid cell that is already classified needs no pain point.
  MATRIX_COORD coord(col, row);
  const bool already_classified =
      coord.Valid(*word_res->ratings) &&
      word_res->ratings->Classified(col, row, dict_->WildcardID());
  if (already_classified) return false;

  if (debug_level_ > 3) {
    tprintf("Generating pain point for col=%d row=%d type=%s\n",
            col, row, LMPainPointsTypeName[pp_type]);
  }

  // Compute associate stats for the candidate blob.
  AssociateStats associate_stats;
  AssociateUtils::ComputeStats(col, row, NULL, 0, fixed_pitch_,
                               max_char_wh_ratio, word_res, debug_level_,
                               &associate_stats);

  // For fixed-pitch fonts/languages: while the combined blob overlaps the
  // next blob on the right and extending is allowed, grow the blob by one
  // row at a time until the overlap disappears, the matrix edge is reached,
  // or the width-to-height ratio becomes too large.
  while (ok_to_extend &&
         associate_stats.bad_fixed_pitch_right_gap &&
         row + 1 < word_res->ratings->dimension() &&
         !associate_stats.bad_fixed_pitch_wh_ratio) {
    ++row;
    AssociateUtils::ComputeStats(col, row, NULL, 0, fixed_pitch_,
                                 max_char_wh_ratio, word_res, debug_level_,
                                 &associate_stats);
  }

  if (associate_stats.bad_shape) {
    if (debug_level_ > 3) {
      tprintf("Discarded pain point with a bad shape\n");
    }
    return false;
  }

  // Each per-type heap in pain_points_heaps_ holds at most max_heap_size_
  // entries; reject the pain point when its heap is full.
  if (pain_points_heaps_[pp_type].size() >= max_heap_size_) {
    if (debug_level_) tprintf("Pain points heap is full\n");
    return false;
  }

  // PATH pain points use the caller-supplied priority; every other type is
  // prioritized by the gap sum from the associate stats.
  const float priority = (pp_type == LM_PPTYPE_PATH)
                             ? special_priority
                             : associate_stats.gap_sum;
  // The (possibly extended) row is what gets stored with the pain point.
  MatrixCoordPair pain_point(priority, MATRIX_COORD(col, row));
  pain_points_heaps_[pp_type].Push(&pain_point);
  if (debug_level_) {
    tprintf("Added pain point with priority %g\n", priority);
  }
  return true;
}
| bool tesseract::LMPainPoints::HasPainPoints | ( | LMPainPointsType | pp_type | ) | const [inline] |
Definition at line 72 of file lm_pain_points.h.
{
  // True when the heap for pp_type holds at least one pain point.
  const bool heap_is_empty = pain_points_heaps_[pp_type].empty();
  return !heap_is_empty;
}
| static const char* tesseract::LMPainPoints::PainPointDescription | ( | LMPainPointsType | type | ) | [inline, static] |
Definition at line 62 of file lm_pain_points.h.
{
  // Look up the printable name for the given pain point type in the
  // LMPainPointsTypeName table, which is indexed by the type value.
  const char *description = LMPainPointsTypeName[type];
  return description;
}
| void tesseract::LMPainPoints::RemapForSplit | ( | int | index | ) |
Definition at line 207 of file lm_pain_points.cpp.
{
  // Visit every queued pain point of every type and let its coordinate
  // remap itself (MATRIX_COORD::MapForSplit) for the split at index.
  for (int type = 0; type < LM_PPTYPE_NUM; ++type) {
    GenericVector<MatrixCoordPair> &entries = *pain_points_heaps_[type].heap();
    for (int e = 0; e < entries.size(); ++e) {
      entries[e].data.MapForSplit(index);
    }
  }
}
const float tesseract::LMPainPoints::kDefaultPainPointPriorityAdjustment = 2.0f [static] |
Definition at line 56 of file lm_pain_points.h.
const float tesseract::LMPainPoints::kLooseMaxCharWhRatio = 2.5f [static] |
Definition at line 60 of file lm_pain_points.h.