|
tesseract
3.03
|
#include <shapetable.h>
Public Member Functions | |
| ShapeTable () | |
| ShapeTable (const UNICHARSET &unicharset) | |
| bool | Serialize (FILE *fp) const |
| bool | DeSerialize (bool swap, FILE *fp) |
| int | NumShapes () const |
| const UNICHARSET & | unicharset () const |
| int | NumFonts () const |
| void | set_unicharset (const UNICHARSET &unicharset) |
| void | ReMapClassIds (const GenericVector< int > &unicharset_map) |
| STRING | DebugStr (int shape_id) const |
| STRING | SummaryStr () const |
| int | AddShape (int unichar_id, int font_id) |
| int | AddShape (const Shape &other) |
| void | DeleteShape (int shape_id) |
| void | AddToShape (int shape_id, int unichar_id, int font_id) |
| void | AddShapeToShape (int shape_id, const Shape &other) |
| int | FindShape (int unichar_id, int font_id) const |
| void | GetFirstUnicharAndFont (int shape_id, int *unichar_id, int *font_id) const |
| const Shape & | GetShape (int shape_id) const |
| Shape * | MutableShape (int shape_id) |
| int | BuildFromShape (const Shape &shape, const ShapeTable &master_shapes) |
| bool | AlreadyMerged (int shape_id1, int shape_id2) const |
| bool | AnyMultipleUnichars () const |
| int | MaxNumUnichars () const |
| void | ForceFontMerges (int start, int end) |
| int | MasterUnicharCount (int shape_id) const |
| int | MasterFontCount (int shape_id) const |
| int | MergedUnicharCount (int shape_id1, int shape_id2) const |
| void | MergeShapes (int shape_id1, int shape_id2) |
| void | SwapShapes (int shape_id1, int shape_id2) |
| void | AppendMasterShapes (const ShapeTable &other, GenericVector< int > *shape_map) |
| int | NumMasterShapes () const |
| int | MasterDestinationIndex (int shape_id) const |
| bool | SubsetUnichar (int shape_id1, int shape_id2) const |
| bool | MergeSubsetUnichar (int merge_id1, int merge_id2, int shape_id) const |
| bool | EqualUnichars (int shape_id1, int shape_id2) const |
| bool | MergeEqualUnichars (int merge_id1, int merge_id2, int shape_id) const |
| bool | CommonUnichars (int shape_id1, int shape_id2) const |
| bool | CommonFont (int shape_id1, int shape_id2) const |
| void | AddShapeToResults (const ShapeRating &shape_rating, GenericVector< int > *unichar_map, GenericVector< UnicharRating > *results) const |
Definition at line 249 of file shapetable.h.
| tesseract::ShapeTable::ShapeTable | ( | ) |
Definition at line 243 of file shapetable.cpp.
: unicharset_(NULL), num_fonts_(0) {
}
| tesseract::ShapeTable::ShapeTable | ( | const UNICHARSET & | unicharset | ) | [explicit] |
Definition at line 245 of file shapetable.cpp.
: unicharset_(&unicharset), num_fonts_(0) {
}
| int tesseract::ShapeTable::AddShape | ( | int | unichar_id, |
| int | font_id | ||
| ) |
Definition at line 346 of file shapetable.cpp.
| int tesseract::ShapeTable::AddShape | ( | const Shape & | other | ) |
Definition at line 357 of file shapetable.cpp.
| void tesseract::ShapeTable::AddShapeToResults | ( | const ShapeRating & | shape_rating, |
| GenericVector< int > * | unichar_map, | ||
| GenericVector< UnicharRating > * | results | ||
| ) | const |
Definition at line 697 of file shapetable.cpp.
{
  // The joined/broken flags are reported through the special UNICHAR_JOINED /
  // UNICHAR_BROKEN ids, using the same rating as the shape itself.
  if (shape_rating.joined)
    AddUnicharToResults(UNICHAR_JOINED, shape_rating.rating, unichar_map,
                        results);
  if (shape_rating.broken)
    AddUnicharToResults(UNICHAR_BROKEN, shape_rating.rating, unichar_map,
                        results);
  // Every unichar of the rated shape gets a result entry, and that entry's
  // font list is extended with the fonts recorded for the unichar.
  const Shape& rated_shape = GetShape(shape_rating.shape_id);
  for (int i = 0; i < rated_shape.size(); ++i) {
    const UnicharAndFonts& entry = rated_shape[i];
    const int slot = AddUnicharToResults(entry.unichar_id,
                                         shape_rating.rating,
                                         unichar_map, results);
    (*results)[slot].fonts += entry.font_ids;
  }
}
| void tesseract::ShapeTable::AddShapeToShape | ( | int | shape_id, |
| const Shape & | other | ||
| ) |
Definition at line 386 of file shapetable.cpp.
{
  // Fold the contents of other into the shape stored at shape_id.
  shape_table_[shape_id]->AddShape(other);
  // Zero the cached font count; NumFonts() rescans the table whenever the
  // cached value is <= 0.
  num_fonts_ = 0;
}
| void tesseract::ShapeTable::AddToShape | ( | int | shape_id, |
| int | unichar_id, | ||
| int | font_id | ||
| ) |
Definition at line 379 of file shapetable.cpp.
{
  // Adds the (unichar_id, font_id) pair to the shape stored at shape_id and
  // keeps the cached font count consistent.
  Shape& shape = *shape_table_[shape_id];
  shape.AddToShape(unichar_id, font_id);
  // num_fonts_ <= 0 means "not computed yet": DeSerialize() and
  // AddShapeToShape() reset it to 0 and NumFonts() rescans the table only
  // while it is <= 0. Raising it from 0 here would stop that rescan and make
  // NumFonts() report font_id + 1 even if other shapes hold larger font ids.
  // So only bump a cache that is already valid; otherwise leave it for the
  // lazy rescan.
  if (num_fonts_ > 0)
    num_fonts_ = MAX(num_fonts_, font_id + 1);
}
| bool tesseract::ShapeTable::AlreadyMerged | ( | int | shape_id1, |
| int | shape_id2 | ||
| ) | const |
Definition at line 449 of file shapetable.cpp.
{
  // Two shapes are merged when they resolve to the same master shape.
  const int master1 = MasterDestinationIndex(shape_id1);
  const int master2 = MasterDestinationIndex(shape_id2);
  return master1 == master2;
}
| bool tesseract::ShapeTable::AnyMultipleUnichars | ( | ) | const |
Definition at line 454 of file shapetable.cpp.
{
  // Only master shapes (those that are their own destination) are examined.
  for (int s = 0, end = NumShapes(); s < end; ++s) {
    const bool is_master = MasterDestinationIndex(s) == s;
    if (is_master && GetShape(s).size() > 1)
      return true;
  }
  return false;
}
| void tesseract::ShapeTable::AppendMasterShapes | ( | const ShapeTable & | other, |
| GenericVector< int > * | shape_map | ||
| ) |
Definition at line 666 of file shapetable.cpp.
{
  // When a map is requested it gets one entry per shape in other, preset to
  // -1; entries of shapes that are copied receive their new index here.
  const bool want_map = shape_map != NULL;
  if (want_map)
    shape_map->init_to_size(other.NumShapes(), -1);
  for (int s = 0; s < other.shape_table_.size(); ++s) {
    Shape* const candidate = other.shape_table_[s];
    // A non-negative destination index means the shape is not a master.
    if (candidate->destination_index() >= 0)
      continue;
    const int new_index = AddShape(*candidate);
    if (want_map)
      (*shape_map)[s] = new_index;
  }
}
| int tesseract::ShapeTable::BuildFromShape | ( | const Shape & | shape, |
| const ShapeTable & | master_shapes | ||
| ) |
Definition at line 423 of file shapetable.cpp.
{
  // Pass 1: for every (unichar, font) pair of shape, either mark the master
  // shape that contains it, or add the pair to this table as a new shape if
  // this table does not hold it yet.
  BitVector used_masters(master_shapes.NumShapes());
  for (int u = 0; u < shape.size(); ++u) {
    for (int f = 0; f < shape[u].font_ids.size(); ++f) {
      const int unichar_id = shape[u].unichar_id;
      const int font_id = shape[u].font_ids[f];
      const int master_id = master_shapes.FindShape(unichar_id, font_id);
      if (master_id >= 0) {
        used_masters.SetBit(master_id);
        continue;
      }
      if (FindShape(unichar_id, font_id) < 0)
        AddShape(unichar_id, font_id);
    }
  }
  // Pass 2: copy every marked master shape into this table and count them.
  int masters_added = 0;
  for (int s = 0; s < master_shapes.NumShapes(); ++s) {
    if (!used_masters[s])
      continue;
    AddShape(master_shapes.GetShape(s));
    ++masters_added;
  }
  return masters_added;
}
| bool tesseract::ShapeTable::CommonFont | ( | int | shape_id1, |
| int | shape_id2 | ||
| ) | const |
Definition at line 651 of file shapetable.cpp.
{
  const Shape& first = GetShape(shape_id1);
  const Shape& second = GetShape(shape_id2);
  // True as soon as any font listed under any unichar of the first shape is
  // also contained in the second shape.
  for (int c = 0; c < first.size(); ++c) {
    const GenericVector<int>& fonts = first[c].font_ids;
    int f = 0;
    while (f < fonts.size() && !second.ContainsFont(fonts[f]))
      ++f;
    if (f < fonts.size())
      return true;
  }
  return false;
}
| bool tesseract::ShapeTable::CommonUnichars | ( | int | shape_id1, |
| int | shape_id2 | ||
| ) | const |
Definition at line 639 of file shapetable.cpp.
| STRING tesseract::ShapeTable::DebugStr | ( | int | shape_id | ) | const |
Definition at line 291 of file shapetable.cpp.
{
// Builds a human-readable description of the shape at index shape_id.
// An out-of-range id yields a fixed marker string instead of a description.
// NOTE(review): the marker reads "INVALID_UNICHAR_ID" although the rejected
// value is a shape id - confirm nothing matches on this text before renaming.
if (shape_id < 0 || shape_id >= shape_table_.size())
return STRING("INVALID_UNICHAR_ID");
const Shape& shape = GetShape(shape_id);
STRING result;
result.add_str_int("Shape", shape_id);
// Shapes with more than 100 unichars are summarized by their count only.
if (shape.size() > 100) {
result.add_str_int(" Num unichars=", shape.size());
return result;
}
for (int c = 0; c < shape.size(); ++c) {
result.add_str_int(" c_id=", shape[c].unichar_id);
result += "=";
// unicharset_ is dereferenced here without a NULL check.
result += unicharset_->id_to_unichar(shape[c].unichar_id);
// Font details are only listed for shapes with fewer than 10 unichars.
if (shape.size() < 10) {
result.add_str_int(", ", shape[c].font_ids.size());
result += " fonts =";
int num_fonts = shape[c].font_ids.size();
// More than 10 fonts: print just the first and last id around " ... ".
if (num_fonts > 10) {
result.add_str_int(" ", shape[c].font_ids[0]);
result.add_str_int(" ... ", shape[c].font_ids[num_fonts - 1]);
} else {
for (int f = 0; f < num_fonts; ++f) {
result.add_str_int(" ", shape[c].font_ids[f]);
}
}
}
}
return result;
}
| void tesseract::ShapeTable::DeleteShape | ( | int | shape_id | ) |
Definition at line 371 of file shapetable.cpp.
{
// Frees the Shape stored at shape_id, clears the slot to NULL, then calls
// remove(shape_id) on the table.
// NOTE(review): presumably the NULL assignment keeps remove() from freeing
// the same pointer a second time - confirm against the container's remove().
delete shape_table_[shape_id];
shape_table_[shape_id] = NULL;
shape_table_.remove(shape_id);
}
| bool tesseract::ShapeTable::DeSerialize | ( | bool | swap, |
| FILE * | fp | ||
| ) |
Definition at line 256 of file shapetable.cpp.
{
  // Returns false if the underlying table could not be read.
  const bool loaded = shape_table_.DeSerialize(swap, fp);
  if (loaded) {
    // Zero the cached font count so that NumFonts() rescans the new table.
    num_fonts_ = 0;
  }
  return loaded;
}
| bool tesseract::ShapeTable::EqualUnichars | ( | int | shape_id1, |
| int | shape_id2 | ||
| ) | const |
Definition at line 597 of file shapetable.cpp.
{
  const Shape& lhs = GetShape(shape_id1);
  const Shape& rhs = GetShape(shape_id2);
  // Every unichar of the first shape must appear in the second...
  for (int i = 0; i < lhs.size(); ++i) {
    if (!rhs.ContainsUnichar(lhs[i].unichar_id))
      return false;
  }
  // ...and every unichar of the second must appear in the first.
  for (int j = 0; j < rhs.size(); ++j) {
    if (!lhs.ContainsUnichar(rhs[j].unichar_id))
      return false;
  }
  return true;
}
| int tesseract::ShapeTable::FindShape | ( | int | unichar_id, |
| int | font_id | ||
| ) | const |
Definition at line 396 of file shapetable.cpp.
{
  // Returns the index of the first shape holding unichar_id with font_id,
  // or -1 if there is none. A negative font_id matches any font.
  const bool any_font = font_id < 0;
  for (int s = 0; s < shape_table_.size(); ++s) {
    const Shape& candidate = GetShape(s);
    for (int c = 0; c < candidate.size(); ++c) {
      const UnicharAndFonts& entry = candidate[c];
      if (entry.unichar_id != unichar_id)
        continue;
      if (any_font)
        return s;  // We don't care about the font.
      for (int f = 0; f < entry.font_ids.size(); ++f) {
        if (entry.font_ids[f] == font_id)
          return s;
      }
    }
  }
  return -1;
}
| void tesseract::ShapeTable::ForceFontMerges | ( | int | start, |
| int | end | ||
| ) |
Definition at line 478 of file shapetable.cpp.
{
  // Within [start, end), merge every pair of single-unichar master shapes
  // that carry the same unichar id.
  for (int s1 = start; s1 < end; ++s1) {
    if (MasterDestinationIndex(s1) != s1 || GetShape(s1).size() != 1)
      continue;
    const int unichar_id = GetShape(s1)[0].unichar_id;
    for (int s2 = s1 + 1; s2 < end; ++s2) {
      const bool mergeable = MasterDestinationIndex(s2) == s2 &&
                             GetShape(s2).size() == 1 &&
                             unichar_id == GetShape(s2)[0].unichar_id;
      if (mergeable)
        MergeShapes(s1, s2);
    }
  }
  // Rebuild this table from its master shapes only, then replace *this with
  // the rebuilt copy.
  ShapeTable compacted(*unicharset_);
  compacted.AppendMasterShapes(*this, NULL);
  *this = compacted;
}
| void tesseract::ShapeTable::GetFirstUnicharAndFont | ( | int | shape_id, |
| int * | unichar_id, | ||
| int * | font_id | ||
| ) | const |
Definition at line 414 of file shapetable.cpp.
{
  // Reports entry 0 of the shape and font 0 of that entry; neither index is
  // range-checked here.
  const Shape& shape = GetShape(shape_id);
  const UnicharAndFonts& first_entry = shape[0];
  *unichar_id = first_entry.unichar_id;
  *font_id = first_entry.font_ids[0];
}
| const Shape& tesseract::ShapeTable::GetShape | ( | int | shape_id | ) | const [inline] |
Definition at line 308 of file shapetable.h.
{
// Read-only access to the shape at shape_id; the index is not range-checked
// here and the stored pointer is dereferenced directly.
return *shape_table_[shape_id];
}
| int tesseract::ShapeTable::MasterDestinationIndex | ( | int | shape_id | ) | const |
Definition at line 541 of file shapetable.cpp.
{
  // Follow the chain of destination indices starting at shape_id. A shape
  // is a master when its destination index is negative or its own index.
  int current = shape_id;
  for (;;) {
    const int next = shape_table_[current]->destination_index();
    if (next == current || next < 0)
      return current;  // Reached the master.
    current = next;
  }
}
| int tesseract::ShapeTable::MasterFontCount | ( | int | shape_id | ) | const |
Definition at line 502 of file shapetable.cpp.
{
  // Sum the font-list sizes over every unichar of the master shape.
  const Shape& master = GetShape(MasterDestinationIndex(shape_id));
  int total_fonts = 0;
  for (int i = 0; i < master.size(); ++i)
    total_fonts += master[i].font_ids.size();
  return total_fonts;
}
| int tesseract::ShapeTable::MasterUnicharCount | ( | int | shape_id | ) | const |
Definition at line 496 of file shapetable.cpp.
{
  // Size of the master shape that shape_id resolves to.
  return GetShape(MasterDestinationIndex(shape_id)).size();
}
| int tesseract::ShapeTable::MaxNumUnichars | ( | ) | const |
| int tesseract::ShapeTable::MergedUnicharCount | ( | int | shape_id1, |
| int | shape_id2 | ||
| ) | const |
Definition at line 513 of file shapetable.cpp.
{
  // Do it the easy way for now: copy the first master, add the second
  // master to the copy, and report the size of the result. The table itself
  // is not modified.
  const Shape& first_master = *shape_table_[MasterDestinationIndex(shape_id1)];
  const Shape& second_master = *shape_table_[MasterDestinationIndex(shape_id2)];
  Shape merged(first_master);
  merged.AddShape(second_master);
  return merged.size();
}
| bool tesseract::ShapeTable::MergeEqualUnichars | ( | int | merge_id1, |
| int | merge_id2, | ||
| int | shape_id | ||
| ) | const |
Definition at line 614 of file shapetable.cpp.
{
  const Shape& merge1 = GetShape(merge_id1);
  const Shape& merge2 = GetShape(merge_id2);
  const Shape& shape = GetShape(shape_id);
  // Every unichar of shape must appear in at least one of the two merges.
  for (int i = 0; i < shape.size(); ++i) {
    const int unichar_id = shape[i].unichar_id;
    const bool in_either = merge1.ContainsUnichar(unichar_id) ||
                           merge2.ContainsUnichar(unichar_id);
    if (!in_either)
      return false;  // Shape has a unichar that appears in neither merge.
  }
  // Every unichar of each merge must appear in shape.
  const Shape* const merges[2] = { &merge1, &merge2 };
  for (int m = 0; m < 2; ++m) {
    const Shape& merge = *merges[m];
    for (int i = 0; i < merge.size(); ++i) {
      if (!shape.ContainsUnichar(merge[i].unichar_id))
        return false;  // Merge has a unichar that is not in shape.
    }
  }
  return true;
}
| void tesseract::ShapeTable::MergeShapes | ( | int | shape_id1, |
| int | shape_id2 | ||
| ) |
Definition at line 523 of file shapetable.cpp.
{
  // Merges the group containing shape_id2 into the group containing
  // shape_id1: the master of shape_id2 is redirected to the master of
  // shape_id1 and its contents are added to that master.
  int master_id1 = MasterDestinationIndex(shape_id1);
  int master_id2 = MasterDestinationIndex(shape_id2);
  // Both ids already resolve to the same master: nothing to merge. Without
  // this guard the master would get its own index as destination, and code
  // that recognizes masters by destination_index() < 0 (NumMasterShapes,
  // AppendMasterShapes) would stop treating it as a master.
  if (master_id1 == master_id2)
    return;
  // Point master_id2 (and all merged shapes) to master_id1.
  shape_table_[master_id2]->set_destination_index(master_id1);
  // Add all the shapes of master_id2 to master_id1.
  shape_table_[master_id1]->AddShape(*shape_table_[master_id2]);
}
| bool tesseract::ShapeTable::MergeSubsetUnichar | ( | int | merge_id1, |
| int | merge_id2, | ||
| int | shape_id | ||
| ) | const |
Definition at line 571 of file shapetable.cpp.
{
  const Shape& merge1 = GetShape(merge_id1);
  const Shape& merge2 = GetShape(merge_id2);
  const Shape& shape = GetShape(shape_id);
  // Is every unichar of shape present in at least one of the merges?
  bool shape_in_merge = true;
  for (int i = 0; shape_in_merge && i < shape.size(); ++i) {
    const int unichar_id = shape[i].unichar_id;
    shape_in_merge = merge1.ContainsUnichar(unichar_id) ||
                     merge2.ContainsUnichar(unichar_id);
  }
  // Is every unichar of merge1 present in shape?
  bool merge1_in_shape = true;
  for (int i = 0; merge1_in_shape && i < merge1.size(); ++i)
    merge1_in_shape = shape.ContainsUnichar(merge1[i].unichar_id);
  // Is every unichar of merge2 present in shape?
  bool merge2_in_shape = true;
  for (int i = 0; merge2_in_shape && i < merge2.size(); ++i)
    merge2_in_shape = shape.ContainsUnichar(merge2[i].unichar_id);
  // Either direction of containment counts as a subset relation.
  return shape_in_merge || (merge1_in_shape && merge2_in_shape);
}
| Shape* tesseract::ShapeTable::MutableShape | ( | int | shape_id | ) | [inline] |
Definition at line 311 of file shapetable.h.
{
// Returns the stored pointer for shape_id so the caller can modify the
// shape in place; the index is not range-checked here.
return shape_table_[shape_id];
}
| int tesseract::ShapeTable::NumFonts | ( | ) | const |
Definition at line 264 of file shapetable.cpp.
{
  // A positive cached value is returned as is.
  if (num_fonts_ > 0)
    return num_fonts_;
  // Otherwise scan every font id in the table and cache the largest id + 1.
  for (int s = 0; s < shape_table_.size(); ++s) {
    const Shape& shape = *shape_table_[s];
    for (int c = 0; c < shape.size(); ++c) {
      const GenericVector<int>& fonts = shape[c].font_ids;
      for (int f = 0; f < fonts.size(); ++f) {
        if (fonts[f] >= num_fonts_)
          num_fonts_ = fonts[f] + 1;
      }
    }
  }
  return num_fonts_;
}
| int tesseract::ShapeTable::NumMasterShapes | ( | ) | const |
Definition at line 680 of file shapetable.cpp.
{
  // Count the shapes whose destination index is negative.
  int masters = 0;
  for (int s = 0; s < shape_table_.size(); ++s)
    masters += (shape_table_[s]->destination_index() < 0) ? 1 : 0;
  return masters;
}
| int tesseract::ShapeTable::NumShapes | ( | ) | const [inline] |
Definition at line 263 of file shapetable.h.
{
// Number of entries in the shape table, masters and merged shapes alike.
return shape_table_.size();
}
| void tesseract::ShapeTable::ReMapClassIds | ( | const GenericVector< int > & | unicharset_map | ) |
Definition at line 281 of file shapetable.cpp.
{
  // Replace every stored unichar id with unicharset_map[old id]. The old id
  // is used as an index into the map without a range check here.
  for (int s = 0; s < shape_table_.size(); ++s) {
    Shape& shape = *shape_table_[s];
    for (int c = 0; c < shape.size(); ++c) {
      const int old_id = shape[c].unichar_id;
      shape.SetUnicharId(c, unicharset_map[old_id]);
    }
  }
}
| bool tesseract::ShapeTable::Serialize | ( | FILE * | fp | ) | const |
Definition at line 250 of file shapetable.cpp.
{
  // Only the shape table itself is written; success is whatever the
  // table's own Serialize reports.
  const bool written = shape_table_.Serialize(fp);
  return written;
}
| void tesseract::ShapeTable::set_unicharset | ( | const UNICHARSET & | unicharset | ) | [inline] |
Definition at line 274 of file shapetable.h.
{
// Stores the address of the caller's UNICHARSET; no copy is made, so the
// referenced object must outlive its use through this table.
unicharset_ = &unicharset;
}
| bool tesseract::ShapeTable::SubsetUnichar | ( | int | shape_id1, |
| int | shape_id2 | ||
| ) | const |
Definition at line 553 of file shapetable.cpp.
{
  const Shape& first = GetShape(shape_id1);
  const Shape& second = GetShape(shape_id2);
  // Is every unichar of the first shape contained in the second?
  bool first_in_second = true;
  for (int i = 0; first_in_second && i < first.size(); ++i)
    first_in_second = second.ContainsUnichar(first[i].unichar_id);
  // Is every unichar of the second shape contained in the first?
  bool second_in_first = true;
  for (int j = 0; second_in_first && j < second.size(); ++j)
    second_in_first = first.ContainsUnichar(second[j].unichar_id);
  // Containment in either direction is enough.
  return first_in_second || second_in_first;
}
| STRING tesseract::ShapeTable::SummaryStr | ( | ) | const |
Definition at line 323 of file shapetable.cpp.
{
  // Gather statistics over master shapes only.
  int master_count = 0;
  int multi_count = 0;
  int largest = 0;
  for (int s = 0; s < shape_table_.size(); ++s) {
    if (MasterDestinationIndex(s) == s) {
      ++master_count;
      const int unichars = GetShape(s).size();
      if (unichars > 1)
        ++multi_count;
      if (largest < unichars)
        largest = unichars;
    }
  }
  STRING summary;
  summary.add_str_int("Number of shapes = ", master_count);
  summary.add_str_int(" max unichars = ", largest);
  summary.add_str_int(" number with multiple unichars = ", multi_count);
  return summary;
}
| void tesseract::ShapeTable::SwapShapes | ( | int | shape_id1, |
| int | shape_id2 | ||
| ) |
Definition at line 533 of file shapetable.cpp.
{
Shape* tmp = shape_table_[shape_id1];
shape_table_[shape_id1] = shape_table_[shape_id2];
shape_table_[shape_id2] = tmp;
}
| const UNICHARSET& tesseract::ShapeTable::unicharset | ( | ) | const [inline] |
Definition at line 266 of file shapetable.h.
{
// Dereferences the stored pointer without a NULL check; the default
// constructor initializes unicharset_ to NULL, so a unicharset must have
// been supplied before this is called.
return *unicharset_;
}