Character recognition software is widely used to digitise printed texts. Thus the texts can be edited, searched and stored on a computer.
When documents (especially pretty old ones written with a typewriter), are digitised character recognition softwares often make mistakes.
Your task is correct the errors in the digitised text. You only have to handle the following mistakes:
S
is misinterpreted as 5
O
is misinterpreted as 0
I
is misinterpreted as 1
#include <random>
#include <iostream>
auto randint(int min, int max) {
static std::random_device rd;
static std::mt19937 rng(rd());
std::uniform_int_distribution<int> uni(min, max);
return uni(rng);
}
std::string sol(std::string s){
std::transform(s.begin(), s.end(), s.begin(), [](char c){return c == '5' ? 'S' : c == '0' ? 'O' : c == '1' ? 'I' : c;});
return s;
}
Describe(Correct){
It(BasicTests){
Assert::That(correct("1F-RUDYARD K1PL1NG"), Equals("IF-RUDYARD KIPLING"));
Assert::That(correct("R0BERT MERLE - THE DAY 0F THE D0LPH1N"), Equals("ROBERT MERLE - THE DAY OF THE DOLPHIN"));
Assert::That(correct("R1CHARD P. FEYNMAN - THE FEYNMAN LECTURE5 0N PHY51C5"), Equals("RICHARD P. FEYNMAN - THE FEYNMAN LECTURES ON PHYSICS"));
Assert::That(correct("R1CHARD P. FEYNMAN - 5TAT15T1CAL MECHAN1C5"), Equals("RICHARD P. FEYNMAN - STATISTICAL MECHANICS"));
Assert::That(correct("5TEPHEN HAWK1NG - A BR1EF H15T0RY 0F T1ME"), Equals("STEPHEN HAWKING - A BRIEF HISTORY OF TIME"));
Assert::That(correct("5TEPHEN HAWK1NG - THE UN1VER5E 1N A NUT5HELL"), Equals("STEPHEN HAWKING - THE UNIVERSE IN A NUTSHELL"));
Assert::That(correct("ERNE5T HEM1NGWAY - A FARWELL T0 ARM5"), Equals("ERNEST HEMINGWAY - A FARWELL TO ARMS"));
Assert::That(correct("ERNE5T HEM1NGWAY - F0R WH0M THE BELL T0LL5"), Equals("ERNEST HEMINGWAY - FOR WHOM THE BELL TOLLS"));
Assert::That(correct("ERNE5T HEM1NGWAY - THE 0LD MAN AND THE 5EA"), Equals("ERNEST HEMINGWAY - THE OLD MAN AND THE SEA"));
Assert::That(correct("J. R. R. T0LK1EN - THE L0RD 0F THE R1NG5"), Equals("J. R. R. TOLKIEN - THE LORD OF THE RINGS"));
}
It(RandomTests){
std::string base="abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ - ";
for (int i = 0; i < 40; i++) {
std::string s;
int sLen = randint(1, 35);
for (int j = 0; j < sLen; j++) s += randint(0, 9) == 0 ? "501"[randint(0, 2)] : base[randint(0, base.size() - 1)];
std::cout << "Testing for '" << s << "'\n";
Assert::That(correct(s), Equals(sol(s)));
}
}
};
#include <string>
std::string correct(std::string str){
replace(str.begin(), str.end(), '5', 'S');
replace(str.begin(), str.end(), '0', 'O');
replace(str.begin(), str.end(), '1', 'I');
return str;
}
replace函数包含于头文件#include中。泛型算法replace把队列中与给定值相等的所有值替换为另一个值,整个队列都被扫描,即此算法的各个版本都在线性时间内执行———其复杂度为O(n)。
但是上面的replace 用法是在 algorithm
中
template <class _FwdIt, class _Ty>
_CONSTEXPR20 void replace(const _FwdIt _First, const _FwdIt _Last, const _Ty& _Oldval, const _Ty& _Newval) {
// replace each matching _Oldval with _Newval
_Adl_verify_range(_First, _Last);
auto _UFirst = _Get_unwrapped(_First);
const auto _ULast = _Get_unwrapped(_Last);
for (; _UFirst != _ULast; ++_UFirst) {
if (*_UFirst == _Oldval) {
*_UFirst = _Newval;
}
}
}
这个方案比较易读,但有3次迭代,会损失一些效率。
#include <string>
#include <algorithm>
std::string correct(std::string str){
for(size_t i=0; i<str.size(); i++)
{
switch (str[i])
{
case '1': str[i]='I'; break;
case '0': str[i]='O'; break;
case '5': str[i]='S'; break;
}
}
return str;
}
简单的替换,使用switch 替换了if else,提高了易读性。
#include <string>
std::string correct(std::string str)
{
std::map<char, char> corrections{ {'5', 'S'}, {'0', 'O'}, {'1', 'I'} };
for(auto& c : str)
{
for(auto const cor : corrections)
if(c == cor.first)
{
c = cor.second;
break;
}
}
return str;
}
使用了map 来存放条件, auto& c 来遍历
#include <string>
#include <algorithm>
std::string correct(std::string str){
std::transform(str.begin(), str.end(), str.begin(), [](auto c){
if(c == '5') return 'S';
if(c == '0') return 'O';
if(c == '1') return 'I';
return c;
});
return str;
}
transform函数的作用是:将某操作应用于指定范围的每个元素。transform函数有两个重载版本:
transform(first,last,result,op);//first是容器的首迭代器,last为容器的末迭代器,result为存放结果的容器,op为要进行操作的一元函数对象或sturct、class。
transform(first1,last1,first2,result,binary_op);//first1是第一个容器的首迭代 器,last1为第一个容器的末迭代器,first2为第二个容器的首迭代器,result为存放结果的容器,binary_op为要进行操作的二元函数 对象或sturct、class。
注意:第二个重载版本必须要保证两个容器的元素个数相等才行,否则会抛出异常。
#include <string>
#include <algorithm>
std::string correct(std::string str){
std::for_each( str.begin(), str.end(), [](char& c){
switch( c )
{
case '5': c = 'S'; break;
case '0': c = 'O'; break;
case '1': c = 'I'; break;
default: break;
}
});
return str;
}
lambda表达式中传入了引用,原地修改字符串。
std::string correct(const std::string& str){
std::string r { str };
for (int i { 0 }; i < r.size(); i++){
if (r[i] == '5') { r[i] = 'S'; }
else if ( r[i] == '0') { r[i] = 'O'; }
else if ( r[i] == '1') { r[i] = 'I'; }
}
return r;
}
这里有一个特殊的写法:int i { 0 };
貌似是C++ 11 的初始化方法 如 vector<int> p={1,2,3};