aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/lipi-toolkit/3rdparty/lipi-toolkit/src/util/lib/LTKStrEncoding.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/lipi-toolkit/3rdparty/lipi-toolkit/src/util/lib/LTKStrEncoding.cpp')
-rw-r--r--src/plugins/lipi-toolkit/3rdparty/lipi-toolkit/src/util/lib/LTKStrEncoding.cpp401
1 files changed, 401 insertions, 0 deletions
diff --git a/src/plugins/lipi-toolkit/3rdparty/lipi-toolkit/src/util/lib/LTKStrEncoding.cpp b/src/plugins/lipi-toolkit/3rdparty/lipi-toolkit/src/util/lib/LTKStrEncoding.cpp
new file mode 100644
index 00000000..3d27249e
--- /dev/null
+++ b/src/plugins/lipi-toolkit/3rdparty/lipi-toolkit/src/util/lib/LTKStrEncoding.cpp
@@ -0,0 +1,401 @@
+/*****************************************************************************************
+* Copyright (c) 2007 Hewlett-Packard Development Company, L.P.
+* Permission is hereby granted, free of charge, to any person obtaining a copy of
+* this software and associated documentation files (the "Software"), to deal in
+* the Software without restriction, including without limitation the rights to use,
+* copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
+* Software, and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+* PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*****************************************************************************************/
+
+/************************************************************************
+ * SVN MACROS
+ *
+ * $LastChangedDate: 2011-02-08 11:00:11 +0530 (Tue, 08 Feb 2011) $
+ * $Revision: 832 $
+ * $Author: dineshm $
+ *
+ ************************************************************************/
+
+#include "LTKStrEncoding.h"
+#include "LTKMacros.h"
+#include "LTKLoggerUtil.h"
+
+/************************************************************************
+ * FILE DESCR: Implementation of LTKInkUtils that computes the statistics
+ * of a trace group
+ *
+ * CONTENTS:
+ * shapeStrToUnicode
+ * tamilShapeStrToUnicode
+ *
+ * AUTHOR: Deepu V.
+ *
+ * DATE: September 8, 2005
+ * CHANGE HISTORY:
+ * Author Date Description of change
+ ************************************************************************/
+
+
+/**********************************************************************************
+* AUTHOR : Deepu V.
+* DATE : 08-SEP-2005
+* NAME : LTKStrEncoding
+* DESCRIPTION : Initialization constructor
+* ARGUMENTS :
+* RETURNS :
+* NOTES :
+* CHANGE HISTROY
+* Author Date Description of change
+*************************************************************************************/
+
+LTKStrEncoding::LTKStrEncoding()
+{
+
+}
+
+/**********************************************************************************
+* AUTHOR : Deepu V.
+* DATE : 08-SEP-2005
+* NAME : shapeStrToUnicode
+* DESCRIPTION : Do the mapping from shaperecognition ID to Unicode string
+* ARGUMENTS : shapeRecProjectName - Shape Recognition Project name
+* : shapeIDs - The shape recognizer output IDs
+* : unicodeString - output unicode string
+* RETURNS :
+* NOTES :
+* CHANGE HISTROY
+* Author Date Description of change
+*************************************************************************************/
+int LTKStrEncoding::shapeStrToUnicode(const string shapeRecProjectName, const vector<unsigned short>&shapeIDs, vector<unsigned short>& unicodeString)
+{
+ LOG( LTKLogger::LTK_LOGLEVEL_DEBUG) <<
+ " Entering: LTKStrEncoding::shapeStrToUnicode()" << endl;
+
+
+ /*if(shapeRecProjectName.empty())
+ {
+ LOG(LTKLogger::LTK_LOGLEVEL_ERR)
+ <<"Error : "<< EMAP_NOT_FOUND <<":"<< getErrorMessage(EMAP_NOT_FOUND)
+ <<"LTKStrEncoding::shapeStrToUnicode()" <<endl;
+
+ LTKReturnError(EMAP_NOT_FOUND);
+ }*/
+
+ if(shapeRecProjectName == "tamil_iso_char")
+ {
+ return (tamilShapeStrToUnicode(shapeIDs,unicodeString));
+ }
+ else
+ {
+ return (numShapeStrToUnicode(shapeIDs,unicodeString));
+ }
+
+ //uncomment following if you need to implement mappings for
+ //additional shape rec projects
+ //else if (shapeRecProjectName == <YOUR PROJECT NAME>)
+ //{
+ // //Implement the logic here
+ //}
+
+
+ LOG( LTKLogger::LTK_LOGLEVEL_DEBUG) <<
+ " Exiting: LTKStrEncoding::shapeStrToUnicode()" << endl;
+}
+
+/**********************************************************************************
+* AUTHOR : Deepu V.
+* DATE : 08-SEP-2005
+* NAME : numShapeStrToUnicode
+* DESCRIPTION : Do the mapping from shaperecognition ID
+* : to Unicode string for numerals project
+* ARGUMENTS : shapeIDs - The shape recognizer output IDs
+* : unicodeString - output unicode string
+* RETURNS :
+* NOTES :
+* CHANGE HISTROY
+* Author Date Description of change
+*************************************************************************************/
+
+int LTKStrEncoding::numShapeStrToUnicode(const vector<unsigned short>& shapeIDs, vector<unsigned short>& unicodeString)
+{
+ LOG( LTKLogger::LTK_LOGLEVEL_DEBUG) <<
+ " Entering: LTKStrEncoding::numShapeStrToUnicode()" << endl;
+
+ vector<unsigned short>::const_iterator shapeIDsIter, shapeIDsEnd;
+
+ //iterating through the shape IDs
+ shapeIDsEnd = shapeIDs.end();
+ for(shapeIDsIter = shapeIDs.begin();shapeIDsIter != shapeIDsEnd;++shapeIDsIter)
+ {
+ if(*shapeIDsIter == SHRT_MAX )
+ unicodeString.push_back(L' ');
+ else
+ unicodeString.push_back((L'0')+ (*shapeIDsIter) );
+ }
+
+ LOG( LTKLogger::LTK_LOGLEVEL_DEBUG) <<
+ " Exiting: LTKStrEncoding::numShapeStrToUnicode()" << endl;
+
+ return SUCCESS;
+}
+
+/**********************************************************************************
+* AUTHOR : Deepu V.
+* DATE : 08-SEP-2005
+* NAME : tamilShapeStrToUnicode
+* DESCRIPTION : Do the mapping from shaperecognition ID
+* : to Unicode string for tamil_iso_char project
+* ARGUMENTS : shapeIDs - The shape recognizer output IDs
+* : unicodeString - output unicode string
+* RETURNS :
+* NOTES :
+* CHANGE HISTROY
+* Author Date Description of change
+*************************************************************************************/
+
+int LTKStrEncoding::tamilShapeStrToUnicode(const vector<unsigned short>& shapeIDs, vector<unsigned short>& unicodeString)
+{
+ int errorCode;
+ LOG( LTKLogger::LTK_LOGLEVEL_DEBUG) <<
+ " Entering: LTKStrEncoding::tamilShapeStrToUnicode()" << endl;
+
+ vector<unsigned short>::const_iterator shapeIDsIter, shapeIDsEnd;
+ int charIndex; //index variable for unicode string
+ unsigned short currentChar; //unicode representation of the character
+ bool matraFlag; //indicates whether a matra is following the current character
+
+ //iterating through the shape IDs
+ shapeIDsEnd = shapeIDs.end();
+ for(shapeIDsIter = shapeIDs.begin();shapeIDsIter != shapeIDsEnd;++shapeIDsIter)
+ {
+ if(*shapeIDsIter == SHRT_MAX )
+ unicodeString.push_back(L' ');
+ else if(*shapeIDsIter < 35)
+ {
+ if((errorCode = tamilCharToUnicode(*shapeIDsIter,unicodeString)) != SUCCESS)
+ {
+ return errorCode;
+ }
+ }
+ else if (*shapeIDsIter < 58 )
+ {
+ if((errorCode = tamilCharToUnicode((*shapeIDsIter-23),unicodeString)) != SUCCESS)
+ {
+ return errorCode;
+ }
+ unicodeString.push_back(0x0bbf); //i mAtra
+ }
+ else if (*shapeIDsIter < 81 )
+ {
+ if((errorCode = tamilCharToUnicode((*shapeIDsIter-46),unicodeString)) != SUCCESS)
+ {
+ return errorCode;
+ }
+ unicodeString.push_back(0x0bc0); //ii mAtra
+ }
+ else if (*shapeIDsIter < 99 )
+ {
+ if((errorCode = tamilCharToUnicode((*shapeIDsIter-69),unicodeString)) != SUCCESS)
+ {
+ return errorCode;
+ }
+ unicodeString.push_back(0x0bc1); //u mAtra
+ }
+ else if (*shapeIDsIter < 117 )
+ {
+ if((errorCode = tamilCharToUnicode((*shapeIDsIter-87),unicodeString)) != SUCCESS)
+ unicodeString.push_back(0x0bc2); //uu mAtra
+ }
+ else if (*shapeIDsIter < 118 )
+ {
+ unicodeString.push_back(0x0bbe); //aa mAtra
+ }
+ else if (*shapeIDsIter < 119 )
+ {
+ unicodeString.push_back(0x0bc6); //e mAtra
+ }
+ else if (*shapeIDsIter < 120 )
+ {
+ unicodeString.push_back(0x0bc7); //E mAtra
+ }
+ else if (*shapeIDsIter < 121 )
+ {
+ unicodeString.push_back(0x0bc8); //ai mAtra
+ }
+ else if(*shapeIDsIter < 122 )
+ {
+ //letter shri
+ unicodeString.push_back(0x0bb8);//ss
+ unicodeString.push_back(0x0bcd);//halant
+ unicodeString.push_back(0x0bb0);//r
+ unicodeString.push_back(0x0bc0);//ii
+ }
+ else if(*shapeIDsIter < 127 )
+ {
+ if((errorCode = tamilCharToUnicode((*shapeIDsIter-92),unicodeString)) != SUCCESS)
+ {
+ return errorCode;
+ }
+ unicodeString.push_back(0x0bc1); //u mAtra
+ }
+ else if(*shapeIDsIter < 132 )
+ {
+ if((errorCode = tamilCharToUnicode((*shapeIDsIter-97),unicodeString)) != SUCCESS)
+ {
+ return errorCode;
+ }
+ unicodeString.push_back(0x0bc2); //u mAtra
+ }
+ else if(*shapeIDsIter < 155 )
+ {
+ if((errorCode = tamilCharToUnicode((*shapeIDsIter-120),unicodeString)) != SUCCESS)
+ {
+ return errorCode;
+ }
+ unicodeString.push_back(0x0bcd); //halant
+ }
+ else if (*shapeIDsIter < 156 )
+ {
+ unicodeString.push_back(0x0b94);
+ }
+ else
+ {
+ LOG(LTKLogger::LTK_LOGLEVEL_ERR)
+ <<"Error : "<< EINVALID_SHAPEID <<":"<< getErrorMessage(EINVALID_SHAPEID)
+ <<"LTKStrEncoding::tamilShapeStrToUnicode()" <<endl;
+
+ LTKReturnError(EINVALID_SHAPEID);
+ }
+ }
+
+ //Applying rules for e, E , ai, o, O, and au matras
+ charIndex = 0;
+ while( charIndex < unicodeString.size() )
+ {
+ currentChar = unicodeString[charIndex];
+ switch(currentChar)
+ {
+ case 0x0bc6://e
+ case 0x0bc7://E
+ case 0x0bc8://ai
+ if( (charIndex +1) < unicodeString.size() )
+ {
+ unicodeString[charIndex] = unicodeString[charIndex+1];
+ unicodeString[charIndex+1] = currentChar;
+ charIndex += 2;
+ }
+ else
+ {
+ ++charIndex;
+ }
+ break;
+ case 0x0bbe: //check for `o' or `O'
+ if(charIndex>0)//within string bounds
+ {
+ if(unicodeString[charIndex-1] == 0x0bc6 )
+ {
+ unicodeString[charIndex-1] = 0x0bca;
+ unicodeString.erase(unicodeString.begin()+charIndex);
+ }
+ else if(unicodeString[charIndex-1] == 0x0bc7 )
+ {
+ unicodeString[charIndex-1] = 0x0bcb;
+ unicodeString.erase(unicodeString.begin()+charIndex);
+ }
+ else
+ {
+ ++charIndex;
+ }
+ }
+ else
+ {
+ ++charIndex;
+ }
+ break;
+ case 0x0bb3: //check for au
+ matraFlag = (charIndex+1<unicodeString.size() && (unicodeString[charIndex+1] > 0x0bbd && unicodeString[charIndex+1] < 0x0bc3 ) );
+
+ //if la is not follwed by a matra and is preceded by an e matra it is an au matra
+ if((charIndex >0)&&(unicodeString[charIndex-1] == 0x0bc6) && (!matraFlag))
+ {
+ unicodeString[charIndex-1] = 0x0bcc;
+ unicodeString.erase(unicodeString.begin()+charIndex);
+ }
+ else
+ {
+ ++charIndex;
+ }
+ break;
+ default:
+ ++charIndex;
+
+ }
+
+ }
+
+ LOG( LTKLogger::LTK_LOGLEVEL_DEBUG) <<
+ " Exiting: LTKStrEncoding::tamilShapeStrToUnicode()" << endl;
+
+ return SUCCESS;
+}
+
+int LTKStrEncoding::tamilCharToUnicode(const unsigned short& shapeID, vector<unsigned short>& unicodeString)
+{
+
+ LOG( LTKLogger::LTK_LOGLEVEL_DEBUG) <<
+ " Entering: LTKStrEncoding::tamilCharToUnicode()" << endl;
+
+
+ if(shapeID < 35)
+ {
+ if(shapeID == 34)
+ {
+ unicodeString.push_back(0x0b95);
+ unicodeString.push_back(0x0bcd);
+ unicodeString.push_back(0x0bb7);
+ }
+ else
+ {
+ unicodeString.push_back(tamilIsoCharMap[shapeID]);
+ }
+ }
+ else
+ {
+ LOG(LTKLogger::LTK_LOGLEVEL_ERR)
+ <<"Error : "<< EINVALID_SHAPEID <<":"<< getErrorMessage(EINVALID_SHAPEID)
+ <<"LTKStrEncoding::tamilCharToUnicode()" <<endl;
+
+ LTKReturnError(EINVALID_SHAPEID);
+ }
+ LOG( LTKLogger::LTK_LOGLEVEL_DEBUG) <<
+ " Exiting: LTKStrEncoding::tamilCharToUnicode()" << endl;
+
+ return SUCCESS;
+}
+
+
+/**********************************************************************************
+* AUTHOR : Deepu V.
+* DATE : 08-SEP-2005
+* NAME : ~LTKStrEncoding
+* DESCRIPTION : destructor
+* ARGUMENTS :
+* RETURNS :
+* NOTES :
+* CHANGE HISTROY
+* Author Date Description of change
+*************************************************************************************/
+LTKStrEncoding::~LTKStrEncoding()
+{
+
+}