#------------------------------------------------------------------------------
# nn.cfg
#
# Configuration file for Nearest Neighbor Classification Method for
# Lipi Toolkit 4.0.0
#------------------------------------------------------------------------------

#------------------------------------------------------------------------------
# The standard format for the configuration entries is the name of the 
# configuration parameter seperated by an equal to sign and then the value of 
# the configuration parameter. For example:
# ConfigurationEntryName = value
#
# Lines starting with a # are commnet lines
#
# A cfg entry is strictly a key value pair and leaving the key without the 
# value or specification of a value out of the range is not permitted
#
# If a cfg entry is not specified at all, then default values are used by the 
# recognizer
#------------------------------------------------------------------------------ 

#-------------------------------
#    PREPROCESSING  
#-------------------------------

#-------------------------------------------------------------------------------
# ResampTraceDimension
#
# Description: The number of target points for resampling. In other words, 
# each character will be resampled to this number of points. In case of 
# multistroke characters, this number of points will be distributed between 
# the strokes in proportion to their lengths in proportion to their initial 
# number of points.  
#
# Valid values: Any integer > 0
# Units: Points
# Default value: 60
# Typical value: Average number of points per character in the training data set.
#-------------------------------------------------------------------------------
ResampTraceDimension = 60


#-------------------------------------------------------------------------------
# ResampPointAllocation
#
# Description: Method to be used for point allocation among different strokes 
# during resampling. Two schemes have been implemented lengthbased and point 
# based. In lengthbased allocation scheme, the number of points allocated to 
# each stroke is proportional to the length of the stroke. Length of a stroke 
# is calculated as the sum of the distances between each point in the stroke. 
# In the pointbased allocation scheme, the target stroke point allocation is 
# proportional to the number of points in the initial stroke. In the 
# interpointdistbased scheme, the distance between consecutive points is fixed 
# resulting in variable number based on the length of the trajectory.
# 
# Valid value: [lengthbased | pointbased | interpointdistbased]
# Default value: lengthbased
#-------------------------------------------------------------------------------
ResampPointAllocation = lengthbased


#-------------------------------------------------------------------------------
# NormDotSizeThreshold
#
# Description: This threshold is used to determine whether a character is a dot.
# It is expressed in real length terms (inches) and converted internally to
# points using knowledge of the device’s spatial resolution. If the width 
# and height are both less than this threshold, then all the points are replaced
# with the center of the of the normalized character, basically to represent it 
# as a dot
#
# Valid values: Any real number > 0
# Units: inches
# Default value: 0.01
# Typical value: < 0.1 
#-------------------------------------------------------------------------------
NormDotSizeThreshold = 0.01

#-------------------------------------------------------------------------------
# NormLineWidthThreshold
#
# Description: This threshold is used to detect whether the character is a 
# vertical or horizontal line. If only the height is less than this threshold 
# then the character is detected as a horizontal line and if only the width is 
# less than this threshold then the character is detected as a vertical line. 
# Assuming the height is along the y-dimension and width is along the x-
# dimension, during normalization of a horizontal line only the x-coordinates 
# are scaled and the y-coordinates are translated to the center of the character, 
# with out scaling. Similarly for the vertical line only the y-coordinates are 
# normalized and the x-coordinates are translated to the center with out scaling
#
# Valid values: Any real number > 0
# Units: inches
# Default value: 0.01
# Typical value: < 0.1 
#-------------------------------------------------------------------------------
NormLineWidthThreshold = 0.01

#-------------------------------------------------------------------------------
# NormPreserveAspectRatio
#
# Description: This parameter is used to indicate whether the aspect ratio 
# has to be preserved during normalization. The aspect ratio is the calculated 
# as maximum of (height/width , width/height). The aspect ratio is preserved only 
# if the calculated aspect ratio is greater than the threshold value specified 
# through NormPreserveAspectRatioThreshold and this configuration variable is 
# set to true. If this configuration variable is set to false the aspect ratio 
# is not preserved during normalization.
# 
# Valid value: [true | false]
# Default value: true
#-------------------------------------------------------------------------------
NormPreserveAspectRatio = true


#-------------------------------------------------------------------------------
# NormPreserveAspectRatioThreshold
#
# Description: Aspect ratio is preserved during normalization if the computed 
# aspect ratio (max(height/width, width/height)) is greater than this threshold 
# and the configuration value NormPreserveAspectRatio is set to true. During 
# aspect ratio preserving normalization, the larger of the two dimensions is 
# normalized to the standard size and the other dimension is normalized 
# proportional to the initial height and width ratio, so that the initial 
# aspect ratio is maintained.
#
# Valid values: Any real number >= 1
# Default value: 3
# Typical value: >= 1.5
#------------------------------------------------------------------------------- 
NormPreserveAspectRatioThreshold = 3

#-------------------------------------------------------------------------------
# NormPreserveRelativeYPosition
#
# Description: The relative Y position is the mean of the y-coordinates in the 
# input character. During normalization if this parameter is set to true, each 
# y-coordinate of the character point is translated by the initial y-mean value, 
# so that the mean of the y-coordinates remains the same before and after  
# normalization. This is typically used in the word recognition context where 
# each stroke of the character has to be normalized separately and the relative 
# position of the strokes should be maintained even after normalization. 
# 
# Valid value: [true | false]
# Default value: false
#------------------------------------------------------------------------------- 
NormPreserveRelativeYPosition = false

#-------------------------------------------------------------------------------
# SmoothWindowSize
#
# Description: The configuration value specifies the length of the moving 
# average filter (size of the window) for smoothing the character image.
# If this value is set to N, then each point in the input character is replaced
# by the average of value of this point, (N-1)/2 points on the right and (N-1)/2
# on the left of this point.
#
# Valid value: Any integer > 0
# Units: Points
# Typical value: 5
# Default value: 3
#-------------------------------------------------------------------------------
SmoothWindowSize = 3

#-------------------------------------------------------------------------------
# PreprocSequence
# 
# Description: This variable is used to specify the sequence of preprocessing
# operations to be carried out on the input character sample before extracting 
# the features. A valid preprocessing sequence can consist of combination of one 
# or more of the functions selected from the valid values set mentioned below. 
# The CommonPreProc prefix is used specify the default preprocessing module of 
# LipiTk. The user can add his own preprocessing functions in other modules and 
# specify them in the preprocessing sequence.
#
# Valid values: Any sequence formed from the following set
# CommonPreProc::normalizeSize;
# CommonPreProc::removeDuplicatePoints;
# CommonPreProc::smoothenTraceGroup;
# CommonPreProc::dehookTraces;
# CommonPreProc::normalizeOrientation;
# CommonPreProc::resampleTraceGroup;
# Default value: {CommonPreProc::normalizeSize,CommonPreProc::resampleTraceGroup,CommonPreProc::normalizeSize}
#-------------------------------------------------------------------------------
PreprocSequence={CommonPreProc::normalizeSize,CommonPreProc::resampleTraceGroup,CommonPreProc::normalizeSize}

#---------------------------------------
#    TRAINING
#---------------------------------------

#-------------------------------------------------------------------------------
# NNTrainPrototypeSelectionMethod
# 
# Description: This is used to specify the prototype selection method to be used 
# while training the shape recognizer. When set to hier-clustering, the 
# prototypes are selected using hierarchical clustering method.
#
# Valid value: [hier-clustering]
# Default value: hier-clustering
#-------------------------------------------------------------------------------
NNTrainPrototypeSelectionMethod=hier-clustering


#-------------------------------------------------------------------------------
# NNTrainPrototypeReductionFactorPerClass
#
# Description: This config parameter is used only when the prototype selection 
# is clustering. This config parameter is used to specify the amount of the 
# initial prototypes to be excluded during prototype selection.  
# Set it to automatic if the number of clusters is to be determined 
# automatically. Set it to none if no prototype selection is required. If the 
# value of this parameter is set to a number between 1-100, say 25, then 75% 
# (i.e 100-25) of the initial training data are retained as prototypes. 
# This parameter can be specified only if the NNTrainNumPrototypesPerClass
# is not specified.
#
# Valid value: [automatic | none | any real number from 0-100] 
# Default value: automatic
#-------------------------------------------------------------------------------
NNTrainPrototypeReductionFactorPerClass = none

#-------------------------------------------------------------------------------
# NNTrainNumPrototypesPerClass
#
# Description: This config parameter is used only when the prototype selection 
# is clustering. This is used to specify the number of prototypes to be selected 
# from the training data. This parameter can be specified only if 
# PrototypeReductionFactor is not specified. This config entry is commented as 
# only one of NNTrainPrototypeReductionFactorPerClass or 
# NNTrainNumPrototypesPerClass can be active in a valid cfg file.
#
# Valid value: [automatic | none | any integer from 1-N]
#              (N is the number of samples # per class)
# Default value: automatic
#-------------------------------------------------------------------------------  
#NNTrainNumPrototypesPerClass=automatic

# Note: Only one of either PrototypeReductionFactor or NumClusters can be 
# enabled at any particular instance 

#-----------------------------------------
#    FEATURE EXTRACTION
#-----------------------------------------

#-------------------------------------------------------------------------------
# FeatureExtractor
#
# Description: The configuration value is used to specify the feature extraction 
# module to be used for feature extraction. The point float feature extraction 
# module extracts the x,y,cosine and sine angle features at every point of the 
# character.
#
# Valid value: [PointFloatShapeFeatureExtractor]
# Default value: PointFloatShapeFeatureExtractor
#-------------------------------------------------------------------------------
FeatureExtractor=PointFloatShapeFeatureExtractor

#-----------------------------------------
#    RECOGNITION
#-----------------------------------------

#-------------------------------------------------------------------------------
# NNRecoDTWEuFilterOutputSize
# 
# Description: This config parameter is used to set the number of nearest 
# neighbours (filtered based on euclidean distance)to be considered for 
# calculating dtw distance. The value is specified as fraction of total number
# of prototypes.
# Set to all if all samples are to be considered for
# calculating dtw distance. This is mainly used to increase the speed of 
# recognition.
#
# Valid value: [all| any integer from 1-100]  
# Default Value: all
#-------------------------------------------------------------------------------
NNRecoDTWEuFilterOutputSize = all

#-------------------------------------------------------------------------------
# NNRecoRejectThreshold
# 
# Description: Threshold to reject the test sample. If the confidence obtained 
# for the recognition of test sample is less than this threshold then the test 
# sample is rejected. 
#
# Valid value: Any real number from 0-1
# Default value: 0.001
#-------------------------------------------------------------------------------
NNRecoRejectThreshold = 0.001

#-------------------------------------------------------------------------------
# NNRecoNumNearestNeighbors
# 
# Description: Number of nearest neighbors to be considered during recognition
# and computation of confidence. If the value is set to 1, nearest neighbor 
# classifier is used, otherwise k-nearest neighbor or Adaptive k-nearest 
# neighbor classifiers are used. By default, nearest neighbor classifier is used.
#
# Valid value: Any integer >= 1
# Default value: 1
#-------------------------------------------------------------------------------
NNRecoNumNearestNeighbors = 1

#-------------------------------------------------------------------------------
# NNRecoUseAdaptiveKNN
#
# Description: This parameter is used to specify whether Adaptive k-nearest 
# neighbor recognizer (A-kNN) is to be used. If set to true, A-kNN recognizer is 
# used, otherwise kNN recognizer is used.  The A-kNN recognizer automatically 
# determines the number of nearest neighbors to be considered for recognition in 
# each class. If NNRecoNumNearestNeighbors is set to 1, this parameter is 
# automatically set to false and the manually set value will not be considered.
# The confidence values obtained do not add to 1 for all classes when
# Adaptive-kNN is used.
#
# Valid value: [true | false]
# Default value: false
#-------------------------------------------------------------------------------
NNRecoUseAdaptiveKNN = false

#--------------------------------------------
#      COMMON FOR TRAINING AND RECOGNITION
#--------------------------------------------

#-------------------------------------------------------------------------------
# NNPrototypeDistanceMeasure
#
# Description: This configuration parameter is used to specify the distance 
# measure to be used in clustering and recognition. DTW or Euclidean distance 
# measures can be used.
# 
# Valid value [dtw | eu]
# Default value: dtw
#-------------------------------------------------------------------------------
NNPrototypeDistanceMeasure = dtw

#-------------------------------------------------------------------------------
# NNDTWBandingRadius
#
# Description: This configuration parameter specifies the banding radius
# to be used for DTW computation. This is used to speed up the computation
# process. If this value is zero no banding is done. The value is specified as
# fraction of ResampTraceDimension to be used while computing the DTW
# distance.
#
# Valid values: Any real number > 0 and <= 1
# Default Value: 0.33
#-------------------------------------------------------------------------------
NNDTWBandingRadius=0.33

#-------------------------------------------------------------------------------
# NNDTWBandingRadius
#
# Description: This configuration parameter specifies the mode for 
# opening the mdt file. 
#
# Valid values: ascii, binary
# Default Value: ascii
#-------------------------------------------------------------------------------

NNMDTFileOpenMode=ascii