summaryrefslogtreecommitdiffstats
path: root/src/linguist/shared/simtexth.h
blob: 70c889662ab0ce64f89e071e0935cb228f712847 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the Qt Linguist of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:GPL-EXCEPT$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3 as published by the Free Software
** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/

#ifndef SIMTEXTH_H
#define SIMTEXTH_H

const int textSimilarityThreshold = 190;

#include <QString>
#include <QList>

QT_BEGIN_NAMESPACE

class Translator;

struct Candidate
{
    Candidate() {}
    Candidate(const QString &c, const QString &s, const QString &d, const QString &t)
        : context(c), source(s), disambiguation(d), translation(t)
    {}

    QString context;
    QString source;
    QString disambiguation;
    QString translation;
};

inline bool operator==( const Candidate& c, const Candidate& d ) {
    return c.translation == d.translation && c.source == d.source && c.context == d.context
           && c.disambiguation == d.disambiguation;
}
inline bool operator!=( const Candidate& c, const Candidate& d ) {
    return !operator==( c, d );
}

typedef QList<Candidate> CandidateList;

struct CoMatrix
{
    CoMatrix(const QString &str);
    CoMatrix() {}

    /*
      The matrix has 20 * 20 = 400 entries.  This requires 50 bytes, or 13
      words.  Some operations are performed on words for more efficiency.
    */
    union {
        quint8 b[52];
        quint32 w[13];
    };
};

/**
 * This class is more efficient for searching through a large array of candidate strings, since we only
 * have to construct the CoMatrix for the \a stringToMatch once,
 * after that we just call getSimilarityScore(strCandidate).
 * \sa getSimilarityScore
 */
class StringSimilarityMatcher {
public:
    StringSimilarityMatcher(const QString &stringToMatch);
    int getSimilarityScore(const QString &strCandidate);

private:
    CoMatrix m_cm;
    int m_length;
};

/**
 * Checks how similar two strings are.
 * The return value is the score, and a higher score is more similar
 * than one with a low score.
 * Linguist considers a score over 190 to be a good match.
 * \sa StringSimilarityMatcher
 */
static inline int getSimilarityScore(const QString &str1, const QString &str2)
{
    return StringSimilarityMatcher(str1).getSimilarityScore(str2);
}

CandidateList similarTextHeuristicCandidates( const Translator *tor,
                                              const QString &text,
                                              int maxCandidates );

QT_END_NAMESPACE

#endif