examples/corelib/serialization/cbordump/cbortag.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188

#!/bin/env python3
# Copyright (C) 2023 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR BSD-3-Clause
"""Digest cbor-tags.xml file into code for insertion into main.cpp

See main.cpp's comment on how to regenerate its GENERATED CODE.
See ./cbortag.py --help for further details on how to invoke.
You can import this as a module without invoking the script.
"""

def firstChild(parent, tag):
    """Find the first element child of parent whose name is tag.

    Only element nodes are considered; text, comments and the like
    are skipped.  Raises StopIteration if parent has no such child."""
    def matches(node):
        return node.nodeType == parent.ELEMENT_NODE and node.nodeName == tag
    return next(filter(matches, parent.childNodes))

def nodeAttrIs(node, attr, seek):
    """Test whether a node's named attribute has a given value.

    Takes the node to inspect, the attribute's name and the value
    sought.  Returns True precisely when node is an element that has
    the named attribute with exactly that value; non-element nodes and
    elements lacking the attribute yield False."""
    if node.nodeType == node.ELEMENT_NODE:
        attributes = node.attributes
        if attributes is not None and attr in attributes:
            return attributes[attr].value == seek
    return False

def getRfcValue(node):
    """Get the RFC reference out of an <xref type="rfc" ...> element

    When the element has children, the value of its first child is
    used (typically a text giving the RFC and section details).
    Failing that, the element's data attribute is used, if present.
    When neither is available, the result is an empty string."""
    kids = node.childNodes
    if kids:
        # Only the first child is consulted; any others are ignored.
        return kids[0].nodeValue
    attributes = node.attributes
    if attributes is not None and 'data' in attributes:
        return attributes['data'].value
    return ''

def readRegistry(filename):
    """Parse the XML file and pick out the parts of interest.

    The sole argument is the path of the cbor-tags.xml file.  Returns
    a twople: the text of the document's title element and an iterator
    over the record nodes of its registry.  Asserts, along the way,
    that the document element has id "cbor-tags" and that its first
    registry child has id "tags"."""
    from xml.dom.minidom import parse
    root = parse(filename).documentElement
    assert nodeAttrIs(root, 'id', 'cbor-tags')
    heading = firstChild(root, 'title')
    title = heading.childNodes[0].nodeValue
    tags = firstChild(root, 'registry')
    assert nodeAttrIs(tags, 'id', 'tags')
    # Lazily select only the <record> children of the registry.
    return title, filter(lambda node: node.nodeName == 'record', tags.childNodes)

def digest(record):
    """Reduce one record from cbor-tags.xml to a tag number and text

    Returns the twople (None, None) for any record that is not of
    interest; otherwise (n, t), with n the record's numeric tag code
    and t a description of it.

    A record is of no interest if it lacks a value or a non-empty
    semantics field, or if its value contains a dash (marking it as a
    range).  An RFC reference is taken from an xref child with
    type="rfc", either of the record itself or of its semantics field;
    the record's own xref takes precedence.  When an RFC is known it
    is appended, in square brackets, to the semantics text; when none
    is known, only records with value below 256 are of interest."""
    found = {}
    for child in record.childNodes:
        name = child.nodeName
        if name == 'value':
            found['value'] = child.childNodes[0].nodeValue
        elif name == 'xref':
            if nodeAttrIs(child, 'type', 'rfc'):
                ref = getRfcValue(child)
                if ref:
                    # Deliberately overrides any RFC found in semantics.
                    found['rfc'] = ref
        elif name == 'semantics':
            fragments, ref = [], ''
            for part in child.childNodes:
                if part.nodeType == child.TEXT_NODE:
                    fragments.append(part.nodeValue)
                elif part.nodeType == child.ELEMENT_NODE:
                    # Other elements' content is, for now, left out of the text.
                    if part.nodeName == 'xref' and nodeAttrIs(part, 'type', 'rfc'):
                        assert not ref, ('Duplicate RFC ?', ref, part)
                        ref = getRfcValue(part)
            words = ''.join(fragments)
            if ref:
                # Removing the xref may leave empty parentheses behind.
                if words.endswith('()'):
                    words = words[:-2].rstrip()
                found.setdefault('rfc', ref)
            # Collapse each run of whitespace to a single space.
            found['semantics'] = ' '.join(words.split())

    description = found.get('semantics')
    if not description or 'value' not in found:
        return None, None
    raw = found['value']
    if '-' in raw:
        return None, None
    number = int(raw)
    if 'rfc' in found:
        label = found['rfc'].replace('rfc', 'RFC')
        return number, f'{description} [{label}]'
    if number >= 256:
        return None, None
    return number, description

def entries(records):
    """Yield a (value, text) pair for each record of interest.

    These pairs supply the content of main.cpp's tagDescriptions
    array; digest decides which records are kept, signalling a record
    to be skipped by returning None as its value."""
    pairs = (digest(item) for item in records)
    yield from (pair for pair in pairs if pair[0] is not None)

def marginBound(text, prior, left, right):
    """Lay out a string literal tidily, splitting it if need be.

    The first parameter, text, is the content of the string literal,
    without quotes; these are added here.  The rest are integers:
    prior is the length of what is already on the current line, left
    is the indent to use for any further lines and right is the
    margin to stay within, where possible.

    If the whole quoted text, preceded by a space, fits after what is
    already on the line, that is what is returned.  Otherwise, text is
    broken at spaces into fragments, each quoted and placed on a line
    of its own, preceded by a newline and an indent of width left.
    Each fragment after the first begins with the space at which the
    text was broken.  A single word too long for the margins is not
    split."""
    if prior + len(text) + 3 < right: # a space and two quotes
        return ' "' + text + '"'

    limit = right - left - 2 # room between the quotes
    first, *rest = text.split(' ')
    # The empty first piece makes the result begin with the separator.
    pieces, pending = [''], [first]
    used = len(first) + 1 # each pending word is counted with one space
    for word in rest:
        if used + len(word) > limit:
            pieces.append('"' + ' '.join(pending) + '"')
            # The empty entry gives the new fragment its leading space.
            pending = ['', word]
            used = len(word) + 2
        else:
            pending.append(word)
            used += len(word) + 1
    pieces.append('"' + ' '.join(pending) + '"')
    separator = '\n' + ' ' * left
    return separator.join(pieces)

def main(argv, speak):
    """Drive the digestion of cbor-tags.xml into C++ source.

    Takes the command-line argument list (whose first entry is the
    name of this script) and standard output (or a compatible stream
    of your choosing) to which to write the generated code.  If the
    --out option is given in the arguments, the file it names is
    (over)written, as UTF-8, in place of that stream.  The positional
    path argument is optional and defaults to cbor-tags.xml.

    Returns None.  Invalid arguments make argparse raise SystemExit."""
    from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
    parser = ArgumentParser(
        description='Digest cbor-tags.xml into code to insert in main.cpp',
        formatter_class=ArgumentDefaultsHelpFormatter)
    # Without nargs='?' a positional argument is mandatory, so its
    # default would never be used.
    parser.add_argument('path', nargs='?', help='path of the cbor-tags.xml file',
                        default='cbor-tags.xml')
    parser.add_argument('--out', help='file to write instead of standard output')
    args = parser.parse_args(argv[1:])

    # Parse the input before touching the output, so that a missing or
    # malformed input file does not leave an emptied output file behind.
    title, records = readRegistry(args.path)

    # The file must be opened for writing; open()'s default mode is
    # read-only, on which write() fails.
    sink = open(args.out, 'w', encoding='utf-8') if args.out else None
    try:
        emit = (speak if sink is None else sink).write
        emit(f"""\
struct CborTagDescription
{{
    QCborTag tag;
    const char *description; // with space and parentheses
}};

// {title}
static const CborTagDescription tagDescriptions[] = {{
    // from https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml
""")

        for value, text in sorted(entries(records)):
            prior = f'    {{ QCborTag({value}),'
            # Continuation lines get a 6-space indent, within 96 columns.
            body = marginBound(f' ({text})', len(prior), 6, 96)
            emit(f"{prior}{body} }},\n")

        emit("""\
    { QCborTag(-1), nullptr }
};
""")
    finally:
        # Only close what was opened here; speak belongs to the caller.
        if sink is not None:
            sink.close()

if __name__ == '__main__':
    # Run as a script: process the command line, writing to standard
    # output unless told otherwise, and exit with main's return value.
    import sys
    status = main(sys.argv, sys.stdout)
    sys.exit(status)