1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
|
#!/bin/env python3
# Copyright (C) 2023 The Qt Company Ltd.
# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR BSD-3-Clause
"""Digest cbor-tags.xml file into code for insertion into main.cpp
See main.cpp's comment on how to regenerate its GENERATED CODE.
See ./cbortag.py --help for further details on how to invoke.
You can import this as a module without invoking the script.
"""
def firstChild(parent, tag):
    """Find the first element directly under parent whose name is tag.

    Only direct children that are element nodes are considered; text,
    comments and other node kinds are skipped. Raises StopIteration
    when parent has no such child."""
    for child in parent.childNodes:
        if child.nodeType != parent.ELEMENT_NODE:
            continue
        if child.nodeName == tag:
            return child
    # Nothing matched: signal it the same way an exhausted next() would.
    raise StopIteration
def nodeAttrIs(node, attr, seek):
    """Test whether an element carries a specific attribute value.

    node is the DOM node to inspect, attr the attribute's name and
    seek the value it is required to have. The result is True only
    when node is an element, has the attribute, and the attribute's
    value equals seek; in every other case it is False."""
    if node.nodeType == node.ELEMENT_NODE:
        attributes = node.attributes
        if attributes is not None and attr in attributes:
            return attributes[attr].value == seek
    return False
def getRfcValue(node):
    """Pull the RFC reference out of an <xref type="rfc" ...> element.

    When the element has any children, the value of its first child
    is used (this is where a reference with section details lives).
    Failing that, the element's data attribute is used. When neither
    is available the result is an empty string."""
    children = node.childNodes
    if children:
        # Only the first child is consulted; later ones are ignored.
        return children[0].nodeValue
    attributes = node.attributes
    if attributes is not None and 'data' in attributes:
        return attributes['data'].value
    return ''
def readRegistry(filename):
    """Parse the XML registry and hand back the parts that matter.

    filename is the path to the cbor-tags.xml file. The result is a
    pair: the text of the document's title element and an iterator
    over the record nodes of its registry element. Along the way this
    asserts that the document element has id="cbor-tags" and that the
    registry element has id="tags"."""
    from xml.dom.minidom import parse
    root = parse(filename).documentElement
    assert nodeAttrIs(root, 'id', 'cbor-tags')
    titleNode = firstChild(root, 'title')
    title = titleNode.childNodes[0].nodeValue
    registry = firstChild(root, 'registry')
    assert nodeAttrIs(registry, 'id', 'tags')
    # Lazily filter the registry's children down to its <record> nodes.
    return title, (child for child in registry.childNodes
                   if child.nodeName == 'record')
def digest(record):
    """Reduce one record of cbor-tags.xml to a tag number and description.

    Returns (None, None) for a record that is not of interest,
    otherwise (n, t) with n the record's numeric tag code and t a
    text describing it.

    The description is the whitespace-normalised text of the record's
    semantics child. An RFC reference is taken from an xref child with
    type="rfc", either directly under the record or inside semantics;
    one directly under the record takes precedence. When a reference
    is found it is appended to the description in square brackets.

    A record is not of interest when it lacks a non-empty semantics
    text, lacks a value, has a dash in its value (a range), or has a
    value of 256 or more without any RFC reference."""
    fields = {}
    for child in record.childNodes:
        name = child.nodeName
        if name == 'value':
            fields['value'] = child.childNodes[0].nodeValue
            continue
        if name == 'xref':
            if nodeAttrIs(child, 'type', 'rfc'):
                found = getRfcValue(child)
                if found:
                    # May replace a reference already taken from semantics.
                    fields['rfc'] = found
            continue
        if name != 'semantics':
            continue

        # Gather the plain text of semantics and at most one RFC xref.
        pieces, found = [], ''
        for part in child.childNodes:
            if part.nodeType == child.TEXT_NODE:
                pieces.append(part.nodeValue)
            elif part.nodeType == child.ELEMENT_NODE:
                # Other elements are ignored; their content is not added.
                if part.nodeName == 'xref' and nodeAttrIs(part, 'type', 'rfc'):
                    assert not found, ('Duplicate RFC ?', found, part)
                    found = getRfcValue(part)
        text = ''.join(pieces)
        if found:
            # Removing the xref can leave an empty pair of parentheses.
            if text.endswith('()'):
                text = text[:-2].rstrip()
            # Never displace a reference found directly on the record.
            fields.setdefault('rfc', found)
        fields['semantics'] = ' '.join(text.split())

    description = fields.get('semantics')
    if not description or 'value' not in fields or '-' in fields['value']:
        return None, None
    code = int(fields['value'])
    if 'rfc' in fields:
        cited = fields["rfc"].replace('rfc', 'RFC')
        return code, f'{description} [{cited}]'
    if code >= 256:
        return None, None
    return code, description
def entries(records):
    """Yield a (value, text) pair for every record worth keeping.

    Each record is passed through digest; those for which it reports
    no value are dropped. The pairs that remain are the raw material
    for the tagDescriptions array in main.cpp."""
    for value, text in map(digest, records):
        if value is None:
            continue
        yield value, text
def marginBound(text, prior, left, right):
    """Lay out a string literal so that it respects a right margin.

    text is the content of the literal, without quotes; quotes are
    added here. The other parameters are integers: prior is the
    length of what is already on the current line, left is the indent
    to use for any further lines and right is the column to stay
    within where possible.

    If the quoted text, preceded by a space, fits on the current line
    then exactly that is returned. Otherwise text is broken at spaces
    into several quoted fragments, each on its own line: every
    fragment is preceded by a newline and left spaces of indent, and
    every fragment after the first starts with the space at which the
    text was broken. A single word longer than the available width is
    never split."""
    inline = f' "{text}"'
    if prior + len(inline) < right:
        return inline

    budget = right - left - 2  # room between the quotes on a wrapped line
    first, *rest = text.split(' ')
    fragments = []
    pending = first
    for word in rest:
        candidate = f'{pending} {word}'
        if len(candidate) > budget:
            fragments.append(pending)
            # Carry the separating space over to the next fragment.
            pending = f' {word}'
        else:
            pending = candidate
    fragments.append(pending)

    lead = '\n' + ' ' * left
    return ''.join(f'{lead}"{fragment}"' for fragment in fragments)
def main(argv, speak):
    """Drive the whole process: parse arguments, read XML, write code.

    argv is the command-line argument list, whose first entry is the
    name of this script. speak is standard output, or any object with
    a compatible write method, to which the generated code is written.
    If the --out option is given, the file it names is written instead
    of speak. The path of the cbor-tags.xml file is an optional
    positional argument, defaulting to cbor-tags.xml in the current
    directory. Returns None."""
    from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
    parser = ArgumentParser(
        description='Digest cbor-tags.xml into code to insert in main.cpp',
        formatter_class=ArgumentDefaultsHelpFormatter)
    # Without nargs='?' a positional is always required, so its default
    # would never be used.
    parser.add_argument('path', nargs='?', help='path of the cbor-tags.xml file',
                        default='cbor-tags.xml')
    parser.add_argument('--out', help='file to write instead of standard output')
    args = parser.parse_args(argv[1:])

    title, records = readRegistry(args.path)
    # Digest every record before opening the output, so that a problem
    # with the input cannot leave a truncated or partial output file.
    rows = sorted(entries(records))

    def generate(emit):
        """Write the generated code piece by piece using emit."""
        emit(f"""\
struct CborTagDescription
{{
QCborTag tag;
const char *description; // with space and parentheses
}};
// {title}
static const CborTagDescription tagDescriptions[] = {{
// from https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml
""")
        for value, text in rows:
            prior = f' {{ QCborTag({value}),'
            body = marginBound(f' ({text})', len(prior), 6, 96)
            emit(f"{prior}{body} }},\n")
        emit("""\
{ QCborTag(-1), nullptr }
};
""")

    if args.out:
        # Open for writing (the default mode is read-only) and make sure
        # the file is flushed and closed once everything is written.
        with open(args.out, 'w', encoding='utf-8') as sink:
            generate(sink.write)
    else:
        generate(speak.write)
if __name__ == '__main__':
    # Only run when executed as a script, not when imported as a module.
    import sys
    status = main(sys.argv, sys.stdout)
    # Exit with whatever main returned, exactly as sys.exit() would.
    raise SystemExit(status)
|