summaryrefslogtreecommitdiffstats
path: root/src/xmlpatterns/api/qabstractxmlreceiver.cpp
blob: e212f88b8347011ce73af6e55e507a8fe58f7884 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtXmlPatterns module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/

#include <QString>

#include "qitem_p.h"

#include "qabstractxmlreceiver_p.h"
#include "qabstractxmlreceiver.h"

QT_BEGIN_NAMESPACE

/*!
  \class QAbstractXmlReceiver
  \brief The QAbstractXmlReceiver class provides a callback interface
         for transforming the output of a QXmlQuery.
  \reentrant
  \since 4.4
  \ingroup xml-tools
  \inmodule QtXmlPatterns

  QAbstractXmlReceiver is an abstract base class that provides
  a callback interface for receiving an \l {XQuery Sequence}
  {XQuery sequence}, usually the output of a QXmlQuery, and
  transforming that sequence into a structure of your choosing,
  usually XML. Consider the example:

  \snippet code/src_xmlpatterns_api_qabstractxmlreceiver.cpp 0

  First it constructs a \l {QXmlQuery} {query} that gets the
  first paragraph from document \c index.html. Then it constructs
  an \l {QXmlSerializer} {XML serializer} with the \l {QXmlQuery}
  {query} and \l {QIODevice} {myOutputDevice} (Note the
  \l {QXmlSerializer} {serializer} is an \e {XML receiver},
  i.e., a subclass of QAbstractXmlReceiver). Finally, it
  \l {QXmlQuery::evaluateTo()} {evaluates} the
  \l {QXmlQuery} {query}, producing an ordered sequence of calls
  to the \l {QXmlSerializer} {serializer's} callback functions.
  The sequence of callbacks transforms the query output to XML
  and writes it to \l {QIODevice} {myOutputDevice}.

  Although the example uses \l {QXmlQuery} to produce the sequence
  of callbacks to functions in QAbstractXmlReceiver, you can call
  the callback functions directly as long as your sequence of
  calls represents a valid \l {XQuery Sequence} {XQuery sequence}.

  \target XQuery Sequence
  \section1 XQuery Sequences

  An XQuery \e sequence is an ordered collection of zero, one,
  or many \e items. Each \e item is either an \e {atomic value}
  or a \e {node}. An \e {atomic value} is a simple data value.

  There are six kinds of \e nodes.

  \list

  \li An \e {Element Node} represents an XML element.

  \li An \e {Attribute Node} represents an XML attribute.

  \li A \e {Document Node} represents an entire XML document.

  \li A \e {Text Node} represents character data (element content).

  \li A \e {Processing Instruction Node} represents an XML
  processing instruction, which is used in an XML document
  to tell the application reading the document to perform
  some action. A typical example is to use a processing
  instruction to tell the application to use a particular
  XSLT stylesheet to display the document.

  \li And a \e {Comment node} represents an XML comment.

  \endlist

  The \e sequence of \e nodes and \e {atomic values} obeys
  the following rules. Note that \e {Namespace Node} refers
  to a special \e {Attribute Node} with name \e {xmlns}.

  \list

  \li Each \e node appears in the \e sequence before its children
  and their descendants appear.

  \li A \e node's descendants appear in the \e sequence before
  any of its siblings appear.

  \li A \e {Document Node} represents an entire document. Zero or
  more \e {Document Nodes} can appear in a \e sequence, but they
  can only be top level items (i.e., a \e {Document Node} can't
  be a child of another \e node).

  \li \e {Namespace Nodes} immediately follow the \e {Element Node}
   with which they are associated.

  \li \e {Attribute Nodes} immediately follow the \e {Namespace Nodes}
   of the element with which they are associated, or...

   \li If there are no \e {Namespace Nodes} following an element, then
   the \e {Attribute Nodes} immediately follow the element.

   \li An \e {atomic value} can only appear as a top level \e item,
   i.e., it can't appear as a child of a \e node.

   \li \e {Processing Instruction Nodes} do not have children, and
   their parent is either a \e {Document Node} or an \e {Element
   Node}.

   \li \e {Comment Nodes} do not have children, and
   their parent is either a \e {Document Node} or an \e {Element
   Node}.

  \endlist

  The \e sequence of \e nodes and \e {atomic values} is sent to
  a QAbstractXmlReceiver (QXmlSerializer in
  the example above) as a sequence of calls to the receiver's
  callback functions. The mapping of callback functions to
  sequence items is as follows.

  \list

  \li startDocument() and endDocument() are called for each
  \e {Document Node} in the \e sequence. endDocument() is not
  called until all the \e {Document Node's} children have
  appeared in the \e sequence.

  \li startElement() and endElement() are called for each
  \e {Element Node}. endElement() is not called until all the
  \e {Element Node's} children have appeared in the \e sequence.

  \li attribute() is called for each \e {Attribute Node}.

  \li comment() is called for each \e {Comment Node}.

  \li characters() is called for each \e {Text Node}.

  \li processingInstruction() is called for each \e {Processing
  Instruction Node}.

  \li namespaceBinding() is called for each \e {Namespace Node}.

  \li atomicValue() is called for each \e {atomic value}.

  \endlist

  For a complete explanation of XQuery sequences, visit
  \l {http://www.w3.org/TR/xpath-datamodel/}{XQuery Data Model}.

  \sa {http://www.w3.org/TR/xpath-datamodel/}{W3C XQuery 1.0 and XPath 2.0 Data Model (XDM)}
  \sa QXmlSerializer
  \sa QXmlResultItems
 */

/*
 * Forwards every node found on the compile-time selected axis of node to
 * sendAsNode(), in the order the axis iterator yields them. node itself
 * must not be null.
 */
template<const QXmlNodeModelIndex::Axis axis>
void QAbstractXmlReceiver::sendFromAxis(const QXmlNodeModelIndex &node)
{
    Q_ASSERT(!node.isNull());

    const QXmlNodeModelIndex::Iterator::Ptr axisIterator(node.iterate(axis));

    /* A null index returned by the iterator marks the end of the axis. */
    for(QXmlNodeModelIndex current(axisIterator->next());
        !current.isNull();
        current = axisIterator->next())
    {
        sendAsNode(current);
    }
}

/*!
 \internal

 Constructs an XML receiver whose d-pointer is initialized with the
 private implementation object \a d.
 */
QAbstractXmlReceiver::QAbstractXmlReceiver(QAbstractXmlReceiverPrivate *d)
  : d_ptr(d)
{
    // NOTE(review): whether d_ptr takes ownership of d depends on the
    // member's declared type in the header -- confirm there.
}

/*!
  Constructs an abstract XML receiver.
 */
QAbstractXmlReceiver::QAbstractXmlReceiver() : d_ptr(0)
{
    // d_ptr is initialized to null: this constructor sets up no private data.
}

/*!
  Destroys the XML receiver.
 */
QAbstractXmlReceiver::~QAbstractXmlReceiver()
{
    // Intentionally empty: no explicit cleanup is performed in this body.
}

/*!
  \fn void QAbstractXmlReceiver::startElement(const QXmlName &name)

  This callback is called when a new element node appears
  in the \l {XQuery Sequence} {sequence}. \a name is the
  valid \l {QXmlName} {name} of the node element.
 */

/*
### Qt 5:

Consider how source locations should be communicated. Maybe every signature
should be extended by adding "qint64 line = -1, qint64 column = -1".
 */

/*!
  \fn void QAbstractXmlReceiver::endElement()

  This callback is called when the end of an element node
  appears in the \l {XQuery Sequence} {sequence}.
*/

/*!
  \fn void QAbstractXmlReceiver::attribute(const QXmlName &name,
                                           const QStringRef &value)
  This callback is called when an attribute node
  appears in the \l {XQuery Sequence} {sequence}.
  \a name is the \l {QXmlName} {attribute name} and
  the \a value string contains the attribute value.
 */

/*!
  \fn void QAbstractXmlReceiver::comment(const QString &value)

  This callback is called when a comment node appears
  in the \l {XQuery Sequence} {sequence}. The \a value
  is the comment text, which must not contain the string
  "--".
 */

/*!
  \fn void QAbstractXmlReceiver::characters(const QStringRef &value)

  This callback is called when a text node appears in the
  \l {XQuery Sequence} {sequence}. The \a value contains
  the text. Adjacent text nodes may not occur in the
  \l {XQuery Sequence} {sequence}, i.e., this callback must not
  be called twice in a row.
 */

/*!
  \fn void QAbstractXmlReceiver::startDocument()

  This callback is called when a document node appears
  in the \l {XQuery Sequence} {sequence}.
 */

/*
### Qt 5:

Change
    virtual void startDocument() = 0;

To:
    virtual void startDocument(const QUrl &uri) = 0;

Such that it allows the document URI to be communicated. The contract would
allow null QUrls.
*/

/*!
  \fn void QAbstractXmlReceiver::endDocument()

  This callback is called when the end of a document node
  appears in the \l {XQuery Sequence} {sequence}.
 */

/*!
  \fn void QAbstractXmlReceiver::processingInstruction(const QXmlName &target,
                                                       const QString &value)

  This callback is called when a processing instruction
  appears in the \l {XQuery Sequence} {sequence}.
  A processing instruction is used in an XML document
  to tell the application reading the document to
  perform some action. A typical example is to use a
  processing instruction to tell the application to use a
  particular XSLT stylesheet to process the document.

  \quotefile patternist/xmlStylesheet.xq

  \a target is the \l {QXmlName} {name} of the processing
  instruction. Its \e prefix and \e {namespace URI} must both
  be empty. Its \e {local name} is the target. In the above
  example, the name is \e {xml-stylesheet}.

  The \a value specifies the action to be taken. Note that
  the \a value must not contain the string "?>". In the above
  example, the \a value is \e{type="test/xsl" href="formatter.xsl"}.

  Generally, use of processing instructions should be avoided,
  because they are not namespace aware and in many contexts
  are stripped out anyway. Processing instructions can often
  be replaced with elements from a custom namespace.
 */

/*!
  \fn void QAbstractXmlReceiver::atomicValue(const QVariant &value)

  This callback is called when an atomic value appears in the \l
  {XQuery Sequence} {sequence}. The \a value is a simple \l {QVariant}
  {data value}. It is guaranteed to be \l {QVariant::isValid()}
  {valid}.
 */

/*!
  \fn virtual void QAbstractXmlReceiver::namespaceBinding(const QXmlName &name)

  This callback is called when a namespace binding is in scope of an
  element. A namespace is defined by a URI. In the \l {QXmlName}
  \a name, the value of \l {QXmlName::namespaceUri()} is that URI. The
  value of \l {QXmlName::prefix}() is the prefix that the URI is bound
  to. The local name is insignificant and can be an arbitrary value.
 */

/*!
  \internal

   Treats \a outputItem as a node and calls the appropriate function,
   e.g., attribute() or comment(), depending on its
   QXmlNodeModelIndex::NodeKind.

   This is a helper function that subclasses can use to multiplex
   Nodes received via item().
 */
void QAbstractXmlReceiver::sendAsNode(const QPatternist::Item &outputItem)
{
    Q_ASSERT(outputItem);
    Q_ASSERT(outputItem.isNode());
    const QXmlNodeModelIndex index(outputItem.asNode());

    /* Every handled kind returns from inside the switch; only kinds that
     * are not handled reach the assertion that follows it. */
    switch(index.kind())
    {
        case QXmlNodeModelIndex::Document:
        {
            /* A document is bracketed by start/end calls around its children. */
            startDocument();
            sendFromAxis<QXmlNodeModelIndex::AxisChild>(index);
            endDocument();
            return;
        }
        case QXmlNodeModelIndex::Element:
        {
            startElement(index.name());

            /* Order matters: namespace bindings first, then the
             * attributes, and the child nodes last. */
            index.sendNamespaces(this);
            sendFromAxis<QXmlNodeModelIndex::AxisAttribute>(index);
            sendFromAxis<QXmlNodeModelIndex::AxisChild>(index);

            endElement();
            return;
        }
        case QXmlNodeModelIndex::Attribute:
        {
            /* The string must outlive the QStringRef that points at it. */
            const QString attributeValue(outputItem.stringValue());
            attribute(index.name(), QStringRef(&attributeValue));
            return;
        }
        case QXmlNodeModelIndex::Text:
        {
            /* The string must outlive the QStringRef that points at it. */
            const QString text(index.stringValue());
            characters(QStringRef(&text));
            return;
        }
        case QXmlNodeModelIndex::Comment:
        {
            comment(outputItem.stringValue());
            return;
        }
        case QXmlNodeModelIndex::ProcessingInstruction:
        {
            processingInstruction(index.name(), outputItem.stringValue());
            return;
        }
        case QXmlNodeModelIndex::Namespace:
        {
            /* Namespace nodes are not dispatched from here; after this
             * assertion control leaves the switch and continues below. */
            Q_ASSERT_X(false, Q_FUNC_INFO, "Not implemented");
            break;
        }
    }

    Q_ASSERT_X(false, Q_FUNC_INFO,
               QString::fromLatin1("Unknown node type: %1").arg(index.kind()).toUtf8().constData());
}

/*!
  \internal

   This function may be called instead of characters() if, and only if,
   \a value consists only of whitespace.

   The caller guarantees that \a value is not empty.

   \e Whitespace refers to a sequence of characters that are either
   spaces, tabs, or newlines, in any order. In other words, not all
   the Unicode whitespace category is considered whitespace here.

   However, there is no guarantee or requirement that whitespaceOnly()
   is called for text nodes containing whitespace only. characters()
   may be called just as well. This is why the default implementation
   for whitespaceOnly() calls characters().

   \sa characters()
 */
void QAbstractXmlReceiver::whitespaceOnly(const QStringRef &value)
{
    Q_ASSERT_X(value.toString().trimmed().isEmpty(), Q_FUNC_INFO,
               "The caller must guarantee only whitespace is passed. Use characters() in other cases.");
    const QString &v = value.toString();
    characters(QStringRef(&v));
}

/*!
  \internal
 */
void QAbstractXmlReceiver::item(const QPatternist::Item &item)
{
    /* Atomic values are converted to their Qt representation and reported
     * through atomicValue(). */
    if(!item.isNode())
    {
        atomicValue(QPatternist::AtomicValue::toQt(item.asAtomicValue()));
        return;
    }

    /* Anything else is a node and is dispatched according to its kind. */
    sendAsNode(item);
}

/*!
 \fn void QAbstractXmlReceiver::startOfSequence()

 This callback is called once only, right before the
 \l {XQuery Sequence} {sequence} begins.
 */

/*!
  \fn void QAbstractXmlReceiver::endOfSequence()

 This callback is called once only, right after the
 \l {XQuery Sequence} {sequence} ends.
 */

QT_END_NAMESPACE