author    Gregory Szorc <gregory.szorc@gmail.com>    2012-07-12 07:21:12 +0000
committer Gregory Szorc <gregory.szorc@gmail.com>    2012-07-12 07:21:12 +0000
commit    be51e43ba2c57b8032286af4e8713485b6dc78c3 (patch)
tree      618a2b89bee02d2d2cba83e9a5fb76ff2e8dbdf8 /bindings/python
parent    0f1964a5c1627bcc3fd658cdd1f139e30b0ad612 (diff)
[clang.py] Implement Token API
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@160111 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'bindings/python')
-rw-r--r--  bindings/python/clang/cindex.py                         198
-rw-r--r--  bindings/python/clang/enumerations.py                    34
-rw-r--r--  bindings/python/tests/cindex/test_cursor.py              10
-rw-r--r--  bindings/python/tests/cindex/test_token_kind.py          43
-rw-r--r--  bindings/python/tests/cindex/test_tokens.py              52
-rw-r--r--  bindings/python/tests/cindex/test_translation_unit.py    24
6 files changed, 345 insertions, 16 deletions
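For orientation, here is a minimal sketch of how the Token API added by this patch is intended to be used. The file name 't.c' and the source snippet are made up for illustration, and this assumes the bindings can locate and load libclang:

from clang.cindex import TranslationUnit, TokenKind

# Parse a small in-memory file. 't.c' and its contents are illustrative only.
tu = TranslationUnit.from_source('t.c',
                                 unsaved_files=[('t.c', 'int foo(int i);')])

# Tokens for an explicit extent of the translation unit...
extent = tu.get_extent('t.c', (0, 10))
for token in tu.get_tokens(extent=extent):
    print token.spelling, token.kind, token.location

# ...or for everything a cursor covers.
foo = [c for c in tu.cursor.get_children() if c.spelling == 'foo'][0]
tokens = list(foo.get_tokens())
assert tokens[1].kind == TokenKind.IDENTIFIER
assert tokens[1].cursor is not None   # tokens map back to cursors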
diff --git a/bindings/python/clang/cindex.py b/bindings/python/clang/cindex.py
index 1bc7d0bae9..fc0a2a18bb 100644
--- a/bindings/python/clang/cindex.py
+++ b/bindings/python/clang/cindex.py
@@ -65,6 +65,8 @@ call is efficient.
from ctypes import *
import collections
+import clang.enumerations
+
def get_cindex_library():
# FIXME: It's probably not the case that the library is actually found in
# this location. We need a better system of identifying and loading the
@@ -367,6 +369,98 @@ class FixIt(object):
def __repr__(self):
return "<FixIt range %r, value %r>" % (self.range, self.value)
+class TokenGroup(object):
+ """Helper class to facilitate token management.
+
+ Tokens are allocated from libclang in chunks. They must be disposed of as a
+ collective group.
+
+ Instances of this class represent groups of allocated tokens. Each token
+ in a group holds a reference back to its owning group. Only when every
+ token from a group has been garbage collected can the group itself be
+ collected. The group's destructor then calls the libclang disposal
+ routine, which invalidates every token in the group.
+
+ You should not instantiate this class outside of this module.
+ """
+ def __init__(self, tu, memory, count):
+ self._tu = tu
+ self._memory = memory
+ self._count = count
+
+ def __del__(self):
+ lib.clang_disposeTokens(self._tu, self._memory, self._count)
+
+ @staticmethod
+ def get_tokens(tu, extent):
+ """Helper method to return all tokens in an extent.
+
+ This functionality is needed in multiple places in this module. We define
+ it here because it seems like a logical place.
+ """
+ tokens_memory = POINTER(Token)()
+ tokens_count = c_uint()
+
+ lib.clang_tokenize(tu, extent, byref(tokens_memory),
+ byref(tokens_count))
+
+ count = int(tokens_count.value)
+
+ # If we get no tokens, no memory was allocated. Return early so we never
+ # create a TokenGroup whose destructor would dispose of nothing.
+ if count < 1:
+ return
+
+ tokens_array = cast(tokens_memory, POINTER(Token * count)).contents
+
+ token_group = TokenGroup(tu, tokens_memory, tokens_count)
+
+ for i in xrange(0, count):
+ token = Token()
+ token.int_data = tokens_array[i].int_data
+ token.ptr_data = tokens_array[i].ptr_data
+ token._tu = tu
+ token._group = token_group
+
+ yield token
+
+class TokenKind(object):
+ """Describes a specific type of a Token."""
+
+ _value_map = {} # int -> TokenKind
+
+ def __init__(self, value, name):
+ """Create a new TokenKind instance from a numeric value and a name."""
+ self.value = value
+ self.name = name
+
+ def __repr__(self):
+ return 'TokenKind.%s' % (self.name,)
+
+ @staticmethod
+ def from_value(value):
+ """Obtain a registered TokenKind instance from its value."""
+ result = TokenKind._value_map.get(value, None)
+
+ if result is None:
+ raise ValueError('Unknown TokenKind: %d' % value)
+
+ return result
+
+ @staticmethod
+ def register(value, name):
+ """Register a new TokenKind enumeration.
+
+ This should only be called at module load time by code within this
+ package.
+ """
+ if value in TokenKind._value_map:
+ raise ValueError('TokenKind already registered: %d' % value)
+
+ kind = TokenKind(value, name)
+ TokenKind._value_map[value] = kind
+ setattr(TokenKind, name, kind)
+
### Cursor Kinds ###
class CursorKind(object):
@@ -1181,6 +1275,14 @@ class Cursor(Structure):
children)
return iter(children)
+ def get_tokens(self):
+ """Obtain Token instances formulating that compose this Cursor.
+
+ This is a generator for Token instances. It returns all tokens which
+ occupy the extent this cursor occupies.
+ """
+ return TokenGroup.get_tokens(self._tu, self.extent)
+
@staticmethod
def from_result(res, fn, args):
assert isinstance(res, Cursor)
@@ -2058,6 +2160,19 @@ class TranslationUnit(ClangObject):
return CodeCompletionResults(ptr)
return None
+ def get_tokens(self, locations=None, extent=None):
+ """Obtain tokens in this translation unit.
+
+ This is a generator for Token instances. The caller specifies a range
+ of source code to obtain tokens for. The range can be specified as a
+ 2-tuple of SourceLocation instances or as a SourceRange. If both are
+ given, the locations tuple takes precedence.
+ """
+ if locations is not None:
+ extent = SourceRange(start=locations[0], end=locations[1])
+
+ return TokenGroup.get_tokens(self, extent)
+
class File(ClangObject):
"""
The File class represents a particular source file that is part of a
@@ -2226,6 +2341,52 @@ class CompilationDatabase(ClangObject):
"""
return lib.clang_CompilationDatabase_getCompileCommands(self, filename)
+class Token(Structure):
+ """Represents a single token from the preprocessor.
+
+ Tokens are effectively segments of source code. Source code is first lexed
+ into tokens, which are then parsed into the AST that Cursors expose.
+
+ Tokens are obtained from parsed TranslationUnit instances. You currently
+ can't create tokens manually.
+ """
+ _fields_ = [
+ ('int_data', c_uint * 4),
+ ('ptr_data', c_void_p)
+ ]
+
+ @property
+ def spelling(self):
+ """The spelling of this token.
+
+ This is the textual representation of the token in source.
+ """
+ return lib.clang_getTokenSpelling(self._tu, self)
+
+ @property
+ def kind(self):
+ """Obtain the TokenKind of the current token."""
+ return TokenKind.from_value(lib.clang_getTokenKind(self))
+
+ @property
+ def location(self):
+ """The SourceLocation this Token occurs at."""
+ return lib.clang_getTokenLocation(self._tu, self)
+
+ @property
+ def extent(self):
+ """The SourceRange this Token occupies."""
+ return lib.clang_getTokenExtent(self._tu, self)
+
+ @property
+ def cursor(self):
+ """The Cursor this Token corresponds to."""
+ cursor = Cursor()
+
+ lib.clang_annotateTokens(self._tu, byref(self), 1, byref(cursor))
+
+ return cursor
+
# Now comes the plumbing to hook up the C library.
# Register callback types in common container.
@@ -2240,8 +2401,8 @@ def register_functions(lib):
to call out to the shared library.
"""
# Functions are registered in strictly alphabetical order.
- #lib.clang_annotateTokens.argtype = [TranslationUnit, POINTER(Token),
- # c_uint, POINTER(Cursor)]
+ lib.clang_annotateTokens.argtypes = [TranslationUnit, POINTER(Token),
+ c_uint, POINTER(Cursor)]
lib.clang_CompilationDatabase_dispose.argtypes = [c_object_p]
@@ -2309,7 +2470,7 @@ def register_functions(lib):
lib.clang_disposeString.argtypes = [_CXString]
- #lib.clang_disposeTokens.argtype = [TranslationUnit, POINTER(Token), c_uint]
+ lib.clang_disposeTokens.argtypes = [TranslationUnit, POINTER(Token), c_uint]
lib.clang_disposeTranslationUnit.argtypes = [TranslationUnit]
@@ -2543,19 +2704,18 @@ def register_functions(lib):
lib.clang_getTemplateCursorKind.argtypes = [Cursor]
lib.clang_getTemplateCursorKind.restype = c_uint
- #lib.clang_getTokenExtent.argtypes = [TranslationUnit, Token]
- #lib.clang_getTokenExtent.restype = SourceRange
+ lib.clang_getTokenExtent.argtypes = [TranslationUnit, Token]
+ lib.clang_getTokenExtent.restype = SourceRange
- #lib.clang_getTokenKind.argtypes = [Token]
- #lib.clang_getTokenKind.restype = c_uint
- #lib.clang_getTokenKind.errcheck = TokenKind.from_result
+ lib.clang_getTokenKind.argtypes = [Token]
+ lib.clang_getTokenKind.restype = c_uint
- #lib.clang_getTokenLocation.argtype = [TranslationUnit, Token]
- #lib.clang_getTokenLocation.restype = SourceLocation
+ lib.clang_getTokenLocation.argtypes = [TranslationUnit, Token]
+ lib.clang_getTokenLocation.restype = SourceLocation
- #lib.clang_getTokenSpelling.argtype = [TranslationUnit, Token]
- #lib.clang_getTokenSpelling.restype = _CXString
- #lib.clang_getTokenSpelling.errcheck = _CXString.from_result
+ lib.clang_getTokenSpelling.argtypes = [TranslationUnit, Token]
+ lib.clang_getTokenSpelling.restype = _CXString
+ lib.clang_getTokenSpelling.errcheck = _CXString.from_result
lib.clang_getTranslationUnitCursor.argtypes = [TranslationUnit]
lib.clang_getTranslationUnitCursor.restype = Cursor
@@ -2646,8 +2806,8 @@ def register_functions(lib):
c_uint]
lib.clang_saveTranslationUnit.restype = c_int
- #lib.clang_tokenize.argtypes = [TranslationUnit, SourceRange,
- # POINTER(POINTER(Token)), POINTER(c_uint)]
+ lib.clang_tokenize.argtypes = [TranslationUnit, SourceRange,
+ POINTER(POINTER(Token)), POINTER(c_uint)]
lib.clang_visitChildren.argtypes = [Cursor, callbacks['cursor_visit'],
py_object]
@@ -2655,6 +2815,12 @@ def register_functions(lib):
register_functions(lib)
+def register_enumerations():
+ for name, value in clang.enumerations.TokenKinds:
+ TokenKind.register(value, name)
+
+register_enumerations()
+
__all__ = [
'CodeCompletionResults',
'CompilationDatabase',
@@ -2668,6 +2834,8 @@ __all__ = [
'Index',
'SourceLocation',
'SourceRange',
+ 'TokenKind',
+ 'Token',
'TranslationUnitLoadError',
'TranslationUnit',
'TypeKind',
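The ownership scheme described in TokenGroup's docstring above is easiest to see in isolation. Below is a minimal, self-contained sketch of the same pattern; _dispose(), _Group and _Item are made-up names standing in for lib.clang_disposeTokens and the real TokenGroup/Token classes:

def _dispose(memory, count):
    # Stand-in for lib.clang_disposeTokens(tu, memory, count).
    print 'disposing %d native items' % count

class _Group(object):
    """Owns a batch of natively allocated items."""
    def __init__(self, memory, count):
        self._memory = memory
        self._count = count

    def __del__(self):
        # Runs only once nothing references the group any more.
        _dispose(self._memory, self._count)

class _Item(object):
    def __init__(self, data, group):
        self.data = data
        self._group = group  # keeps the group, and thus the allocation, alive

group = _Group('fake-native-buffer', 3)
items = [_Item(i, group) for i in range(3)]
del group   # items still reference the group; nothing is disposed yet
del items   # last references dropped; disposal can now happen

The new test_get_tokens_gc() test at the end of this patch exercises exactly this behaviour against the real libclang-backed objects.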
diff --git a/bindings/python/clang/enumerations.py b/bindings/python/clang/enumerations.py
new file mode 100644
index 0000000000..a86a48ade3
--- /dev/null
+++ b/bindings/python/clang/enumerations.py
@@ -0,0 +1,34 @@
+#===- enumerations.py - Python Enumerations ------------------*- python -*--===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+"""
+Clang Enumerations
+==================
+
+This module provides static definitions of enumerations that exist in libclang.
+
+Enumerations are typically defined as a list of tuples. The exported values are
+typically munged into other types or classes at module load time.
+
+All enumerations are centrally defined in this file so they are all grouped
+together and are easier to audit. Perhaps one day this file will even be
+generated automatically by scanning the libclang headers!
+"""
+
+# Maps to CXTokenKind. Note that libclang maintains a separate set of token
+# enumerations from the C++ API.
+TokenKinds = [
+ ('PUNCTUATION', 0),
+ ('KEYWORD', 1),
+ ('IDENTIFIER', 2),
+ ('LITERAL', 3),
+ ('COMMENT', 4),
+]
+
+__all__ = ['TokenKinds']
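Once register_enumerations() in cindex.py (shown above) has consumed this list, each name becomes an attribute on TokenKind. A quick sketch of the resulting behaviour, assuming clang.cindex imports cleanly:

from clang.cindex import TokenKind

assert TokenKind.from_value(0) is TokenKind.PUNCTUATION
assert TokenKind.IDENTIFIER.value == 2
assert repr(TokenKind.COMMENT) == 'TokenKind.COMMENT'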
diff --git a/bindings/python/tests/cindex/test_cursor.py b/bindings/python/tests/cindex/test_cursor.py
index 979838b21c..51695e20b0 100644
--- a/bindings/python/tests/cindex/test_cursor.py
+++ b/bindings/python/tests/cindex/test_cursor.py
@@ -231,3 +231,13 @@ def test_result_type():
assert foo is not None
t = foo.result_type
assert t.kind == TypeKind.INT
+
+def test_get_tokens():
+ """Ensure we can map cursors back to tokens."""
+ tu = get_tu('int foo(int i);')
+ foo = get_cursor(tu, 'foo')
+
+ tokens = list(foo.get_tokens())
+ assert len(tokens) == 7
+ assert tokens[0].spelling == 'int'
+ assert tokens[1].spelling == 'foo'
diff --git a/bindings/python/tests/cindex/test_token_kind.py b/bindings/python/tests/cindex/test_token_kind.py
new file mode 100644
index 0000000000..62ec63e0ad
--- /dev/null
+++ b/bindings/python/tests/cindex/test_token_kind.py
@@ -0,0 +1,43 @@
+from clang.cindex import TokenKind
+from nose.tools import eq_
+from nose.tools import ok_
+from nose.tools import raises
+
+def test_constructor():
+ """Ensure TokenKind constructor works as expected."""
+
+ t = TokenKind(5, 'foo')
+
+ eq_(t.value, 5)
+ eq_(t.name, 'foo')
+
+@raises(ValueError)
+def test_bad_register():
+ """Ensure a duplicate value is rejected for registration."""
+
+ TokenKind.register(2, 'foo')
+
+@raises(ValueError)
+def test_unknown_value():
+ """Ensure trying to fetch an unknown value raises."""
+
+ TokenKind.from_value(-1)
+
+def test_registration():
+ """Ensure that items registered appear as class attributes."""
+ ok_(hasattr(TokenKind, 'LITERAL'))
+ literal = TokenKind.LITERAL
+
+ ok_(isinstance(literal, TokenKind))
+
+def test_from_value():
+ """Ensure registered values can be obtained from from_value()."""
+ t = TokenKind.from_value(3)
+ ok_(isinstance(t, TokenKind))
+ eq_(t, TokenKind.LITERAL)
+
+def test_repr():
+ """Ensure repr() works."""
+
+ r = repr(TokenKind.LITERAL)
+ eq_(r, 'TokenKind.LITERAL')
diff --git a/bindings/python/tests/cindex/test_tokens.py b/bindings/python/tests/cindex/test_tokens.py
new file mode 100644
index 0000000000..7074842909
--- /dev/null
+++ b/bindings/python/tests/cindex/test_tokens.py
@@ -0,0 +1,52 @@
+from clang.cindex import CursorKind
+from clang.cindex import Index
+from clang.cindex import SourceLocation
+from clang.cindex import SourceRange
+from clang.cindex import TokenKind
+from nose.tools import eq_
+from nose.tools import ok_
+
+from .util import get_tu
+
+def test_token_to_cursor():
+ """Ensure we can obtain a Cursor from a Token instance."""
+ tu = get_tu('int i = 5;')
+ r = tu.get_extent('t.c', (0, 9))
+ tokens = list(tu.get_tokens(extent=r))
+
+ assert len(tokens) == 5
+ assert tokens[1].spelling == 'i'
+ assert tokens[1].kind == TokenKind.IDENTIFIER
+
+ cursor = tokens[1].cursor
+ assert cursor.kind == CursorKind.VAR_DECL
+ assert tokens[1].cursor == tokens[2].cursor
+
+def test_token_location():
+ """Ensure Token.location works."""
+
+ tu = get_tu('int foo = 10;')
+ r = tu.get_extent('t.c', (0, 11))
+
+ tokens = list(tu.get_tokens(extent=r))
+ eq_(len(tokens), 4)
+
+ loc = tokens[1].location
+ ok_(isinstance(loc, SourceLocation))
+ eq_(loc.line, 1)
+ eq_(loc.column, 5)
+ eq_(loc.offset, 4)
+
+def test_token_extent():
+ """Ensure Token.extent works."""
+ tu = get_tu('int foo = 10;')
+ r = tu.get_extent('t.c', (0, 11))
+
+ tokens = list(tu.get_tokens(extent=r))
+ eq_(len(tokens), 4)
+
+ extent = tokens[1].extent
+ ok_(isinstance(extent, SourceRange))
+
+ eq_(extent.start.offset, 4)
+ eq_(extent.end.offset, 7)
diff --git a/bindings/python/tests/cindex/test_translation_unit.py b/bindings/python/tests/cindex/test_translation_unit.py
index 9de12ad462..c91f126097 100644
--- a/bindings/python/tests/cindex/test_translation_unit.py
+++ b/bindings/python/tests/cindex/test_translation_unit.py
@@ -1,3 +1,6 @@
+import gc
+import os
+
from clang.cindex import CursorKind
from clang.cindex import Cursor
from clang.cindex import File
@@ -8,7 +11,6 @@ from clang.cindex import TranslationUnitSaveError
from clang.cindex import TranslationUnit
from .util import get_cursor
from .util import get_tu
-import os
kInputsDir = os.path.join(os.path.dirname(__file__), 'INPUTS')
@@ -217,3 +219,23 @@ def test_get_source_range():
assert r.end.offset == 5
assert r.start.file.name == 't.c'
assert r.end.file.name == 't.c'
+
+def test_get_tokens_gc():
+ """Ensures get_tokens() works properly with garbage collection."""
+
+ tu = get_tu('int foo();')
+ r = tu.get_extent('t.c', (0, 10))
+ tokens = list(tu.get_tokens(extent=r))
+
+ assert tokens[0].spelling == 'int'
+ gc.collect()
+ assert tokens[0].spelling == 'int'
+
+ del tokens[1]
+ gc.collect()
+ assert tokens[0].spelling == 'int'
+
+ # May trigger segfault if we don't do our job properly.
+ del tokens
+ gc.collect()
+ gc.collect() # Just in case.