doc: add the beginnings of a comment to sphinx RST extractor
parent
591c24dab9
commit
9e2fe4ca72
|
@ -0,0 +1,122 @@
|
||||||
|
# derived from https://github.com/jeanralphaviles/comment_parser/blob/master/comment_parser/parsers/c_parser.py
|
||||||
|
# MIT license - https://github.com/jeanralphaviles/comment_parser/blob/master/LICENSE
|
||||||
|
|
||||||
|
|
||||||
|
from collections import namedtuple
|
||||||
|
|
||||||
|
|
||||||
|
Comment = namedtuple('Comment', ['comment', 'line', 'multiline'])
|
||||||
|
|
||||||
|
|
||||||
|
class FileError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class UnterminatedCommentError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def extract_comments(filename):
|
||||||
|
"""Extracts a list of comments from the given C family source file.
|
||||||
|
Comments are represented with the Comment class found in the common module.
|
||||||
|
C family comments come in two forms, single and multi-line comments.
|
||||||
|
- Single-line comments begin with '//' and continue to the end of line.
|
||||||
|
- Multi-line comments begin with '/*' and end with '*/' and can span
|
||||||
|
multiple lines of code. If a multi-line comment does not terminate
|
||||||
|
before EOF is reached, then an exception is raised.
|
||||||
|
Note that this doesn't take language-specific preprocessor directives into
|
||||||
|
consideration.
|
||||||
|
Args:
|
||||||
|
filename: String name of the file to extract comments from.
|
||||||
|
Returns:
|
||||||
|
Python list of Comment objects in the order that they appear in the file.
|
||||||
|
Raises:
|
||||||
|
FileError: File was unable to be open or read.
|
||||||
|
UnterminatedCommentError: Encountered an unterminated multi-line
|
||||||
|
comment.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with open(filename, 'r') as source_file:
|
||||||
|
state = 0
|
||||||
|
current_comment = ''
|
||||||
|
comments = []
|
||||||
|
line_counter = 1
|
||||||
|
comment_start = 1
|
||||||
|
while True:
|
||||||
|
char = source_file.read(1)
|
||||||
|
if not char:
|
||||||
|
if state is 3 or state is 4:
|
||||||
|
raise UnterminatedCommentError()
|
||||||
|
if state is 2:
|
||||||
|
# Was in single line comment. Create comment.
|
||||||
|
comment = Comment(current_comment, line_counter, False)
|
||||||
|
comments.append(comment)
|
||||||
|
return comments
|
||||||
|
if state is 0:
|
||||||
|
# Waiting for comment start character or beginning of
|
||||||
|
# string.
|
||||||
|
if char == '/':
|
||||||
|
state = 1
|
||||||
|
elif char == '"':
|
||||||
|
state = 5
|
||||||
|
elif state is 1:
|
||||||
|
# Found comment start character, classify next character and
|
||||||
|
# determine if single or multiline comment.
|
||||||
|
if char == '/':
|
||||||
|
state = 2
|
||||||
|
elif char == '*':
|
||||||
|
comment_start = line_counter
|
||||||
|
state = 3
|
||||||
|
else:
|
||||||
|
state = 0
|
||||||
|
elif state is 2:
|
||||||
|
# In single line comment, read characters until EOL.
|
||||||
|
if char == '\n':
|
||||||
|
comment = Comment(current_comment, line_counter, False)
|
||||||
|
comments.append(comment)
|
||||||
|
current_comment = ''
|
||||||
|
state = 0
|
||||||
|
else:
|
||||||
|
current_comment += char
|
||||||
|
elif state is 3:
|
||||||
|
# In multi-line comment, add characters until '*'
|
||||||
|
# encountered.
|
||||||
|
if char == '*':
|
||||||
|
state = 4
|
||||||
|
else:
|
||||||
|
current_comment += char
|
||||||
|
elif state is 4:
|
||||||
|
# In multi-line comment with asterisk found. Determine if
|
||||||
|
# comment is ending.
|
||||||
|
if char == '/':
|
||||||
|
comment = Comment(
|
||||||
|
current_comment, comment_start, True)
|
||||||
|
comments.append(comment)
|
||||||
|
current_comment = ''
|
||||||
|
state = 0
|
||||||
|
else:
|
||||||
|
current_comment += '*'
|
||||||
|
# Care for multiple '*' in a row
|
||||||
|
if char != '*':
|
||||||
|
current_comment += char
|
||||||
|
state = 3
|
||||||
|
elif state is 5:
|
||||||
|
# In string literal, expect literal end or escape char.
|
||||||
|
if char == '"':
|
||||||
|
state = 0
|
||||||
|
elif char == '\\':
|
||||||
|
state = 6
|
||||||
|
elif state is 6:
|
||||||
|
# In string literal, escaping current char.
|
||||||
|
state = 5
|
||||||
|
if char == '\n':
|
||||||
|
line_counter += 1
|
||||||
|
except OSError as exception:
|
||||||
|
raise FileError(str(exception))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
import sys
|
||||||
|
from pprint import pprint
|
||||||
|
|
||||||
|
pprint(extract_comments(sys.argv[1]))
|
Loading…
Reference in New Issue