doc: add the beginnings of a comment to sphinx RST extractor

feature/tap-sh
William Pitcock 2016-12-10 17:37:10 -06:00
parent 591c24dab9
commit 9e2fe4ca72
1 changed file with 122 additions and 0 deletions

122
doc/extract.py Normal file
View File

@@ -0,0 +1,122 @@
# derived from https://github.com/jeanralphaviles/comment_parser/blob/master/comment_parser/parsers/c_parser.py
# MIT license - https://github.com/jeanralphaviles/comment_parser/blob/master/LICENSE
from collections import namedtuple
# Immutable record describing one extracted comment:
#   comment   - the comment text, without its '//' or '/* */' delimiters
#   line      - 1-based line number recorded for the comment
#   multiline - True for '/* ... */' comments, False for '//' comments
Comment = namedtuple('Comment', 'comment line multiline')
class FileError(Exception):
    """Raised when a source file cannot be opened or read."""
class UnterminatedCommentError(Exception):
    """Raised when a multi-line comment is still open at end of file."""
# Scanner states for the comment-extraction state machine.
_CODE = 0                 # Ordinary code, outside any comment or literal.
_SLASH = 1                # Just saw '/', which may start a comment.
_LINE_COMMENT = 2         # Inside a '//' comment.
_BLOCK_COMMENT = 3        # Inside a '/* ... */' comment.
_BLOCK_COMMENT_STAR = 4   # Inside a block comment, just saw '*'.
_STRING = 5               # Inside a double-quoted string literal.
_STRING_ESCAPE = 6        # Inside a string literal, just saw a backslash.
_CHAR = 7                 # Inside a single-quoted character literal.
_CHAR_ESCAPE = 8          # Inside a character literal, just saw a backslash.


def _scan_comments(text):
    """Run the comment state machine over ``text`` and return its comments."""
    state = _CODE
    pending = []          # Characters of the comment currently being read.
    comments = []
    line_counter = 1
    comment_start = 1

    for char in text:
        if state == _CODE:
            if char == '/':
                state = _SLASH
            elif char == '"':
                state = _STRING
            elif char == "'":
                state = _CHAR
        elif state == _SLASH:
            if char == '/':
                state = _LINE_COMMENT
            elif char == '*':
                comment_start = line_counter
                state = _BLOCK_COMMENT
            elif char == '"':
                # A division followed directly by a string literal must
                # still open the string, otherwise its contents would be
                # scanned as code and could be mistaken for a comment.
                state = _STRING
            elif char == "'":
                state = _CHAR
            else:
                state = _CODE
        elif state == _LINE_COMMENT:
            if char == '\n':
                # The line counter is bumped at the bottom of the loop, so
                # it still names the line the comment sits on.
                comments.append(
                    Comment(''.join(pending), line_counter, False))
                pending = []
                state = _CODE
            else:
                pending.append(char)
        elif state == _BLOCK_COMMENT:
            if char == '*':
                state = _BLOCK_COMMENT_STAR
            else:
                pending.append(char)
        elif state == _BLOCK_COMMENT_STAR:
            if char == '/':
                comments.append(
                    Comment(''.join(pending), comment_start, True))
                pending = []
                state = _CODE
            elif char == '*':
                # Run of asterisks: the previous one is comment text, the
                # current one may still be the start of the terminator.
                pending.append('*')
            else:
                pending.append('*')
                pending.append(char)
                state = _BLOCK_COMMENT
        elif state == _STRING:
            if char == '"':
                state = _CODE
            elif char == '\\':
                state = _STRING_ESCAPE
        elif state == _STRING_ESCAPE:
            # Whatever follows the backslash is part of the string.
            state = _STRING
        elif state == _CHAR:
            # A character literal cannot span lines, so a newline also ends
            # it; this keeps a stray apostrophe from swallowing more than
            # the remainder of its own line.
            if char == "'" or char == '\n':
                state = _CODE
            elif char == '\\':
                state = _CHAR_ESCAPE
        elif state == _CHAR_ESCAPE:
            state = _CHAR

        if char == '\n':
            line_counter += 1

    if state in (_BLOCK_COMMENT, _BLOCK_COMMENT_STAR):
        raise UnterminatedCommentError()
    if state == _LINE_COMMENT:
        # File ended inside a single-line comment with no trailing newline.
        comments.append(Comment(''.join(pending), line_counter, False))
    return comments


def extract_comments(filename):
    """Extracts a list of comments from the given C family source file.

    Comments are represented with the Comment namedtuple defined in this
    module. C family comments come in two forms, single and multi-line
    comments.
        - Single-line comments begin with '//' and continue to the end of
          line.
        - Multi-line comments begin with '/*' and end with '*/' and can span
          multiple lines of code. If a multi-line comment does not terminate
          before EOF is reached, then an exception is raised.

    Comment markers inside double-quoted string literals and single-quoted
    character literals are ignored, including when the literal directly
    follows a '/' operator.

    Note that this doesn't take language-specific preprocessor directives
    into consideration.

    Args:
        filename: String name of the file to extract comments from.
    Returns:
        Python list of Comment objects in the order that they appear in the
        file. Single-line comments carry the line they are on; multi-line
        comments carry the line on which they start.
    Raises:
        FileError: File was unable to be open or read.
        UnterminatedCommentError: Encountered an unterminated multi-line
            comment.
    """
    try:
        # Read the file in one go instead of one character per call.
        with open(filename, 'r') as source_file:
            text = source_file.read()
    except OSError as exception:
        raise FileError(str(exception)) from exception
    return _scan_comments(text)
if __name__ == '__main__':
    # Command-line entry point: pretty-print every comment found in the
    # source file named by the first command-line argument.
    import pprint
    import sys

    target = sys.argv[1]
    pprint.pprint(extract_comments(target))