doc: add the beginnings of a comment to sphinx RST extractor

2016-12-10 17:37:10 -06:00 · 2016-12-10 17:37:10 -06:00 · 9e2fe4ca72
parent 591c24dab9
commit 9e2fe4ca72
1 changed files with 122 additions and 0 deletions
--- a/doc/extract.py
+++ b/doc/extract.py
@ -0,0 +1,122 @@
+# derived from https://github.com/jeanralphaviles/comment_parser/blob/master/comment_parser/parsers/c_parser.py
+# MIT license - https://github.com/jeanralphaviles/comment_parser/blob/master/LICENSE
+
+
+from collections import namedtuple
+
+
+Comment = namedtuple('Comment', ['comment', 'line', 'multiline'])
+
+
+class FileError(Exception):
+    pass
+
+
+class UnterminatedCommentError(Exception):
+    pass
+
+
+def extract_comments(filename):
+    """Extracts a list of comments from the given C family source file.
+    Comments are represented with the Comment class found in the common module.
+    C family comments come in two forms, single and multi-line comments.
+        - Single-line comments begin with '//' and continue to the end of line.
+        - Multi-line comments begin with '/*' and end with '*/' and can span
+            multiple lines of code. If a multi-line comment does not terminate
+            before EOF is reached, then an exception is raised.
+    Note that this doesn't take language-specific preprocessor directives into
+    consideration.
+    Args:
+        filename: String name of the file to extract comments from.
+    Returns:
+        Python list of Comment objects in the order that they appear in the file.
+    Raises:
+        FileError: File was unable to be open or read.
+        UnterminatedCommentError: Encountered an unterminated multi-line
+            comment.
+    """
+    try:
+        with open(filename, 'r') as source_file:
+            state = 0
+            current_comment = ''
+            comments = []
+            line_counter = 1
+            comment_start = 1
+            while True:
+                char = source_file.read(1)
+                if not char:
+                    if state is 3 or state is 4:
+                        raise UnterminatedCommentError()
+                    if state is 2:
+                        # Was in single line comment. Create comment.
+                        comment = Comment(current_comment, line_counter, False)
+                        comments.append(comment)
+                    return comments
+                if state is 0:
+                    # Waiting for comment start character or beginning of
+                    # string.
+                    if char == '/':
+                        state = 1
+                    elif char == '"':
+                        state = 5
+                elif state is 1:
+                    # Found comment start character, classify next character and
+                    # determine if single or multiline comment.
+                    if char == '/':
+                        state = 2
+                    elif char == '*':
+                        comment_start = line_counter
+                        state = 3
+                    else:
+                        state = 0
+                elif state is 2:
+                    # In single line comment, read characters until EOL.
+                    if char == '\n':
+                        comment = Comment(current_comment, line_counter, False)
+                        comments.append(comment)
+                        current_comment = ''
+                        state = 0
+                    else:
+                        current_comment += char
+                elif state is 3:
+                    # In multi-line comment, add characters until '*'
+                    # encountered.
+                    if char == '*':
+                        state = 4
+                    else:
+                        current_comment += char
+                elif state is 4:
+                    # In multi-line comment with asterisk found. Determine if
+                    # comment is ending.
+                    if char == '/':
+                        comment = Comment(
+                            current_comment, comment_start, True)
+                        comments.append(comment)
+                        current_comment = ''
+                        state = 0
+                    else:
+                        current_comment += '*'
+                        # Care for multiple '*' in a row
+                        if char != '*':
+                            current_comment += char
+                            state = 3
+                elif state is 5:
+                    # In string literal, expect literal end or escape char.
+                    if char == '"':
+                        state = 0
+                    elif char == '\\':
+                        state = 6
+                elif state is 6:
+                    # In string literal, escaping current char.
+                    state = 5
+                if char == '\n':
+                    line_counter += 1
+    except OSError as exception:
+        raise FileError(str(exception))
+
+
+if __name__ == '__main__':
+    import sys
+    from pprint import pprint
+
+    pprint(extract_comments(sys.argv[1]))