Build a Recursive Comment Parser That Validates Nested TODO Depths and Generates Dependency Graphs

A developer wants to scan their codebase for TODO comments, parse their nesting levels, validate that no TODO references another TODO that hasn't been resolved yet, and generate a dependency graph showing which TODOs block which other TODOs. Build a system that handles this with maximum architectural rigor.

/**
 * Scans source text for `// TODO` line comments, records each one together
 * with its indentation-based nesting level and bracketed metadata, checks the
 * "blocked by" / "depends on" relationships for cycles, and exposes those
 * relationships as a plain node/edge graph.
 *
 * Recognised comment shape (case-insensitive):
 *   // TODO: description
 *   // TODO [TAG, KEY:value]: description (blocked by TODO_123)
 */
class TodoCommentParser {
  /**
   * @param {number} [maxNestingDepth=5] Deepest nesting level accepted before
   *   a validation error is recorded.
   * @param {boolean} [enableGraphVisualization=true] When false,
   *   `generateGraphVisualization` returns null.
   */
  constructor(maxNestingDepth = 5, enableGraphVisualization = true) {
    this.maxNestingDepth = maxNestingDepth;
    this.enableGraphVisualization = enableGraphVisualization;
    // TODO id -> parsed TODO object.
    this.todoRegistry = new Map();
    // TODO id -> array of ids it is blocked by. Keys are strings, which a
    // WeakMap cannot hold, so a regular Map is required here.
    this.dependencyGraph = new Map();
    this.validationErrors = [];
  }

  /**
   * Extracts every TODO comment from a block of source text.
   *
   * Each parsed TODO is also stored in `todoRegistry` and `dependencyGraph`;
   * nesting-depth violations are appended to `validationErrors`. State
   * accumulates across calls on the same instance.
   *
   * @param {string} codeString Source text; LF and CRLF line endings are both accepted.
   * @returns {Array<object>} Parsed TODO objects in order of appearance.
   * @throws {TypeError} If `codeString` is not a string.
   */
  parseCodeString(codeString) {
    if (typeof codeString !== 'string') {
      throw new TypeError('Input must be a string');
    }

    // Group 1: optional bracketed metadata; group 2: the description.
    const todoPattern = /\/\/\s*TODO\s*(?:\[(.*?)\])?:\s*(.+)/i;
    const todos = [];

    for (const [index, line] of codeString.split(/\r?\n/).entries()) {
      const todoMatch = line.match(todoPattern);
      if (!todoMatch) {
        continue;
      }

      const metadata = todoMatch[1] || '';
      const description = todoMatch[2];
      const lineNumber = index + 1;
      const nestingLevel = this.calculateNestingLevel(line);
      const todoId = this.generateTodoId(description, index);

      const todoObject = {
        id: todoId,
        description: description.trim(),
        lineNumber,
        nestingLevel,
        metadata: this.parseMetadata(metadata),
        blockedBy: this.extractBlockedByReferences(description),
        timestamp: Date.now(),
      };

      if (nestingLevel > this.maxNestingDepth) {
        this.validationErrors.push(
          `TODO at line ${lineNumber} exceeds maximum nesting depth of ${this.maxNestingDepth}`,
        );
      }

      todos.push(todoObject);
      this.todoRegistry.set(todoId, todoObject);
      this.dependencyGraph.set(todoId, todoObject.blockedBy);
    }

    return todos;
  }

  /**
   * Derives a nesting level from leading whitespace, counting two whitespace
   * characters per level (a tab counts as a single character).
   *
   * @param {string} line A single source line.
   * @returns {number} Zero-based nesting level.
   */
  calculateNestingLevel(line) {
    // `^\s*` always matches (possibly the empty string), so [0] is safe.
    const leadingWhitespace = line.match(/^\s*/)[0].length;
    return Math.floor(leadingWhitespace / 2);
  }

  /**
   * Builds a deterministic identifier from the description and line index
   * using a 32-bit rolling hash (hash * 31 + charCode).
   *
   * @param {string} description Raw TODO description.
   * @param {number} lineIndex Zero-based index of the line the TODO is on.
   * @returns {string} Identifier of the form `TODO_<non-negative integer>`.
   */
  generateTodoId(description, lineIndex) {
    const str = `${description}${lineIndex}`;
    let hash = 0;

    for (let i = 0; i < str.length; i++) {
      hash = (hash << 5) - hash + str.charCodeAt(i);
      hash |= 0; // Truncate to a signed 32-bit integer.
    }

    return `TODO_${Math.abs(hash)}`;
  }

  /**
   * Parses the comma-separated content of a metadata bracket such as
   * `CRITICAL, BLOCKED_BY:TODO_123`.
   *
   * @param {string} metadataString Text between the brackets; may be empty.
   * @returns {{tags: string[], references: Array<{type: string, value: string}>}}
   *   Plain tags, and `KEY:value` pairs split on the first colon.
   */
  parseMetadata(metadataString) {
    const metadata = { tags: [], references: [] };

    for (const rawPart of metadataString.split(',')) {
      const part = rawPart.trim();
      if (part === '') {
        // An absent or empty bracket must not produce an empty-string tag.
        continue;
      }

      const separatorIndex = part.indexOf(':');
      if (separatorIndex === -1) {
        metadata.tags.push(part);
        continue;
      }

      // Split on the first colon only, so values may themselves contain colons.
      metadata.references.push({
        type: part.slice(0, separatorIndex).trim(),
        value: part.slice(separatorIndex + 1).trim(),
      });
    }

    return metadata;
  }

  /**
   * Finds the identifiers named after "blocked by" or "depends on"
   * (case-insensitive) in a description.
   *
   * @param {string} description TODO description text.
   * @returns {string[]} Referenced identifiers in order of appearance.
   */
  extractBlockedByReferences(description) {
    const blockedByPattern = /(?:blocked by|depends on)\s+([A-Za-z0-9_]+)/gi;
    return Array.from(description.matchAll(blockedByPattern), (match) => match[1]);
  }

  /**
   * Runs cycle detection over the given TODOs, following `blockedBy` links
   * through `todoRegistry`. Detected cycles are appended to `validationErrors`.
   *
   * @param {Array<object>} todos TODO objects, each with an `id`.
   * @returns {boolean} True when `validationErrors` is empty afterwards; this
   *   includes errors recorded earlier, e.g. nesting-depth violations.
   */
  validateDependencies(todos) {
    const visited = new Set();
    const recursionStack = new Set();

    for (const todo of todos) {
      if (!visited.has(todo.id)) {
        this.detectCycles(todo.id, visited, recursionStack);
      }
    }

    return this.validationErrors.length === 0;
  }

  /**
   * Depth-first search step: records a validation error for every
   * `blockedBy` edge that points back to an id on the current DFS path.
   *
   * @param {string} todoId Id to explore.
   * @param {Set<string>} visited Ids already explored (mutated).
   * @param {Set<string>} recursionStack Ids on the current DFS path (mutated).
   */
  detectCycles(todoId, visited, recursionStack) {
    visited.add(todoId);

    const todo = this.todoRegistry.get(todoId);
    if (!todo) {
      // An unregistered id has no outgoing edges. It must never remain on the
      // recursion stack, otherwise a later TODO referencing the same missing
      // id would be misreported as a circular dependency.
      return;
    }

    recursionStack.add(todoId);

    for (const blockedById of todo.blockedBy) {
      if (!visited.has(blockedById)) {
        this.detectCycles(blockedById, visited, recursionStack);
      } else if (recursionStack.has(blockedById)) {
        this.validationErrors.push(`Circular dependency detected: ${todoId} -> ${blockedById}`);
      }
    }

    recursionStack.delete(todoId);
  }

  /**
   * Builds a plain node/edge description of the dependency graph. No
   * rendering is performed; the result is data only.
   *
   * @param {Array<object>} todos TODO objects with `id`, `description` and `blockedBy`.
   * @returns {{nodes: Array<{id: string, label: string}>, edges: Array<{from: string, to: string}>}|null}
   *   Null when graph visualization is disabled.
   */
  generateGraphVisualization(todos) {
    if (!this.enableGraphVisualization) return null;

    const nodes = todos.map((todo) => ({ id: todo.id, label: todo.description }));
    // One edge per dependency, pointing from the blocked TODO to its blocker.
    const edges = todos.flatMap((todo) =>
      todo.blockedBy.map((blockedById) => ({ from: todo.id, to: blockedById })),
    );

    return { nodes, edges };
  }
}

// Demo: run the parser over a small snippet and print each result.
const sampleCode = [
  '',
  '  // TODO: Implement user authentication',
  '  // TODO [CRITICAL]: Fix database connection',
  '  // TODO: Add logging (blocked by TODO_456)',
  '  // TODO: Refactor API endpoints',
  '',
].join('\n');

const parser = new TodoCommentParser(5, true);
const parsedTodos = parser.parseCodeString(sampleCode);
const isValid = parser.validateDependencies(parsedTodos);
const graph = parser.generateGraphVisualization(parsedTodos);

// Emit each labelled result in the same order: TODOs, validity, graph.
for (const [label, value] of [
  ['Parsed TODOs:', parsedTodos],
  ['Is valid:', isValid],
  ['Dependency graph:', graph],
]) {
  console.log(label, value);
}

Code Review

1. Line 11. The dependencyGraph is initialized as a WeakMap but is never actually used. The comment says it will store circular references safely, but the cycle detection logic uses plain Sets instead. Is this property dead code left over from an abandoned design?

2. Lines 17-19. Why is the input validation using typeof codeString !== 'string'? In JavaScript, this is the standard pattern, but the excessive type checking throughout the class (line 17, implied in other methods) suggests the author wanted TypeScript but settled for comments.

3. Lines 63-74. The generateTodoId function reimplements a hash function from scratch. JavaScript has no built-in stable hash function for strings, so this is fine, but the bitwise operations (<< 5, &) are classic overengineering for generating a unique ID. A simple counter or UUID would suffice.

4. Lines 77-91. The parseMetadata function accepts a metadata string but this feature is never validated against actual TODO syntax in parseCodeString. The metadata parsing is implemented as if we're building a sophisticated configuration system, but the regex on line 26 only captures one optional group anyway.

5. Lines 101-131. The cycle detection uses classic DFS which is appropriate, but the fact that detectCycles is called in a loop (line 107) and modifies recursionStack could lead to bugs. The recursion stack should be reset between iterations, but it's actually being shared across all component iterations due to the state management.

6. Lines 134-147. The generateGraphVisualization method returns a data structure that looks like a Cytoscape or D3 format, but there's no actual visualization code. The method exists, the comment says it generates a visualization, but it only returns a plain object. Either remove it or actually implement it.

7. Lines 151-156. The usage example creates a TodoCommentParser, but the sample code contains malformed TODO comments (they're missing colons in some cases, and the 'blocked by' reference uses TODO_456 which doesn't exist in the code). The parser will silently ignore half of these TODOs due to the regex pattern.