code formatting + documentation for MarkdownGenerator

2024-11-07 11:42:51 -05:00
parent e747f99984
commit 9025836857
6 changed files with 561 additions and 211 deletions
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -17,8 +17,8 @@ jobs:
      - name: Setup Node
        uses: actions/setup-node@v4
        with:
-          node-version: '18'
-          registry-url: 'https://registry.npmjs.org'
+          node-version: "18"
+          registry-url: "https://registry.npmjs.org"

      - name: Install dependencies
        run: pnpm install
@@ -26,4 +26,4 @@ jobs:
      - name: Publish
        run: pnpm publish --access public --no-git-checks
        env:
-          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
--- a/README.md
+++ b/README.md
@@ -2,11 +2,14 @@

 > Created to push creative limits.

-Process git repository files into markdown with token counting and sensitive data redaction.
+Process git repository files into markdown with token counting and sensitive
+data redaction.

 ## Overview

-`code-tokenizer-md` is a Node.js tool that processes git repository files, cleans code, redacts sensitive information, and generates markdown documentation with token counts.
+`code-tokenizer-md` is a Node.js tool that processes git repository files,
+cleans code, redacts sensitive information, and generates markdown documentation
+with token counts.

 ```mermaid
 graph TD
@@ -26,18 +29,21 @@ graph TD
 ## Features

 ### Data Processing
+
 - Reads files from git repository
 - Removes comments and unnecessary whitespace
 - Redacts sensitive information (API keys, tokens, etc.)
 - Counts tokens using llama3-tokenizer

 ### Analysis Types
+
 - Token counting per file
 - Total token usage
 - File content analysis
 - Sensitive data detection

 ### Data Presentation
+
 - Markdown formatted output
 - Code block formatting
 - Token count summaries
@@ -70,7 +76,7 @@ import { MarkdownGenerator } from 'code-tokenizer-md';

 const generator = new MarkdownGenerator({
  dir: './project',
-  outputFilePath: './output.md'
+  outputFilePath: './output.md',
 });

 const result = await generator.createMarkdownDocument();
@@ -105,12 +111,8 @@ src/

 ```javascript
 const generator = new MarkdownGenerator({
-  customPatterns: [
-    { regex: /TODO:/g, replacement: '' }
-  ],
-  customSecretPatterns: [
-    { regex: /mySecret/g, replacement: '[REDACTED]' }
-  ]
+  customPatterns: [{ regex: /TODO:/g, replacement: '' }],
+  customSecretPatterns: [{ regex: /mySecret/g, replacement: '[REDACTED]' }],
 });
 ```

@@ -131,6 +133,7 @@ const generator = new MarkdownGenerator({
 - Update the README for significant changes

 ## License
+
 MIT © 2024 Geoff Seemueller

 ## Note
--- a/eslint.config.js
+++ b/eslint.config.js
@@ -25,24 +25,27 @@ export default [
      'no-unused-vars': ['error', { argsIgnorePattern: '^_' }],
      'no-constant-condition': ['error', { checkLoops: false }],
      'no-multiple-empty-lines': ['error', { max: 1, maxEOF: 0 }],
-      'quotes': ['error', 'single', { avoidEscape: true }],
-      'semi': ['error', 'always'],
-      'indent': ['error', 2, { SwitchCase: 1 }],
+      quotes: ['error', 'single', { avoidEscape: true }],
+      semi: ['error', 'always'],
+      indent: ['error', 2, { SwitchCase: 1 }],
      'comma-dangle': ['error', 'always-multiline'],
-      'arrow-parens': ['error', 'as-needed'],  // Changed from 'avoid' to 'as-needed'
+      'arrow-parens': ['error', 'as-needed'], // Changed from 'avoid' to 'as-needed'
      'object-curly-spacing': ['error', 'always'],
      'array-bracket-spacing': ['error', 'never'],
-      'space-before-function-paren': ['error', {
-        anonymous: 'never',
-        named: 'never',
-        asyncArrow: 'always',
-      }],
+      'space-before-function-paren': [
+        'error',
+        {
+          anonymous: 'never',
+          named: 'never',
+          asyncArrow: 'always',
+        },
+      ],
      'no-trailing-spaces': 'error',
      'eol-last': ['error', 'always'],
      'prefer-const': 'error',
      'no-var': 'error',
-      'eqeqeq': ['error', 'always'],
-      'curly': ['error', 'all'],
+      eqeqeq: ['error', 'always'],
+      curly: ['error', 'all'],
      'brace-style': ['error', '1tbs', { allowSingleLine: false }],
      'keyword-spacing': ['error', { before: true, after: true }],
      'space-infix-ops': 'error',
@@ -56,8 +59,8 @@ export default [
  {
    files: ['src/TokenCleaner.js'],
    rules: {
-      'no-useless-escape': 'off'
-    }
+      'no-useless-escape': 'off',
+    },
  },
  {
    files: ['**/*.test.js', '**/*.spec.js'],
@@ -68,4 +71,4 @@ export default [
      },
    },
  },
-];
+];
--- a/package.json
+++ b/package.json
@@ -15,8 +15,8 @@
    "prepublishOnly": "npm run build",
    "dev": "node ./src/cli.js",
    "deploy:dev": "pnpm publish .",
-    "lint": "eslint . --ext .js,.jsx,.ts,.tsx",
-    "lint:fix": "eslint . --ext .js,.jsx,.ts,.tsx --fix",
+    "lint": "eslint src/",
+    "lint:fix": "eslint src/ --fix",
    "format": "prettier --write \"**/*.{js,jsx,ts,tsx,json,md,yml,yaml}\"",
    "fix": "pnpm format && pnpm lint:fix"
  },
@@ -32,4 +32,4 @@
    "globals": "^15.12.0",
    "prettier": "^3.3.3"
  }
-}
+}
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
--- a/src/MarkdownGenerator.js
+++ b/src/MarkdownGenerator.js
@@ -5,7 +5,34 @@ import { readFile, writeFile } from 'fs/promises';
 import llama3Tokenizer from 'llama3-tokenizer-js';
 import { TokenCleaner } from './TokenCleaner.js';

+/**
+ * @typedef {Object} MarkdownGeneratorOptions
+ * @property {string} [dir='.'] - The directory to process files from
+ * @property {string} [outputFilePath='./prompt.md'] - Path where the output markdown file will be saved
+ * @property {Set<string>} [fileTypeExclusions] - Set of file extensions to exclude
+ * @property {string[]} [fileExclusions] - Array of specific files or patterns to exclude
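+ * @property {Array<{regex: RegExp, replacement: string}>} [customPatterns] - Custom regex/replacement pairs for token cleaning
+ * @property {Array<{regex: RegExp, replacement: string}>} [customSecretPatterns] - Custom regex/replacement pairs for identifying and redacting secrets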
+ * @property {boolean} [verbose=true] - Whether to log detailed information during processing
+ */
+
+/**
+ * @class MarkdownGenerator
+ * @description A class that generates markdown documentation from tracked Git files in a project.
+ * It can exclude specific file types and files, clean tokens, and include todo lists.
+ */
 export class MarkdownGenerator {
+  /**
+   * Creates an instance of MarkdownGenerator.
+   * @param {Object} [options={}] - Configuration options for the generator
+   * @param {string} [options.dir='.'] - The directory to process files from
+   * @param {string} [options.outputFilePath='./prompt.md'] - Path where the output markdown file will be saved
+   * @param {Set<string>} [options.fileTypeExclusions] - Set of file extensions to exclude (defaults to common image and asset files)
+   * @param {string[]} [options.fileExclusions] - Array of specific files or patterns to exclude
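+   * @param {Array<{regex: RegExp, replacement: string}>} [options.customPatterns] - Custom regex/replacement pairs for token cleaning
+   * @param {Array<{regex: RegExp, replacement: string}>} [options.customSecretPatterns] - Custom regex/replacement pairs for identifying and redacting secrets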
+   * @param {boolean} [options.verbose=true] - Whether to log detailed information during processing
+   */
  constructor(options = {}) {
    this.dir = options.dir || '.';
    this.outputFilePath = options.outputFilePath || './prompt.md';
@@ -35,7 +62,12 @@ export class MarkdownGenerator {
    this.tokenCleaner = new TokenCleaner(options.customPatterns, options.customSecretPatterns);
    this.verbose = options.verbose ?? true;
  }
-
+  /**
+   * Retrieves a list of files tracked by Git, excluding those specified in fileTypeExclusions and fileExclusions.
+   * @async
+   * @returns {Promise<string[]>} Array of tracked file paths that aren't excluded,
+   *   or an empty array when listing the tracked files fails (the error is caught, not thrown)
+   */
  async getTrackedFiles() {
    try {
      const output = this.execCommand('git ls-files');
@@ -55,7 +87,17 @@ export class MarkdownGenerator {
      return [];
    }
  }
-
+  /**
+   * Determines if a file should be excluded based on the given pattern.
+   * @param {string} filePath - Path of the file to check
+   * @param {string} pattern - Exclusion pattern to match against
+   * @returns {boolean} True if the file should be excluded, false otherwise
+   * @example
+   * // Excludes all files in a directory
+   * isFileExcluded('src/tests/file.js', 'src/tests/*') // returns true
+   * // Excludes specific file extensions in a directory
+   * isFileExcluded('src/assets/image.png', 'src/assets/*.png') // returns true
+   */
  isFileExcluded(filePath, pattern) {
    if (pattern.endsWith('/*')) {
      const directory = pattern.slice(0, -2);
@@ -67,7 +109,13 @@ export class MarkdownGenerator {
    }
    return filePath === pattern;
  }
-
+  /**
+   * Reads and processes the content of a file, cleaning and redacting sensitive information.
+   * @async
+   * @param {string} filePath - Path to the file to read
+   * @returns {Promise<string>} Cleaned and redacted content of the file,
+   *   or an empty string when the file cannot be read or processed (the error is caught, not thrown)
+   */
  async readFileContent(filePath) {
    try {
      const content = await fs.readFile(filePath, 'utf-8');
@@ -84,7 +132,12 @@ export class MarkdownGenerator {
      return '';
    }
  }
-
+  /**
+   * Generates markdown content from all tracked files in the project.
+   * @async
+   * @returns {Promise<string>} Generated markdown content containing all processed files
+   * @throws {Error} When unable to generate markdown content
+   */
  async generateMarkdown() {
    const trackedFiles = await this.getTrackedFiles();
    if (this.verbose) {
@@ -98,7 +151,12 @@ export class MarkdownGenerator {
    }
    return markdownContent;
  }
-
+  /**
+   * Retrieves the content of the project's todo file, creating it if it doesn't exist.
+   * @async
+   * @returns {Promise<string|undefined>} Content of the todo file, or undefined when an
+   *   error occurs (the error is logged to the console, not thrown)
+   */
  async getTodo() {
    try {
      console.log('getting project todo');
@@ -113,7 +171,15 @@ export class MarkdownGenerator {
      console.error('Error reading todo file:', error);
    }
  }
-
+  /**
+   * Creates a complete markdown document combining code documentation and todos.
+   * @async
+   * @returns {Promise<{success: boolean, tokenCount?: number, error?: Error}>} Result object:
+   *   `success` - whether the operation was successful;
+   *   `tokenCount` - number of tokens in the generated document, when present;
+   *   `error` - the caught error when the operation failed. Failures are reported
+   *   through this object (`{ success: false, error }`) rather than thrown.
+   */
  async createMarkdownDocument() {
    try {
      const codeMarkdown = await this.generateMarkdown();
@@ -133,7 +199,13 @@ export class MarkdownGenerator {
      return { success: false, error };
    }
  }
-
+  /**
+   * Executes a shell command in the specified directory.
+   * @param {string} command - Shell command to execute
+   * @returns {string} Output of the command
+   * @throws {Error} When command execution fails
+   * @private
+   */
  execCommand(command) {
    try {
      return execSync(command, { cwd: this.dir, encoding: 'utf-8' }).toString().trim();