code formatting + documentation for MarkdownGenerator

This commit is contained in:
2024-11-07 11:42:51 -05:00
parent e747f99984
commit 9025836857
6 changed files with 561 additions and 211 deletions

View File

@@ -17,8 +17,8 @@ jobs:
- name: Setup Node
uses: actions/setup-node@v4
with:
node-version: '18'
registry-url: 'https://registry.npmjs.org'
node-version: "18"
registry-url: "https://registry.npmjs.org"
- name: Install dependencies
run: pnpm install

View File

@@ -2,11 +2,14 @@
> Created to push creative limits.
Process git repository files into markdown with token counting and sensitive data redaction.
Process git repository files into markdown with token counting and sensitive
data redaction.
## Overview
`code-tokenizer-md` is a Node.js tool that processes git repository files, cleans code, redacts sensitive information, and generates markdown documentation with token counts.
`code-tokenizer-md` is a Node.js tool that processes git repository files,
cleans code, redacts sensitive information, and generates markdown documentation
with token counts.
```mermaid
graph TD
@@ -26,18 +29,21 @@ graph TD
## Features
### Data Processing
- Reads files from git repository
- Removes comments and unnecessary whitespace
- Redacts sensitive information (API keys, tokens, etc.)
- Counts tokens using llama3-tokenizer
### Analysis Types
- Token counting per file
- Total token usage
- File content analysis
- Sensitive data detection
### Data Presentation
- Markdown formatted output
- Code block formatting
- Token count summaries
@@ -70,7 +76,7 @@ import { MarkdownGenerator } from 'code-tokenizer-md';
const generator = new MarkdownGenerator({
dir: './project',
outputFilePath: './output.md'
outputFilePath: './output.md',
});
const result = await generator.createMarkdownDocument();
@@ -105,12 +111,8 @@ src/
```javascript
const generator = new MarkdownGenerator({
customPatterns: [
{ regex: /TODO:/g, replacement: '' }
],
customSecretPatterns: [
{ regex: /mySecret/g, replacement: '[REDACTED]' }
]
customPatterns: [{ regex: /TODO:/g, replacement: '' }],
customSecretPatterns: [{ regex: /mySecret/g, replacement: '[REDACTED]' }],
});
```
@@ -131,6 +133,7 @@ const generator = new MarkdownGenerator({
- Update the README for significant changes
## License
MIT © 2024 Geoff Seemueller
## Note

View File

@@ -25,24 +25,27 @@ export default [
'no-unused-vars': ['error', { argsIgnorePattern: '^_' }],
'no-constant-condition': ['error', { checkLoops: false }],
'no-multiple-empty-lines': ['error', { max: 1, maxEOF: 0 }],
'quotes': ['error', 'single', { avoidEscape: true }],
'semi': ['error', 'always'],
'indent': ['error', 2, { SwitchCase: 1 }],
quotes: ['error', 'single', { avoidEscape: true }],
semi: ['error', 'always'],
indent: ['error', 2, { SwitchCase: 1 }],
'comma-dangle': ['error', 'always-multiline'],
'arrow-parens': ['error', 'as-needed'], // Changed from 'avoid' to 'as-needed'
'object-curly-spacing': ['error', 'always'],
'array-bracket-spacing': ['error', 'never'],
'space-before-function-paren': ['error', {
'space-before-function-paren': [
'error',
{
anonymous: 'never',
named: 'never',
asyncArrow: 'always',
}],
},
],
'no-trailing-spaces': 'error',
'eol-last': ['error', 'always'],
'prefer-const': 'error',
'no-var': 'error',
'eqeqeq': ['error', 'always'],
'curly': ['error', 'all'],
eqeqeq: ['error', 'always'],
curly: ['error', 'all'],
'brace-style': ['error', '1tbs', { allowSingleLine: false }],
'keyword-spacing': ['error', { before: true, after: true }],
'space-infix-ops': 'error',
@@ -56,8 +59,8 @@ export default [
{
files: ['src/TokenCleaner.js'],
rules: {
'no-useless-escape': 'off'
}
'no-useless-escape': 'off',
},
},
{
files: ['**/*.test.js', '**/*.spec.js'],

View File

@@ -15,8 +15,8 @@
"prepublishOnly": "npm run build",
"dev": "node ./src/cli.js",
"deploy:dev": "pnpm publish .",
"lint": "eslint . --ext .js,.jsx,.ts,.tsx",
"lint:fix": "eslint . --ext .js,.jsx,.ts,.tsx --fix",
"lint": "eslint src/",
"lint:fix": "eslint src/ --fix",
"format": "prettier --write \"**/*.{js,jsx,ts,tsx,json,md,yml,yaml}\"",
"fix": "pnpm format && pnpm lint:fix"
},

622
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -5,7 +5,34 @@ import { readFile, writeFile } from 'fs/promises';
import llama3Tokenizer from 'llama3-tokenizer-js';
import { TokenCleaner } from './TokenCleaner.js';
/**
* @typedef {Object} MarkdownGeneratorOptions
* @property {string} [dir='.'] - The directory to process files from
* @property {string} [outputFilePath='./prompt.md'] - Path where the output markdown file will be saved
* @property {Set<string>} [fileTypeExclusions] - Set of file extensions to exclude
* @property {string[]} [fileExclusions] - Array of specific files or patterns to exclude
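* @property {Array<{regex: RegExp, replacement: string}>} [customPatterns] - Custom patterns for token cleaning, each a regex and its replacement text
* @property {Array<{regex: RegExp, replacement: string}>} [customSecretPatterns] - Custom patterns for identifying and redacting secrets, each a regex and its replacement text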
* @property {boolean} [verbose=true] - Whether to log detailed information during processing
*/
/**
* @class MarkdownGenerator
* @description A class that generates markdown documentation from tracked Git files in a project.
* It can exclude specific file types and files, clean tokens, and include todo lists.
*/
export class MarkdownGenerator {
/**
* Creates an instance of MarkdownGenerator.
* @param {Object} [options={}] - Configuration options for the generator
* @param {string} [options.dir='.'] - The directory to process files from
* @param {string} [options.outputFilePath='./prompt.md'] - Path where the output markdown file will be saved
* @param {Set<string>} [options.fileTypeExclusions] - Set of file extensions to exclude (defaults to common image and asset files)
* @param {string[]} [options.fileExclusions] - Array of specific files or patterns to exclude
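* @param {Array<{regex: RegExp, replacement: string}>} [options.customPatterns] - Custom patterns for token cleaning, each a regex and its replacement text
* @param {Array<{regex: RegExp, replacement: string}>} [options.customSecretPatterns] - Custom patterns for identifying and redacting secrets, each a regex and its replacement text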
* @param {boolean} [options.verbose=true] - Whether to log detailed information during processing
*/
constructor(options = {}) {
this.dir = options.dir || '.';
this.outputFilePath = options.outputFilePath || './prompt.md';
@@ -35,7 +62,12 @@ export class MarkdownGenerator {
this.tokenCleaner = new TokenCleaner(options.customPatterns, options.customSecretPatterns);
this.verbose = options.verbose ?? true;
}
/**
* Retrieves a list of files tracked by Git, excluding those specified in fileTypeExclusions and fileExclusions.
* @async
* @returns {Promise<string[]>} Array of tracked file paths that aren't excluded; resolves to an empty array
*   when the git command or file listing fails (the failure is handled internally, not thrown)
*/
async getTrackedFiles() {
try {
const output = this.execCommand('git ls-files');
@@ -55,7 +87,17 @@ export class MarkdownGenerator {
return [];
}
}
/**
* Determines if a file should be excluded based on the given pattern.
* @param {string} filePath - Path of the file to check
* @param {string} pattern - Exclusion pattern to match against
* @returns {boolean} True if the file should be excluded, false otherwise
* @example
* // Excludes all files in a directory
* isFileExcluded('src/tests/file.js', 'src/tests/*') // returns true
* // Excludes specific file extensions in a directory
* isFileExcluded('src/assets/image.png', 'src/assets/*.png') // returns true
*/
isFileExcluded(filePath, pattern) {
if (pattern.endsWith('/*')) {
const directory = pattern.slice(0, -2);
@@ -67,7 +109,13 @@ export class MarkdownGenerator {
}
return filePath === pattern;
}
/**
* Reads and processes the content of a file, cleaning and redacting sensitive information.
* @async
* @param {string} filePath - Path to the file to read
* @returns {Promise<string>} Cleaned and redacted content of the file; resolves to an empty string
*   when the file cannot be read or processed (the failure is handled internally, not thrown)
*/
async readFileContent(filePath) {
try {
const content = await fs.readFile(filePath, 'utf-8');
@@ -84,7 +132,12 @@ export class MarkdownGenerator {
return '';
}
}
/**
* Generates markdown content from all tracked files in the project.
* @async
* @returns {Promise<string>} Generated markdown content containing all processed files
* @throws {Error} When unable to generate markdown content
*/
async generateMarkdown() {
const trackedFiles = await this.getTrackedFiles();
if (this.verbose) {
@@ -98,7 +151,12 @@ export class MarkdownGenerator {
}
return markdownContent;
}
/**
* Retrieves the content of the project's todo file, creating it if it doesn't exist.
* @async
* @returns {Promise<string|undefined>} Content of the todo file; resolves to undefined when reading or
*   creating the todo file fails (the error is logged with console.error, not thrown)
*/
async getTodo() {
try {
console.log('getting project todo');
@@ -113,7 +171,15 @@ export class MarkdownGenerator {
console.error('Error reading todo file:', error);
}
}
/**
* Creates a complete markdown document combining code documentation and todos.
* @async
* @returns {Promise<{success: boolean, tokenCount?: number, error?: Error}>} Result object:
*   `success` is whether the operation was successful, `tokenCount` is the number of tokens in the
*   generated document, and `error` is the error object if the operation failed.
*   Failures while creating or writing the document are caught and reported as
*   `{ success: false, error }` rather than thrown.
*/
async createMarkdownDocument() {
try {
const codeMarkdown = await this.generateMarkdown();
@@ -133,7 +199,13 @@ export class MarkdownGenerator {
return { success: false, error };
}
}
/**
* Executes a shell command in the specified directory.
* @param {string} command - Shell command to execute
* @returns {string} Output of the command
* @throws {Error} When command execution fails
* @private
*/
execCommand(command) {
try {
return execSync(command, { cwd: this.dir, encoding: 'utf-8' }).toString().trim();