code formatting + documentation for MarkdownGenerator
This commit is contained in:
6
.github/workflows/publish.yml
vendored
6
.github/workflows/publish.yml
vendored
@@ -17,8 +17,8 @@ jobs:
|
||||
- name: Setup Node
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '18'
|
||||
registry-url: 'https://registry.npmjs.org'
|
||||
node-version: "18"
|
||||
registry-url: "https://registry.npmjs.org"
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install
|
||||
@@ -26,4 +26,4 @@ jobs:
|
||||
- name: Publish
|
||||
run: pnpm publish --access public --no-git-checks
|
||||
env:
|
||||
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||
|
21
README.md
21
README.md
@@ -2,11 +2,14 @@
|
||||
|
||||
> Created to push creative limits.
|
||||
|
||||
Process git repository files into markdown with token counting and sensitive data redaction.
|
||||
Process git repository files into markdown with token counting and sensitive
|
||||
data redaction.
|
||||
|
||||
## Overview
|
||||
|
||||
`code-tokenizer-md` is a Node.js tool that processes git repository files, cleans code, redacts sensitive information, and generates markdown documentation with token counts.
|
||||
`code-tokenizer-md` is a Node.js tool that processes git repository files,
|
||||
cleans code, redacts sensitive information, and generates markdown documentation
|
||||
with token counts.
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
@@ -26,18 +29,21 @@ graph TD
|
||||
## Features
|
||||
|
||||
### Data Processing
|
||||
|
||||
- Reads files from git repository
|
||||
- Removes comments and unnecessary whitespace
|
||||
- Redacts sensitive information (API keys, tokens, etc.)
|
||||
- Counts tokens using llama3-tokenizer
|
||||
|
||||
### Analysis Types
|
||||
|
||||
- Token counting per file
|
||||
- Total token usage
|
||||
- File content analysis
|
||||
- Sensitive data detection
|
||||
|
||||
### Data Presentation
|
||||
|
||||
- Markdown formatted output
|
||||
- Code block formatting
|
||||
- Token count summaries
|
||||
@@ -70,7 +76,7 @@ import { MarkdownGenerator } from 'code-tokenizer-md';
|
||||
|
||||
const generator = new MarkdownGenerator({
|
||||
dir: './project',
|
||||
outputFilePath: './output.md'
|
||||
outputFilePath: './output.md',
|
||||
});
|
||||
|
||||
const result = await generator.createMarkdownDocument();
|
||||
@@ -105,12 +111,8 @@ src/
|
||||
|
||||
```javascript
|
||||
const generator = new MarkdownGenerator({
|
||||
customPatterns: [
|
||||
{ regex: /TODO:/g, replacement: '' }
|
||||
],
|
||||
customSecretPatterns: [
|
||||
{ regex: /mySecret/g, replacement: '[REDACTED]' }
|
||||
]
|
||||
customPatterns: [{ regex: /TODO:/g, replacement: '' }],
|
||||
customSecretPatterns: [{ regex: /mySecret/g, replacement: '[REDACTED]' }],
|
||||
});
|
||||
```
|
||||
|
||||
@@ -131,6 +133,7 @@ const generator = new MarkdownGenerator({
|
||||
- Update the README for significant changes
|
||||
|
||||
## License
|
||||
|
||||
MIT © 2024 Geoff Seemueller
|
||||
|
||||
## Note
|
||||
|
@@ -25,24 +25,27 @@ export default [
|
||||
'no-unused-vars': ['error', { argsIgnorePattern: '^_' }],
|
||||
'no-constant-condition': ['error', { checkLoops: false }],
|
||||
'no-multiple-empty-lines': ['error', { max: 1, maxEOF: 0 }],
|
||||
'quotes': ['error', 'single', { avoidEscape: true }],
|
||||
'semi': ['error', 'always'],
|
||||
'indent': ['error', 2, { SwitchCase: 1 }],
|
||||
quotes: ['error', 'single', { avoidEscape: true }],
|
||||
semi: ['error', 'always'],
|
||||
indent: ['error', 2, { SwitchCase: 1 }],
|
||||
'comma-dangle': ['error', 'always-multiline'],
|
||||
'arrow-parens': ['error', 'as-needed'], // Changed from 'avoid' to 'as-needed'
|
||||
'arrow-parens': ['error', 'as-needed'], // Changed from 'avoid' to 'as-needed'
|
||||
'object-curly-spacing': ['error', 'always'],
|
||||
'array-bracket-spacing': ['error', 'never'],
|
||||
'space-before-function-paren': ['error', {
|
||||
anonymous: 'never',
|
||||
named: 'never',
|
||||
asyncArrow: 'always',
|
||||
}],
|
||||
'space-before-function-paren': [
|
||||
'error',
|
||||
{
|
||||
anonymous: 'never',
|
||||
named: 'never',
|
||||
asyncArrow: 'always',
|
||||
},
|
||||
],
|
||||
'no-trailing-spaces': 'error',
|
||||
'eol-last': ['error', 'always'],
|
||||
'prefer-const': 'error',
|
||||
'no-var': 'error',
|
||||
'eqeqeq': ['error', 'always'],
|
||||
'curly': ['error', 'all'],
|
||||
eqeqeq: ['error', 'always'],
|
||||
curly: ['error', 'all'],
|
||||
'brace-style': ['error', '1tbs', { allowSingleLine: false }],
|
||||
'keyword-spacing': ['error', { before: true, after: true }],
|
||||
'space-infix-ops': 'error',
|
||||
@@ -56,8 +59,8 @@ export default [
|
||||
{
|
||||
files: ['src/TokenCleaner.js'],
|
||||
rules: {
|
||||
'no-useless-escape': 'off'
|
||||
}
|
||||
'no-useless-escape': 'off',
|
||||
},
|
||||
},
|
||||
{
|
||||
files: ['**/*.test.js', '**/*.spec.js'],
|
||||
@@ -68,4 +71,4 @@ export default [
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
];
|
||||
|
@@ -15,8 +15,8 @@
|
||||
"prepublishOnly": "npm run build",
|
||||
"dev": "node ./src/cli.js",
|
||||
"deploy:dev": "pnpm publish .",
|
||||
"lint": "eslint . --ext .js,.jsx,.ts,.tsx",
|
||||
"lint:fix": "eslint . --ext .js,.jsx,.ts,.tsx --fix",
|
||||
"lint": "eslint src/",
|
||||
"lint:fix": "eslint src/ --fix",
|
||||
"format": "prettier --write \"**/*.{js,jsx,ts,tsx,json,md,yml,yaml}\"",
|
||||
"fix": "pnpm format && pnpm lint:fix"
|
||||
},
|
||||
@@ -32,4 +32,4 @@
|
||||
"globals": "^15.12.0",
|
||||
"prettier": "^3.3.3"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
622
pnpm-lock.yaml
generated
622
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
@@ -5,7 +5,34 @@ import { readFile, writeFile } from 'fs/promises';
|
||||
import llama3Tokenizer from 'llama3-tokenizer-js';
|
||||
import { TokenCleaner } from './TokenCleaner.js';
|
||||
|
||||
/**
|
||||
* @typedef {Object} MarkdownGeneratorOptions
|
||||
* @property {string} [dir='.'] - The directory to process files from
|
||||
* @property {string} [outputFilePath='./prompt.md'] - Path where the output markdown file will be saved
|
||||
* @property {Set<string>} [fileTypeExclusions] - Set of file extensions to exclude
|
||||
* @property {string[]} [fileExclusions] - Array of specific files or patterns to exclude
|
||||
* @property {Array<{regex: RegExp, replacement: string}>} [customPatterns] - Custom regex/replacement pairs applied when cleaning tokens
|
||||
* @property {Array<{regex: RegExp, replacement: string}>} [customSecretPatterns] - Custom regex/replacement pairs for identifying and redacting secrets
|
||||
* @property {boolean} [verbose=true] - Whether to log detailed information during processing
|
||||
*/
|
||||
|
||||
/**
|
||||
* @class MarkdownGenerator
|
||||
* @description A class that generates markdown documentation from tracked Git files in a project.
|
||||
* It can exclude specific file types and files, clean tokens, and include todo lists.
|
||||
*/
|
||||
export class MarkdownGenerator {
|
||||
/**
|
||||
* Creates an instance of MarkdownGenerator.
|
||||
* @param {MarkdownGeneratorOptions} [options={}] - Configuration options for the generator
|
||||
* @param {string} [options.dir='.'] - The directory to process files from
|
||||
* @param {string} [options.outputFilePath='./prompt.md'] - Path where the output markdown file will be saved
|
||||
* @param {Set<string>} [options.fileTypeExclusions] - Set of file extensions to exclude (defaults to common image and asset files)
|
||||
* @param {string[]} [options.fileExclusions] - Array of specific files or patterns to exclude
|
||||
* @param {Array<{regex: RegExp, replacement: string}>} [options.customPatterns] - Custom regex/replacement pairs applied when cleaning tokens
|
||||
* @param {Array<{regex: RegExp, replacement: string}>} [options.customSecretPatterns] - Custom regex/replacement pairs for identifying and redacting secrets
|
||||
* @param {boolean} [options.verbose=true] - Whether to log detailed information during processing
|
||||
*/
|
||||
constructor(options = {}) {
|
||||
this.dir = options.dir || '.';
|
||||
this.outputFilePath = options.outputFilePath || './prompt.md';
|
||||
@@ -35,7 +62,12 @@ export class MarkdownGenerator {
|
||||
this.tokenCleaner = new TokenCleaner(options.customPatterns, options.customSecretPatterns);
|
||||
this.verbose = options.verbose ?? true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a list of files tracked by Git, excluding those specified in fileTypeExclusions and fileExclusions.
|
||||
* @async
|
||||
* @returns {Promise<string[]>} Array of tracked file paths that aren't excluded
|
||||
* @throws {Error} When unable to execute git command or access files
|
||||
*/
|
||||
async getTrackedFiles() {
|
||||
try {
|
||||
const output = this.execCommand('git ls-files');
|
||||
@@ -55,7 +87,17 @@ export class MarkdownGenerator {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if a file should be excluded based on the given pattern.
|
||||
* @param {string} filePath - Path of the file to check
|
||||
* @param {string} pattern - Exclusion pattern to match against
|
||||
* @returns {boolean} True if the file should be excluded, false otherwise
|
||||
* @example
|
||||
* // Excludes all files in a directory
|
||||
* isFileExcluded('src/tests/file.js', 'src/tests/*') // returns true
|
||||
* // Excludes specific file extensions in a directory
|
||||
* isFileExcluded('src/assets/image.png', 'src/assets/*.png') // returns true
|
||||
*/
|
||||
isFileExcluded(filePath, pattern) {
|
||||
if (pattern.endsWith('/*')) {
|
||||
const directory = pattern.slice(0, -2);
|
||||
@@ -67,7 +109,13 @@ export class MarkdownGenerator {
|
||||
}
|
||||
return filePath === pattern;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads and processes the content of a file, cleaning and redacting sensitive information.
|
||||
* @async
|
||||
* @param {string} filePath - Path to the file to read
|
||||
* @returns {Promise<string>} Cleaned and redacted content of the file
|
||||
* @throws {Error} When unable to read or process the file
|
||||
*/
|
||||
async readFileContent(filePath) {
|
||||
try {
|
||||
const content = await fs.readFile(filePath, 'utf-8');
|
||||
@@ -84,7 +132,12 @@ export class MarkdownGenerator {
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates markdown content from all tracked files in the project.
|
||||
* @async
|
||||
* @returns {Promise<string>} Generated markdown content containing all processed files
|
||||
* @throws {Error} When unable to generate markdown content
|
||||
*/
|
||||
async generateMarkdown() {
|
||||
const trackedFiles = await this.getTrackedFiles();
|
||||
if (this.verbose) {
|
||||
@@ -98,7 +151,12 @@ export class MarkdownGenerator {
|
||||
}
|
||||
return markdownContent;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the content of the project's todo file, creating it if it doesn't exist.
|
||||
* @async
|
||||
* @returns {Promise<string>} Content of the todo file
|
||||
* @throws {Error} When unable to read or create the todo file
|
||||
*/
|
||||
async getTodo() {
|
||||
try {
|
||||
console.log('getting project todo');
|
||||
@@ -113,7 +171,15 @@ export class MarkdownGenerator {
|
||||
console.error('Error reading todo file:', error);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a complete markdown document combining code documentation and todos.
|
||||
* @async
|
||||
* @returns {Promise<{success: boolean, tokenCount?: number, error?: Error}>} Result object:
|
||||
* `success` - whether the operation was successful;
|
||||
* `tokenCount` (optional) - number of tokens in the generated document;
|
||||
* `error` (optional) - Error object if the operation failed
|
||||
* @throws {Error} Not expected in normal use: failures to create or write the markdown document are caught and reported as `{ success: false, error }`
|
||||
*/
|
||||
async createMarkdownDocument() {
|
||||
try {
|
||||
const codeMarkdown = await this.generateMarkdown();
|
||||
@@ -133,7 +199,13 @@ export class MarkdownGenerator {
|
||||
return { success: false, error };
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes a shell command in the specified directory.
|
||||
* @param {string} command - Shell command to execute
|
||||
* @returns {string} Output of the command
|
||||
* @throws {Error} When command execution fails
|
||||
* @private
|
||||
*/
|
||||
execCommand(command) {
|
||||
try {
|
||||
return execSync(command, { cwd: this.dir, encoding: 'utf-8' }).toString().trim();
|
||||
|
Reference in New Issue
Block a user