code formatting + documentation for MarkdownGenerator
This commit is contained in:
4
.github/workflows/publish.yml
vendored
4
.github/workflows/publish.yml
vendored
@@ -17,8 +17,8 @@ jobs:
|
|||||||
- name: Setup Node
|
- name: Setup Node
|
||||||
uses: actions/setup-node@v4
|
uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: '18'
|
node-version: "18"
|
||||||
registry-url: 'https://registry.npmjs.org'
|
registry-url: "https://registry.npmjs.org"
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: pnpm install
|
run: pnpm install
|
||||||
|
21
README.md
21
README.md
@@ -2,11 +2,14 @@
|
|||||||
|
|
||||||
> Created to push creative limits.
|
> Created to push creative limits.
|
||||||
|
|
||||||
Process git repository files into markdown with token counting and sensitive data redaction.
|
Process git repository files into markdown with token counting and sensitive
|
||||||
|
data redaction.
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
`code-tokenizer-md` is a Node.js tool that processes git repository files, cleans code, redacts sensitive information, and generates markdown documentation with token counts.
|
`code-tokenizer-md` is a Node.js tool that processes git repository files,
|
||||||
|
cleans code, redacts sensitive information, and generates markdown documentation
|
||||||
|
with token counts.
|
||||||
|
|
||||||
```mermaid
|
```mermaid
|
||||||
graph TD
|
graph TD
|
||||||
@@ -26,18 +29,21 @@ graph TD
|
|||||||
## Features
|
## Features
|
||||||
|
|
||||||
### Data Processing
|
### Data Processing
|
||||||
|
|
||||||
- Reads files from git repository
|
- Reads files from git repository
|
||||||
- Removes comments and unnecessary whitespace
|
- Removes comments and unnecessary whitespace
|
||||||
- Redacts sensitive information (API keys, tokens, etc.)
|
- Redacts sensitive information (API keys, tokens, etc.)
|
||||||
- Counts tokens using llama3-tokenizer
|
- Counts tokens using llama3-tokenizer
|
||||||
|
|
||||||
### Analysis Types
|
### Analysis Types
|
||||||
|
|
||||||
- Token counting per file
|
- Token counting per file
|
||||||
- Total token usage
|
- Total token usage
|
||||||
- File content analysis
|
- File content analysis
|
||||||
- Sensitive data detection
|
- Sensitive data detection
|
||||||
|
|
||||||
### Data Presentation
|
### Data Presentation
|
||||||
|
|
||||||
- Markdown formatted output
|
- Markdown formatted output
|
||||||
- Code block formatting
|
- Code block formatting
|
||||||
- Token count summaries
|
- Token count summaries
|
||||||
@@ -70,7 +76,7 @@ import { MarkdownGenerator } from 'code-tokenizer-md';
|
|||||||
|
|
||||||
const generator = new MarkdownGenerator({
|
const generator = new MarkdownGenerator({
|
||||||
dir: './project',
|
dir: './project',
|
||||||
outputFilePath: './output.md'
|
outputFilePath: './output.md',
|
||||||
});
|
});
|
||||||
|
|
||||||
const result = await generator.createMarkdownDocument();
|
const result = await generator.createMarkdownDocument();
|
||||||
@@ -105,12 +111,8 @@ src/
|
|||||||
|
|
||||||
```javascript
|
```javascript
|
||||||
const generator = new MarkdownGenerator({
|
const generator = new MarkdownGenerator({
|
||||||
customPatterns: [
|
customPatterns: [{ regex: /TODO:/g, replacement: '' }],
|
||||||
{ regex: /TODO:/g, replacement: '' }
|
customSecretPatterns: [{ regex: /mySecret/g, replacement: '[REDACTED]' }],
|
||||||
],
|
|
||||||
customSecretPatterns: [
|
|
||||||
{ regex: /mySecret/g, replacement: '[REDACTED]' }
|
|
||||||
]
|
|
||||||
});
|
});
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -131,6 +133,7 @@ const generator = new MarkdownGenerator({
|
|||||||
- Update the README for significant changes
|
- Update the README for significant changes
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
MIT © 2024 Geoff Seemueller
|
MIT © 2024 Geoff Seemueller
|
||||||
|
|
||||||
## Note
|
## Note
|
||||||
|
@@ -25,24 +25,27 @@ export default [
|
|||||||
'no-unused-vars': ['error', { argsIgnorePattern: '^_' }],
|
'no-unused-vars': ['error', { argsIgnorePattern: '^_' }],
|
||||||
'no-constant-condition': ['error', { checkLoops: false }],
|
'no-constant-condition': ['error', { checkLoops: false }],
|
||||||
'no-multiple-empty-lines': ['error', { max: 1, maxEOF: 0 }],
|
'no-multiple-empty-lines': ['error', { max: 1, maxEOF: 0 }],
|
||||||
'quotes': ['error', 'single', { avoidEscape: true }],
|
quotes: ['error', 'single', { avoidEscape: true }],
|
||||||
'semi': ['error', 'always'],
|
semi: ['error', 'always'],
|
||||||
'indent': ['error', 2, { SwitchCase: 1 }],
|
indent: ['error', 2, { SwitchCase: 1 }],
|
||||||
'comma-dangle': ['error', 'always-multiline'],
|
'comma-dangle': ['error', 'always-multiline'],
|
||||||
'arrow-parens': ['error', 'as-needed'], // Changed from 'avoid' to 'as-needed'
|
'arrow-parens': ['error', 'as-needed'], // Changed from 'avoid' to 'as-needed'
|
||||||
'object-curly-spacing': ['error', 'always'],
|
'object-curly-spacing': ['error', 'always'],
|
||||||
'array-bracket-spacing': ['error', 'never'],
|
'array-bracket-spacing': ['error', 'never'],
|
||||||
'space-before-function-paren': ['error', {
|
'space-before-function-paren': [
|
||||||
|
'error',
|
||||||
|
{
|
||||||
anonymous: 'never',
|
anonymous: 'never',
|
||||||
named: 'never',
|
named: 'never',
|
||||||
asyncArrow: 'always',
|
asyncArrow: 'always',
|
||||||
}],
|
},
|
||||||
|
],
|
||||||
'no-trailing-spaces': 'error',
|
'no-trailing-spaces': 'error',
|
||||||
'eol-last': ['error', 'always'],
|
'eol-last': ['error', 'always'],
|
||||||
'prefer-const': 'error',
|
'prefer-const': 'error',
|
||||||
'no-var': 'error',
|
'no-var': 'error',
|
||||||
'eqeqeq': ['error', 'always'],
|
eqeqeq: ['error', 'always'],
|
||||||
'curly': ['error', 'all'],
|
curly: ['error', 'all'],
|
||||||
'brace-style': ['error', '1tbs', { allowSingleLine: false }],
|
'brace-style': ['error', '1tbs', { allowSingleLine: false }],
|
||||||
'keyword-spacing': ['error', { before: true, after: true }],
|
'keyword-spacing': ['error', { before: true, after: true }],
|
||||||
'space-infix-ops': 'error',
|
'space-infix-ops': 'error',
|
||||||
@@ -56,8 +59,8 @@ export default [
|
|||||||
{
|
{
|
||||||
files: ['src/TokenCleaner.js'],
|
files: ['src/TokenCleaner.js'],
|
||||||
rules: {
|
rules: {
|
||||||
'no-useless-escape': 'off'
|
'no-useless-escape': 'off',
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
files: ['**/*.test.js', '**/*.spec.js'],
|
files: ['**/*.test.js', '**/*.spec.js'],
|
||||||
|
@@ -15,8 +15,8 @@
|
|||||||
"prepublishOnly": "npm run build",
|
"prepublishOnly": "npm run build",
|
||||||
"dev": "node ./src/cli.js",
|
"dev": "node ./src/cli.js",
|
||||||
"deploy:dev": "pnpm publish .",
|
"deploy:dev": "pnpm publish .",
|
||||||
"lint": "eslint . --ext .js,.jsx,.ts,.tsx",
|
"lint": "eslint src/",
|
||||||
"lint:fix": "eslint . --ext .js,.jsx,.ts,.tsx --fix",
|
"lint:fix": "eslint src/ --fix",
|
||||||
"format": "prettier --write \"**/*.{js,jsx,ts,tsx,json,md,yml,yaml}\"",
|
"format": "prettier --write \"**/*.{js,jsx,ts,tsx,json,md,yml,yaml}\"",
|
||||||
"fix": "pnpm format && pnpm lint:fix"
|
"fix": "pnpm format && pnpm lint:fix"
|
||||||
},
|
},
|
||||||
|
622
pnpm-lock.yaml
generated
622
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
@@ -5,7 +5,34 @@ import { readFile, writeFile } from 'fs/promises';
|
|||||||
import llama3Tokenizer from 'llama3-tokenizer-js';
|
import llama3Tokenizer from 'llama3-tokenizer-js';
|
||||||
import { TokenCleaner } from './TokenCleaner.js';
|
import { TokenCleaner } from './TokenCleaner.js';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @typedef {Object} MarkdownGeneratorOptions
|
||||||
|
* @property {string} [dir='.'] - The directory to process files from
|
||||||
|
* @property {string} [outputFilePath='./prompt.md'] - Path where the output markdown file will be saved
|
||||||
|
* @property {Set<string>} [fileTypeExclusions] - Set of file extensions to exclude
|
||||||
|
* @property {string[]} [fileExclusions] - Array of specific files or patterns to exclude
|
||||||
|
* @property {Array<{regex: RegExp, replacement: string}>} [customPatterns] - Custom regex/replacement pairs applied during token cleaning
|
||||||
|
* @property {Array<{regex: RegExp, replacement: string}>} [customSecretPatterns] - Custom regex/replacement pairs for identifying and redacting secrets
|
||||||
|
* @property {boolean} [verbose=true] - Whether to log detailed information during processing
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @class MarkdownGenerator
|
||||||
|
* @description A class that generates markdown documentation from tracked Git files in a project.
|
||||||
|
* It can exclude specific file types and files, clean tokens, and include todo lists.
|
||||||
|
*/
|
||||||
export class MarkdownGenerator {
|
export class MarkdownGenerator {
|
||||||
|
/**
|
||||||
|
* Creates an instance of MarkdownGenerator.
|
||||||
|
* @param {MarkdownGeneratorOptions} [options={}] - Configuration options for the generator
|
||||||
|
* @param {string} [options.dir='.'] - The directory to process files from
|
||||||
|
* @param {string} [options.outputFilePath='./prompt.md'] - Path where the output markdown file will be saved
|
||||||
|
* @param {Set<string>} [options.fileTypeExclusions] - Set of file extensions to exclude (defaults to common image and asset files)
|
||||||
|
* @param {string[]} [options.fileExclusions] - Array of specific files or patterns to exclude
|
||||||
|
* @param {Array<{regex: RegExp, replacement: string}>} [options.customPatterns] - Custom regex/replacement pairs applied during token cleaning
|
||||||
|
* @param {Array<{regex: RegExp, replacement: string}>} [options.customSecretPatterns] - Custom regex/replacement pairs for identifying and redacting secrets
|
||||||
|
* @param {boolean} [options.verbose=true] - Whether to log detailed information during processing
|
||||||
|
*/
|
||||||
constructor(options = {}) {
|
constructor(options = {}) {
|
||||||
this.dir = options.dir || '.';
|
this.dir = options.dir || '.';
|
||||||
this.outputFilePath = options.outputFilePath || './prompt.md';
|
this.outputFilePath = options.outputFilePath || './prompt.md';
|
||||||
@@ -35,7 +62,12 @@ export class MarkdownGenerator {
|
|||||||
this.tokenCleaner = new TokenCleaner(options.customPatterns, options.customSecretPatterns);
|
this.tokenCleaner = new TokenCleaner(options.customPatterns, options.customSecretPatterns);
|
||||||
this.verbose = options.verbose ?? true;
|
this.verbose = options.verbose ?? true;
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Retrieves a list of files tracked by Git, excluding those specified in fileTypeExclusions and fileExclusions.
|
||||||
|
* @async
|
||||||
|
* @returns {Promise<string[]>} Array of tracked file paths that aren't excluded
|
||||||
|
* @throws {Error} When unable to execute git command or access files
|
||||||
|
*/
|
||||||
async getTrackedFiles() {
|
async getTrackedFiles() {
|
||||||
try {
|
try {
|
||||||
const output = this.execCommand('git ls-files');
|
const output = this.execCommand('git ls-files');
|
||||||
@@ -55,7 +87,17 @@ export class MarkdownGenerator {
|
|||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Determines if a file should be excluded based on the given pattern.
|
||||||
|
* @param {string} filePath - Path of the file to check
|
||||||
|
* @param {string} pattern - Exclusion pattern to match against
|
||||||
|
* @returns {boolean} True if the file should be excluded, false otherwise
|
||||||
|
* @example
|
||||||
|
* // Excludes all files in a directory
|
||||||
|
* isFileExcluded('src/tests/file.js', 'src/tests/*') // returns true
|
||||||
|
* // Excludes specific file extensions in a directory
|
||||||
|
* isFileExcluded('src/assets/image.png', 'src/assets/*.png') // returns true
|
||||||
|
*/
|
||||||
isFileExcluded(filePath, pattern) {
|
isFileExcluded(filePath, pattern) {
|
||||||
if (pattern.endsWith('/*')) {
|
if (pattern.endsWith('/*')) {
|
||||||
const directory = pattern.slice(0, -2);
|
const directory = pattern.slice(0, -2);
|
||||||
@@ -67,7 +109,13 @@ export class MarkdownGenerator {
|
|||||||
}
|
}
|
||||||
return filePath === pattern;
|
return filePath === pattern;
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Reads and processes the content of a file, cleaning and redacting sensitive information.
|
||||||
|
* @async
|
||||||
|
* @param {string} filePath - Path to the file to read
|
||||||
|
* @returns {Promise<string>} Cleaned and redacted content of the file
|
||||||
|
* @throws {Error} When unable to read or process the file
|
||||||
|
*/
|
||||||
async readFileContent(filePath) {
|
async readFileContent(filePath) {
|
||||||
try {
|
try {
|
||||||
const content = await fs.readFile(filePath, 'utf-8');
|
const content = await fs.readFile(filePath, 'utf-8');
|
||||||
@@ -84,7 +132,12 @@ export class MarkdownGenerator {
|
|||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Generates markdown content from all tracked files in the project.
|
||||||
|
* @async
|
||||||
|
* @returns {Promise<string>} Generated markdown content containing all processed files
|
||||||
|
* @throws {Error} When unable to generate markdown content
|
||||||
|
*/
|
||||||
async generateMarkdown() {
|
async generateMarkdown() {
|
||||||
const trackedFiles = await this.getTrackedFiles();
|
const trackedFiles = await this.getTrackedFiles();
|
||||||
if (this.verbose) {
|
if (this.verbose) {
|
||||||
@@ -98,7 +151,12 @@ export class MarkdownGenerator {
|
|||||||
}
|
}
|
||||||
return markdownContent;
|
return markdownContent;
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Retrieves the content of the project's todo file, creating it if it doesn't exist.
|
||||||
|
* @async
|
||||||
|
* @returns {Promise<string>} Content of the todo file
|
||||||
|
* Errors are logged via console.error and not rethrown, so on failure the promise resolves to undefined.
|
||||||
|
*/
|
||||||
async getTodo() {
|
async getTodo() {
|
||||||
try {
|
try {
|
||||||
console.log('getting project todo');
|
console.log('getting project todo');
|
||||||
@@ -113,7 +171,15 @@ export class MarkdownGenerator {
|
|||||||
console.error('Error reading todo file:', error);
|
console.error('Error reading todo file:', error);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Creates a complete markdown document combining code documentation and todos.
|
||||||
|
* @async
|
||||||
|
* @returns {Promise<{success: boolean, tokenCount?: number, error?: Error}>} Result object:
|
||||||
|
* - `success`: whether the operation was successful
|
||||||
|
* - `tokenCount` (optional): number of tokens in the generated document
|
||||||
|
* - `error` (optional): error object if the operation failed
|
||||||
|
* Failures are caught and reported as `{ success: false, error }` rather than thrown.
|
||||||
|
*/
|
||||||
async createMarkdownDocument() {
|
async createMarkdownDocument() {
|
||||||
try {
|
try {
|
||||||
const codeMarkdown = await this.generateMarkdown();
|
const codeMarkdown = await this.generateMarkdown();
|
||||||
@@ -133,7 +199,13 @@ export class MarkdownGenerator {
|
|||||||
return { success: false, error };
|
return { success: false, error };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Executes a shell command in the specified directory.
|
||||||
|
* @param {string} command - Shell command to execute
|
||||||
|
* @returns {string} Output of the command
|
||||||
|
* @throws {Error} When command execution fails
|
||||||
|
* @private
|
||||||
|
*/
|
||||||
execCommand(command) {
|
execCommand(command) {
|
||||||
try {
|
try {
|
||||||
return execSync(command, { cwd: this.dir, encoding: 'utf-8' }).toString().trim();
|
return execSync(command, { cwd: this.dir, encoding: 'utf-8' }).toString().trim();
|
||||||
|
Reference in New Issue
Block a user