diff --git a/package.json b/package.json index 7ae183d..fd248ea 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "code-tokenizer-md", - "version": "1.0.7", + "version": "1.0.8", "type": "module", "main": "dist/index.js", "bin": { @@ -13,15 +13,16 @@ "build": "rm -rf dist && mkdir dist && cp src/*.js dist/", "test": "echo \"No tests specified\" && exit 0", "prepublishOnly": "npm run build", - "dev": "node ./src/cli.js", - "deploy:dev": "pnpm publish .", + "dev": "npx .", + "deploy:dev": "pnpm build && pnpm publish .", "lint": "eslint src/", "lint:fix": "eslint src/ --fix", "format": "prettier --write \"**/*.{js,jsx,ts,tsx,json,md,yml,yaml}\"", "fix": "pnpm format && pnpm lint:fix" }, "dependencies": { - "llama3-tokenizer-js": "^1.0.0" + "llama3-tokenizer-js": "^1.0.0", + "micromatch": "^4.0.8" }, "peerDependencies": { "node": ">=14.0.0" diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c2c693f..8eae635 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -8,6 +8,9 @@ dependencies: llama3-tokenizer-js: specifier: ^1.0.0 version: 1.2.0 + micromatch: + specifier: ^4.0.8 + version: 4.0.8 node: specifier: '>=14.0.0' version: 22.11.0 @@ -174,6 +177,13 @@ packages: concat-map: 0.0.1 dev: true + /braces@3.0.3: + resolution: {integrity: sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==} + engines: {node: '>=8'} + dependencies: + fill-range: 7.1.1 + dev: false + /callsites@3.1.0: resolution: {integrity: sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==} engines: {node: '>=6'} @@ -351,6 +361,13 @@ packages: flat-cache: 4.0.1 dev: true + /fill-range@7.1.1: + resolution: {integrity: sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==} + engines: {node: '>=8'} + dependencies: + to-regex-range: 5.0.1 + dev: false + /find-up@5.0.0: resolution: {integrity: sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==} engines: {node: '>=10'} @@ -423,6 +440,11 @@ packages: is-extglob: 2.1.1 dev: true + /is-number@7.0.0: + resolution: {integrity: sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==} + engines: {node: '>=0.12.0'} + dev: false + /isexe@2.0.0: resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==} dev: true @@ -475,6 +497,14 @@ packages: resolution: {integrity: sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==} dev: true + /micromatch@4.0.8: + resolution: {integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==} + engines: {node: '>=8.6'} + dependencies: + braces: 3.0.3 + picomatch: 2.3.1 + dev: false + /minimatch@3.1.2: resolution: {integrity: sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==} dependencies: @@ -545,6 +575,11 @@ packages: engines: {node: '>=8'} dev: true + /picomatch@2.3.1: + resolution: {integrity: sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==} + engines: {node: '>=8.6'} + dev: false + /prelude-ls@1.2.1: resolution: {integrity: sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==} engines: {node: '>= 0.8.0'} @@ -594,6 +629,13 @@ packages: resolution: {integrity: sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==} dev: true + /to-regex-range@5.0.1: + resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==} + engines: {node: '>=8.0'} + dependencies: + is-number: 7.0.0 + dev: false + /type-check@0.4.0: resolution: {integrity: sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==} engines: {node: '>= 0.8.0'} diff --git a/src/MarkdownGenerator.js b/src/MarkdownGenerator.js index 00e6a21..e29a106 100644 --- a/src/MarkdownGenerator.js +++ b/src/MarkdownGenerator.js @@ -1,9 +1,11 @@ +// MarkdownGenerator.js + import path from 'path'; import { execSync } from 'child_process'; -import fs from 'fs/promises'; import { readFile, writeFile } from 'fs/promises'; import llama3Tokenizer from 'llama3-tokenizer-js'; import { TokenCleaner } from './TokenCleaner.js'; +import micromatch from 'micromatch'; /** * @typedef {Object} MarkdownGeneratorOptions @@ -24,14 +26,7 @@ import { TokenCleaner } from './TokenCleaner.js'; export class MarkdownGenerator { /** * Creates an instance of MarkdownGenerator. - * @param {Object} [options={}] - Configuration options for the generator - * @param {string} [options.dir='.'] - The directory to process files from - * @param {string} [options.outputFilePath='./prompt.md'] - Path where the output markdown file will be saved - * @param {Set} [options.fileTypeExclusions] - Set of file extensions to exclude (defaults to common image and asset files) - * @param {string[]} [options.fileExclusions] - Array of specific files or patterns to exclude - * @param {Object} [options.customPatterns] - Custom patterns for token cleaning - * @param {Object} [options.customSecretPatterns] - Custom patterns for identifying and redacting secrets - * @param {boolean} [options.verbose=true] - Whether to log detailed information during processing + * @param {MarkdownGeneratorOptions} [options={}] - Configuration options for the generator */ constructor(options = {}) { this.dir = options.dir || '.'; @@ -110,7 +105,7 @@ export class MarkdownGenerator { '**/jsconfig.json', '**/jsconfig*.json', '**/package-lock.json', - + '**/.prettierignore', // Environment and variables '**/.env*', '**/*.vars', @@ -171,9 +166,11 @@ export class MarkdownGenerator { '**/temp/', '**/*.log' ]; + this.tokenCleaner = new TokenCleaner(options.customPatterns, options.customSecretPatterns); - this.verbose = options.verbose ?? true; + this.verbose = options.verbose !== undefined ? options.verbose : true; } + /** * Retrieves a list of files tracked by Git, excluding those specified in fileTypeExclusions and fileExclusions. * @async @@ -183,15 +180,21 @@ export class MarkdownGenerator { async getTrackedFiles() { try { const output = this.execCommand('git ls-files'); - const trackedFiles = output.split('\n').filter(file => file.length > 0); + const trackedFiles = output.split('\n').filter(file => file.trim().length > 0); if (this.verbose) { console.log(`Total tracked files: ${trackedFiles.length}`); } - return trackedFiles.filter(file => { + // Use micromatch to filter out excluded files + const filteredFiles = trackedFiles.filter(file => { const fileExt = path.extname(file).toLowerCase(); - const isExcluded = this.fileExclusions.some(pattern => this.isFileExcluded(file, pattern)); - return !this.fileTypeExclusions.has(fileExt) && !isExcluded; + return !this.fileTypeExclusions.has(fileExt) && !micromatch.isMatch(file, this.fileExclusions, { dot: true }); }); + if (this.verbose) { + const excludedCount = trackedFiles.length - filteredFiles.length; + console.log(`Excluded files: ${excludedCount}`); + console.log(`Files to process after exclusions: ${filteredFiles.length}`); + } + return filteredFiles; } catch (error) { if (this.verbose) { console.error('Error fetching tracked files:', error); @@ -199,72 +202,7 @@ export class MarkdownGenerator { return []; } } - /** - * Determines if a file should be excluded based on the given pattern. - * @param {string} filePath - Path of the file to check - * @param {string} pattern - Exclusion pattern to match against - * @returns {boolean} True if the file should be excluded, false otherwise - * @example - * // Excludes all files in a directory - * isFileExcluded('src/tests/file.js', 'src/tests/*') // returns true - * // Excludes specific file extensions in a directory - * isFileExcluded('src/assets/image.png', 'src/assets/*.png') // returns true - */ - isFileExcluded(filePath, pattern) { - // Normalize paths to use forward slashes - filePath = filePath.replace(/\\/g, '/'); - pattern = pattern.replace(/\\/g, '/'); - // Handle directory-only patterns (ending with /) - if (pattern.endsWith('/')) { - const directory = pattern.slice(0, -1); - if (directory.startsWith('**/')) { - const dirToMatch = directory.slice(3); - return filePath.includes(`${dirToMatch}/`); - } - return filePath.startsWith(directory + '/'); - } - - // Handle brace expansion for extensions {js,ts} - if (pattern.includes('{') && pattern.includes('}')) { - const [basePath, extensionsGroup] = pattern.split('{'); - const extensions = extensionsGroup.slice(0, -1).split(','); - return extensions.some(ext => - this.isFileExcluded(filePath, basePath + ext) - ); - } - - // Handle directory/*.ext patterns (extension wildcards in specific directories) - if (pattern.includes('/*.')) { - const [directory, ext] = pattern.split('/*.'); - const relativePath = filePath.slice(directory.length + 1); - return filePath.startsWith(directory + '/') && - !relativePath.includes('/') && - filePath.endsWith('.' + ext); - } - - // Handle pure extension wildcards (*.ext) - if (pattern.startsWith('*.')) { - return filePath.endsWith(pattern.slice(1)); - } - - // Convert glob pattern to regex for remaining cases - const regexPattern = pattern - // Escape special regex characters except * and ? - .replace(/[.+^${}()|[\]\\]/g, '\\$&') - // Replace ** with special placeholder - .replace(/\*\*/g, '{{GLOBSTAR}}') - // Replace * with regex pattern - .replace(/\*/g, '[^/]*') - // Replace globstar placeholder with proper regex - .replace(/{{GLOBSTAR}}/g, '.*') - // Anchor the regex - .replace(/^/, '^') - .replace(/$/, '$'); - - const regex = new RegExp(regexPattern); - return regex.test(filePath); - } /** * Reads and processes the content of a file, cleaning and redacting sensitive information. * @async @@ -274,7 +212,7 @@ export class MarkdownGenerator { */ async readFileContent(filePath) { try { - const content = await fs.readFile(filePath, 'utf-8'); + const content = await readFile(filePath, 'utf-8'); const cleanedAndRedactedContent = this.tokenCleaner.cleanAndRedact(content); if (this.verbose) { const tokenCount = llama3Tokenizer.encode(cleanedAndRedactedContent).length; @@ -288,6 +226,7 @@ export class MarkdownGenerator { return ''; } } + /** * Generates markdown content from all tracked files in the project. * @async @@ -302,11 +241,17 @@ export class MarkdownGenerator { let markdownContent = '# Project Files\n\n'; for (const file of trackedFiles) { - const content = await this.readFileContent(path.join(this.dir, file)); - markdownContent += `## ${file}\n~~~\n${content.trim()}\n~~~\n`; + const absolutePath = path.join(this.dir, file); + const content = await this.readFileContent(absolutePath); + if (content.trim()) { // Only include files with content after cleaning + markdownContent += `## ${file}\n~~~\n${content.trim()}\n~~~\n\n`; + } else if (this.verbose) { + console.log(`Skipping ${file} as it has no content after cleaning.`); + } } return markdownContent; } + /** * Retrieves the content of the project's todo file, creating it if it doesn't exist. * @async @@ -314,19 +259,28 @@ export class MarkdownGenerator { * @throws {Error} When unable to read or create the todo file */ async getTodo() { + const todoPath = path.join(this.dir, 'todo'); try { - console.log('Reading todo file'); - return await readFile('./todo', 'utf-8'); + if (this.verbose) { + console.log('Reading todo file'); + } + return await readFile(todoPath, 'utf-8'); } catch (error) { if (error.code === 'ENOENT') { // File does not exist - console.log("File not found, creating a new 'todo' file."); - await writeFile('./todo', ''); // Create an empty 'todo' file - return this.getTodo(); // Call the function again + if (this.verbose) { + console.log("File not found, creating a new 'todo' file."); + } + await writeFile(todoPath, ''); // Create an empty 'todo' file + return await this.getTodo(); // Await the recursive call } - console.error('Error reading todo file:', error); + if (this.verbose) { + console.error('Error reading todo file:', error); + } + throw error; } } + /** * Creates a complete markdown document combining code documentation and todos. * @async @@ -340,8 +294,8 @@ export class MarkdownGenerator { try { const codeMarkdown = await this.generateMarkdown(); const todos = await this.getTodo(); - const markdown = codeMarkdown + `\n---\n${todos}\n`; - await fs.writeFile(this.outputFilePath, markdown); + const markdown = codeMarkdown + `\n---\n\n${todos}\n`; + await writeFile(this.outputFilePath, markdown); if (this.verbose) { console.log(`Markdown document created at ${this.outputFilePath}`); const totalTokens = llama3Tokenizer.encode(markdown).length; @@ -355,6 +309,7 @@ export class MarkdownGenerator { return { success: false, error }; } } + /** * Executes a shell command in the specified directory. * @param {string} command - Shell command to execute diff --git a/src/cli.js b/src/cli.js index bbac1e5..de1bb05 100755 --- a/src/cli.js +++ b/src/cli.js @@ -1,4 +1,5 @@ #!/usr/bin/env node +console.log("RUNNING TOKENIZER") import { MarkdownGenerator } from './MarkdownGenerator.js'; const generator = new MarkdownGenerator();