This commit is contained in:
2024-10-30 11:59:30 -04:00
commit 17031d8be8
8 changed files with 342 additions and 0 deletions

98
src/MarkdownGenerator.js Normal file
View File

@@ -0,0 +1,98 @@
// src/MarkdownGenerator.js
import path from 'path';
import { execSync } from 'child_process';
import fs from 'fs/promises';
import llama3Tokenizer from 'llama3-tokenizer-js';
import { TokenCleaner } from './TokenCleaner.js';
/**
 * Collects the git-tracked files of a directory and writes them into a single
 * Markdown document, one fenced section per file. File contents are passed
 * through a TokenCleaner before being embedded.
 */
export class MarkdownGenerator {
  /**
   * @param {object} [options]
   * @param {string} [options.dir='.'] Directory in which git is run and against which file paths are resolved.
   * @param {string} [options.outputFilePath='./prompt.md'] Where the generated document is written.
   * @param {string[]} [options.fileTypeExclusions] Lower-case file extensions (with leading dot) to skip.
   * @param {string[]} [options.fileExclusions] Exact paths or `dir/*` / `dir/*suffix` patterns to skip.
   * @param {*} [options.customPatterns] Forwarded as the first TokenCleaner constructor argument.
   * @param {*} [options.customSecretPatterns] Forwarded as the second TokenCleaner constructor argument.
   * @param {boolean} [options.verbose=true] Log progress and errors to the console.
   */
  constructor(options = {}) {
    this.dir = options.dir || '.';
    this.outputFilePath = options.outputFilePath || './prompt.md';
    this.fileTypeExclusions = new Set(
      options.fileTypeExclusions || ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg', '.webp', '.tiff', '.lockb', '.yaml', '.ico', '.ttf', '.css'],
    );
    this.fileExclusions = options.fileExclusions || ['prompt.js', '.gitignore', '.env', '.dev.vars'];
    this.tokenCleaner = new TokenCleaner(options.customPatterns, options.customSecretPatterns);
    this.verbose = options.verbose ?? true;
  }

  /**
   * Lists the files reported by `git ls-files`, minus excluded extensions and
   * excluded paths/patterns.
   *
   * Best-effort: if the git command fails, the error is logged (when verbose)
   * and an empty list is returned instead of rejecting.
   *
   * @returns {Promise<string[]>} Paths as printed by git, one entry per non-empty output line.
   */
  async getTrackedFiles() {
    try {
      // NOTE(review): git may quote/escape paths containing unusual characters
      // (see core.quotePath); such entries are used here exactly as printed.
      const output = this.execCommand('git ls-files');
      const trackedFiles = output.split('\n').filter((file) => file.length > 0);
      if (this.verbose) console.log(`Total tracked files: ${trackedFiles.length}`);
      return trackedFiles.filter((file) => {
        const fileExt = path.extname(file).toLowerCase();
        if (this.fileTypeExclusions.has(fileExt)) return false;
        return !this.fileExclusions.some((pattern) => this.isFileExcluded(file, pattern));
      });
    } catch (error) {
      if (this.verbose) console.error('Error fetching tracked files:', error);
      return [];
    }
  }

  /**
   * Tests a path against a single exclusion pattern.
   *
   * Supported forms:
   *  - `dir/*`       — any path located under `dir/`
   *  - `dir/*suffix` — any path under `dir/` that ends with `suffix`
   *  - anything else — exact string equality with the path
   *
   * The directory part must be followed by a path separator in the file path,
   * so `src/*` matches `src/a.js` but not `src2/a.js`.
   *
   * @param {string} filePath Path to test (forward-slash separated).
   * @param {string} pattern Exclusion pattern.
   * @returns {boolean} True when the path is excluded by the pattern.
   */
  isFileExcluded(filePath, pattern) {
    if (pattern.endsWith('/*')) {
      // Drop only the '*' so the prefix keeps its trailing '/'.
      const directoryPrefix = pattern.slice(0, -1);
      return filePath.startsWith(directoryPrefix);
    }
    if (pattern.includes('/*')) {
      const [directory, suffix] = pattern.split('/*');
      return filePath.startsWith(`${directory}/`) && filePath.endsWith(suffix);
    }
    return filePath === pattern;
  }

  /**
   * Reads a file as UTF-8 and runs it through `tokenCleaner.cleanAndRedact`.
   * When verbose, also logs the llama3 token count of the cleaned text.
   *
   * Best-effort: on any failure the error is logged (when verbose) and an
   * empty string is returned.
   *
   * @param {string} filePath Path of the file to read.
   * @returns {Promise<string>} Cleaned content, or '' on failure.
   */
  async readFileContent(filePath) {
    try {
      const content = await fs.readFile(filePath, 'utf-8');
      const cleanedAndRedactedContent = this.tokenCleaner.cleanAndRedact(content);
      if (this.verbose) {
        const tokenCount = llama3Tokenizer.encode(cleanedAndRedactedContent).length;
        console.log(`${filePath}: Tokens[${tokenCount}]`);
      }
      return cleanedAndRedactedContent;
    } catch (error) {
      if (this.verbose) console.error(`Error reading file ${filePath}:`, error);
      return '';
    }
  }

  /**
   * Builds the Markdown text: a `# Project Files` title followed by one
   * `## <path>` heading and `~~~` fenced block per tracked file.
   *
   * @returns {Promise<string>} The complete Markdown document.
   */
  async generateMarkdown() {
    const trackedFiles = await this.getTrackedFiles();
    if (this.verbose) console.log(`Generating markdown for ${trackedFiles.length} files`);
    const sections = ['# Project Files\n\n'];
    // Files are read one at a time so the per-file log lines stay in listing order.
    for (const file of trackedFiles) {
      const content = await this.readFileContent(path.join(this.dir, file));
      sections.push(`## ${file}\n~~~\n${content.trim()}\n~~~\n`);
    }
    return sections.join('');
  }

  /**
   * Generates the Markdown document and writes it to `outputFilePath`.
   *
   * @returns {Promise<{success: true, tokenCount: number} | {success: false, error: unknown}>}
   *   On success, the llama3 token count of the written document; on failure,
   *   the caught error (also logged when verbose).
   */
  async createMarkdownDocument() {
    try {
      const markdownContent = await this.generateMarkdown();
      await fs.writeFile(this.outputFilePath, markdownContent);
      // Tokenize once and reuse the result for both the log and the return value.
      const tokenCount = llama3Tokenizer.encode(markdownContent).length;
      if (this.verbose) {
        console.log(`Markdown document created at ${this.outputFilePath}`);
        console.log({ total_tokens: tokenCount });
      }
      return { success: true, tokenCount };
    } catch (error) {
      if (this.verbose) console.error('Error writing markdown document:', error);
      return { success: false, error };
    }
  }

  /**
   * Runs a shell command synchronously with `dir` as the working directory.
   *
   * @param {string} command Command line to execute.
   * @returns {string} The command's stdout, decoded as UTF-8 and trimmed.
   * @throws Rethrows whatever `execSync` throws, after logging it when verbose.
   */
  execCommand(command) {
    try {
      // With `encoding` set, execSync already returns a string.
      return execSync(command, { cwd: this.dir, encoding: 'utf-8' }).trim();
    } catch (error) {
      if (this.verbose) console.error(`Error executing command: ${command}`, error);
      throw error;
    }
  }
}