convert project to typescript

2024-11-21 13:23:45 -05:00
parent 1226a742b5
commit 88dced2c4d
11 changed files with 218 additions and 165 deletions
--- a/src/MarkdownGenerator.ts
+++ b/src/MarkdownGenerator.ts
@@ -0,0 +1,209 @@
+// MarkdownGenerator.ts
+
+import path from 'path';
+import { execSync } from 'child_process';
+import { readFile, writeFile } from 'fs/promises';
+import llama3Tokenizer from 'llama3-tokenizer-js';
+import { TokenCleaner } from './TokenCleaner.js';
+import micromatch from 'micromatch';
+import fileTypeExclusions from './fileTypeExclusions.js';
+import fileExclusions from './fileExclusions.js';
+
+/**
+ * Optional settings accepted by the MarkdownGenerator constructor.
+ * Every field may be omitted; the constructor supplies the fallback noted on each one.
+ */
+interface MarkdownGeneratorOptions {
+  // --- Locations ---
+
+  /** Directory the generator works in. Falls back to '.'. */
+  dir?: string;
+  /** Path the generated markdown is written to. Falls back to './prompt.md'. */
+  outputFilePath?: string;
+
+  // --- Exclusions ---
+
+  /** File extensions to skip. Falls back to the imported fileTypeExclusions. */
+  fileTypeExclusions?: Set<string>;
+  /** Glob patterns (matched with micromatch) of files to skip. Falls back to the imported fileExclusions. */
+  fileExclusions?: string[];
+
+  // --- Cleaning / redaction (forwarded unchanged to the TokenCleaner constructor) ---
+
+  /** First argument handed to TokenCleaner. */
+  customPatterns?: Record<string, any>;
+  /** Second argument handed to TokenCleaner. */
+  customSecretPatterns?: Record<string, any>;
+
+  // --- Logging ---
+
+  /** Enables console output. Treated as true when left undefined. */
+  verbose?: boolean;
+}
+
+/**
+ * @class MarkdownGenerator
+ * @description Builds a single markdown document from the Git-tracked files under a directory.
+ * Tracked files are filtered by extension and by glob pattern, each file's text is passed through
+ * TokenCleaner.cleanAndRedact, and the content of the directory's `todo` file is appended at the end.
+ */
+export class MarkdownGenerator {
+  private readonly dir: string;
+  private readonly outputFilePath: string;
+  private readonly fileTypeExclusions: Set<string>;
+  private readonly fileExclusions: string[];
+  private readonly tokenCleaner: TokenCleaner;
+  private readonly verbose: boolean;
+
+  /**
+   * Creates an instance of MarkdownGenerator.
+   * @param {MarkdownGeneratorOptions} [options={}] - Configuration options for the generator.
+   * Missing values fall back to: dir '.', outputFilePath './prompt.md', the imported exclusion
+   * lists, and verbose true.
+   */
+  constructor(options: MarkdownGeneratorOptions = {}) {
+    this.dir = options.dir || '.';
+    this.outputFilePath = options.outputFilePath || './prompt.md';
+
+    // Copy into a fresh Set so later changes to the caller's (or the default) set do not leak in.
+    this.fileTypeExclusions = new Set(
+      options.fileTypeExclusions || fileTypeExclusions,
+    );
+
+    this.fileExclusions = options.fileExclusions || fileExclusions;
+
+    this.tokenCleaner = new TokenCleaner(options.customPatterns, options.customSecretPatterns);
+    // `??` keeps an explicit `false`; only an omitted value turns logging on.
+    this.verbose = options.verbose ?? true;
+  }
+
+  /**
+   * Retrieves the files tracked by Git, excluding those matched by fileTypeExclusions or fileExclusions.
+   * Failures are not propagated: when the git command fails the error is logged (if verbose)
+   * and an empty array is returned.
+   * @async
+   * @returns {Promise<string[]>} Tracked file paths that are not excluded, or [] on failure
+   */
+  async getTrackedFiles(): Promise<string[]> {
+    try {
+      // `-z` makes git emit NUL-terminated, unquoted paths. Without it, paths containing
+      // non-ASCII or special characters are printed in quoted/escaped form and can no longer
+      // be opened from disk.
+      const output = this.execCommand('git ls-files -z');
+      const trackedFiles = output.split('\0').filter(file => file.trim().length > 0);
+      if (this.verbose) {
+        console.log(`Total tracked files: ${trackedFiles.length}`);
+      }
+      // Drop files whose extension is excluded or whose path matches an exclusion glob.
+      const filteredFiles = trackedFiles.filter(file => {
+        const fileExt = path.extname(file).toLowerCase();
+        return !this.fileTypeExclusions.has(fileExt) && !micromatch.isMatch(file, this.fileExclusions, { dot: true });
+      });
+      if (this.verbose) {
+        const excludedCount = trackedFiles.length - filteredFiles.length;
+        console.log(`Excluded files: ${excludedCount}`);
+        console.log(`Files to process after exclusions: ${filteredFiles.length}`);
+      }
+      return filteredFiles;
+    } catch (error: unknown) {
+      if (this.verbose) {
+        console.error('Error fetching tracked files:', error);
+      }
+      return [];
+    }
+  }
+
+  /**
+   * Reads a file as UTF-8 and returns its content after TokenCleaner.cleanAndRedact.
+   * Failures are not propagated: on any error the problem is logged (if verbose) and an
+   * empty string is returned, which callers treat as "nothing to include".
+   * @async
+   * @param {string} filePath - Path to the file to read
+   * @returns {Promise<string>} Cleaned and redacted content, or '' when the file could not be processed
+   */
+  async readFileContent(filePath: string): Promise<string> {
+    try {
+      const content = await readFile(filePath, 'utf-8');
+      const cleanedAndRedactedContent = this.tokenCleaner.cleanAndRedact(content);
+      if (this.verbose) {
+        // Tokenising is only needed for the log line, so it is skipped in quiet mode.
+        const tokenCount = llama3Tokenizer.encode(cleanedAndRedactedContent).length;
+        console.log(`${filePath}: Tokens[${tokenCount}]`);
+      }
+      return cleanedAndRedactedContent;
+    } catch (error: unknown) {
+      if (this.verbose) {
+        console.error(`Error reading file ${filePath}:`, error);
+      }
+      return '';
+    }
+  }
+
+  /**
+   * Generates markdown content from all tracked, non-excluded files.
+   * Each file with non-blank cleaned content becomes a `## <path>` heading followed by a
+   * `~~~` fenced block; files that are blank after cleaning are skipped.
+   * @async
+   * @returns {Promise<string>} Generated markdown, starting with the '# Project Files' heading
+   */
+  async generateMarkdown(): Promise<string> {
+    const trackedFiles = await this.getTrackedFiles();
+    if (this.verbose) {
+      console.log(`Generating markdown for ${trackedFiles.length} files`);
+    }
+    let markdownContent = '# Project Files\n\n';
+
+    // Files are read one at a time so sections (and log lines) keep git's listing order.
+    for (const file of trackedFiles) {
+      // git reports paths relative to the directory the command ran in (this.dir).
+      const absolutePath = path.join(this.dir, file);
+      const content = (await this.readFileContent(absolutePath)).trim();
+      if (content) { // Only include files with content after cleaning
+        markdownContent += `## ${file}\n~~~\n${content}\n~~~\n\n`;
+      } else if (this.verbose) {
+        console.log(`Skipping ${file} as it has no content after cleaning.`);
+      }
+    }
+    return markdownContent;
+  }
+
+  /**
+   * Retrieves the content of the `todo` file inside this.dir, creating an empty one if it does not exist.
+   * @async
+   * @returns {Promise<string>} Content of the todo file ('' when it had to be created)
+   * @throws {Error} When the file cannot be read for a reason other than ENOENT, or cannot be created
+   */
+  async getTodo(): Promise<string> {
+    const todoPath = path.join(this.dir, 'todo');
+    try {
+      if (this.verbose) {
+        console.log('Reading todo file');
+      }
+      return await readFile(todoPath, 'utf-8');
+    } catch (error: unknown) {
+      // Only Error objects can carry a Node errno code; anything else is treated as unexpected.
+      const code = error instanceof Error ? (error as NodeJS.ErrnoException).code : undefined;
+      if (code === 'ENOENT') {
+        // File does not exist
+        if (this.verbose) {
+          console.log('File not found, creating a new \'todo\' file.');
+        }
+        await writeFile(todoPath, ''); // Create an empty 'todo' file
+        // The file was just written empty, so its content is known without reading it back.
+        return '';
+      }
+      if (this.verbose) {
+        console.error('Error reading todo file:', error);
+      }
+      throw error;
+    }
+  }
+
+  /**
+   * Creates the complete markdown document (project files, a `---` separator, then the todo
+   * content) and writes it to outputFilePath.
+   * Failures are not propagated: any error is reported through the returned object.
+   * @async
+   * @returns {Promise<Object>} Result object
+   * @returns {boolean} result.success - Whether the operation was successful
+   * @returns {number} [result.tokenCount] - Number of tokens in the generated document (on success)
+   * @returns {Error} [result.error] - Error object if the operation failed
+   */
+  async createMarkdownDocument(): Promise<{ success: boolean, tokenCount?: number, error?: Error }> {
+    try {
+      const codeMarkdown = await this.generateMarkdown();
+      const todos = await this.getTodo();
+      const markdown = codeMarkdown + `\n---\n\n${todos}\n`;
+      await writeFile(this.outputFilePath, markdown);
+      if (this.verbose) {
+        console.log(`Markdown document created at ${this.outputFilePath}`);
+      }
+      // Tokenise once and reuse the count for both the log line and the result.
+      const tokenCount = llama3Tokenizer.encode(markdown).length;
+      if (this.verbose) {
+        console.log({ total_tokens: tokenCount });
+      }
+      return { success: true, tokenCount };
+    } catch (error: unknown) {
+      if (this.verbose) {
+        console.error('Error writing markdown document:', error);
+      }
+      // A caught value is `unknown`; wrap non-Error throws so the result matches its declared type.
+      return { success: false, error: error instanceof Error ? error : new Error(String(error)) };
+    }
+  }
+
+  /**
+   * Executes a shell command synchronously with this.dir as the working directory.
+   * @param {string} command - Shell command to execute
+   * @returns {string} UTF-8 output of the command with surrounding whitespace trimmed
+   * @throws {Error} When command execution fails (the error is logged first if verbose)
+   * @private
+   */
+  private execCommand(command: string): string {
+    try {
+      // With an `encoding` option execSync already returns a string.
+      return execSync(command, { cwd: this.dir, encoding: 'utf-8' }).trim();
+    } catch (error: unknown) {
+      if (this.verbose) {
+        console.error(`Error executing command: ${command}`, error);
+      }
+      throw error;
+    }
+  }
+}