257 lines
9.8 KiB
Python
257 lines
9.8 KiB
Python
import os
|
|
import sys
|
|
import fnmatch
|
|
|
|
def get_language(file_path):
    """Return the Markdown code-fence language tag for *file_path*.

    The lookup is primarily driven by the (case-insensitive) file extension.
    Dotfiles such as ``.gitignore`` or ``.env.local`` are special: as far as
    ``os.path.splitext`` is concerned they have either no extension at all or
    one that is only a fragment of the name, so when the extension is not
    recognised the whole lower-cased basename is tried as a key instead.

    Args:
        file_path: Path of the file. The file does not need to exist.

    Returns:
        The language tag, or ``''`` when the file type is not recognised.
    """
    extension_map = {
        '.html': 'html', '.htm': 'html', '.css': 'css', '.js': 'javascript',
        '.mjs': 'javascript', '.cjs': 'javascript', '.py': 'python',
        '.pyc': 'python', '.pyo': 'python', '.md': 'markdown',
        '.markdown': 'markdown', '.txt': 'text', '.json': 'json',
        '.geojson': 'json', '.xml': 'xml', '.php': 'php', '.phtml': 'php',
        '.sql': 'sql', '.sh': 'bash', '.bash': 'bash', '.zsh': 'bash',
        '.fish': 'fish', '.yml': 'yaml', '.yaml': 'yaml', '.toml': 'toml',
        '.ini': 'ini', '.cfg': 'ini', '.conf': 'ini', '.config': 'ini',
        '.log': 'text', '.bat': 'batch', '.cmd': 'batch', '.ps1': 'powershell',
        '.psm1': 'powershell', '.psd1': 'powershell', '.rb': 'ruby',
        '.gemspec': 'ruby', '.go': 'go', '.java': 'java', '.class': 'java',
        '.c': 'c', '.h': 'cpp', '.cpp': 'cpp', '.cc': 'cpp', '.cxx': 'cpp',
        '.c++': 'cpp', '.hpp': 'cpp', '.hh': 'cpp', '.hxx': 'cpp',
        '.cs': 'csharp', '.csx': 'csharp', '.swift': 'swift', '.kt': 'kotlin',
        '.kts': 'kotlin', '.rs': 'rust', '.ts': 'typescript', '.tsx': 'typescript',
        '.mts': 'typescript', '.cts': 'typescript', '.jsx': 'javascript',
        '.vue': 'vue', '.scss': 'scss', '.sass': 'sass', '.less': 'less',
        '.styl': 'stylus', '.stylus': 'stylus', '.graphql': 'graphql',
        '.gql': 'graphql', '.dockerfile': 'dockerfile', '.dockerignore': 'dockerignore',
        '.editorconfig': 'ini', '.gitignore': 'gitignore', '.gitattributes': 'gitattributes',
        '.gitmodules': 'gitmodules', '.prettierrc': 'json', '.eslintrc': 'json',
        '.babelrc': 'json', '.npmignore': 'gitignore', '.lock': 'text',
        '.env': 'env', '.env.local': 'env', '.env.development': 'env',
        '.env.production': 'env', '.env.test': 'env',
    }

    ext = os.path.splitext(file_path)[1].lower()
    if ext in extension_map:
        return extension_map[ext]

    # splitext('.gitignore') -> ('.gitignore', '') and
    # splitext('.env.local') -> ('.env', '.local'): the dotfile keys above can
    # only ever be hit by looking up the complete file name.
    return extension_map.get(os.path.basename(file_path).lower(), '')
|
|
|
|
def should_exclude(file_path, root_dir):
    """Decide whether *file_path* must be left out of the generated bundle.

    A file is excluded when any of the following holds:

    * its basename, or its path relative to *root_dir* (with forward
      slashes), is listed in ``exclude_files``;
    * its extension is ``.pyc`` (compiled bytecode is not readable text);
    * its extension is a known image / camera-raw format.

    Extension checks are case-insensitive.

    Args:
        file_path: Path of the candidate file (absolute or relative to the
            current working directory). The file does not need to exist.
        root_dir: Directory that relative paths in ``exclude_files`` are
            interpreted against.

    Returns:
        ``True`` if the file should be skipped, ``False`` otherwise.
    """
    # Exact file names / root-relative paths to skip.
    exclude_files = {'.pyc'}

    # Extensions to skip. The original code only listed '.pyc' as an exact
    # file name, which never matched real bytecode files such as "mod.pyc".
    excluded_extensions = {'.pyc'}

    image_extensions = {
        '.png', '.jpg', '.jpeg', '.gif', '.svg', '.bmp', '.ico',
        '.tiff', '.tif', '.webp', '.heic', '.heif', '.avif',
        '.jfif', '.pjpeg', '.pjp', '.tga', '.psd', '.raw',
        '.cr2', '.nef', '.orf', '.sr2', '.arw', '.dng', '.rw2',
        '.raf', '.3fr', '.kdc', '.mef', '.mrw', '.pef', '.srw',
        '.x3f', '.r3d', '.fff', '.iiq', '.erf', '.nrw',
    }

    abs_path = os.path.abspath(file_path)
    rel_path_forward = os.path.relpath(abs_path, root_dir).replace(os.sep, '/')
    basename = os.path.basename(file_path)
    if rel_path_forward in exclude_files or basename in exclude_files:
        return True

    ext = os.path.splitext(file_path)[1].lower()
    return ext in excluded_extensions or ext in image_extensions
|
|
|
|
def load_gitignore_patterns(root_dir):
    """Read the ignore patterns from ``<root_dir>/.gitignore``.

    Blank lines and comment lines (starting with ``#``) are dropped and
    surrounding whitespace is stripped from every pattern. A line starting
    with ``\\#`` denotes a pattern that literally begins with ``#``; the
    escaping backslash is removed.

    Args:
        root_dir: Directory expected to contain the ``.gitignore`` file.

    Returns:
        The patterns in file order. The list is empty when there is no
        ``.gitignore`` file; if reading fails part-way, a warning is printed
        and the patterns collected so far are returned.
    """
    patterns = []
    gitignore_path = os.path.join(root_dir, '.gitignore')

    if not os.path.isfile(gitignore_path):
        return patterns

    try:
        # 'utf-8-sig' transparently drops a leading byte-order mark. With
        # plain 'utf-8' the BOM would stay glued to the first line, turning a
        # leading comment into a bogus pattern (or breaking the first real
        # pattern).
        with open(gitignore_path, 'r', encoding='utf-8-sig') as f:
            for line in f:
                line = line.strip()
                # Skip empty lines and comments.
                if not line or line.startswith('#'):
                    continue
                # Remove the leading backslash that escapes a literal '#'.
                if line.startswith(r'\#'):
                    line = line[1:]
                patterns.append(line)
    except (OSError, UnicodeError) as e:
        print(f"Warning: Could not read .gitignore: {e}")

    return patterns
|
|
|
|
def is_ignored(path, patterns, root_dir):
    """Check whether *path* matches any gitignore pattern (simplified).

    This is a deliberately reduced model of gitignore matching built on
    ``fnmatch``:

    * negation patterns (``!pattern``) are skipped entirely;
    * a pattern ending in ``/`` only applies to directories;
    * a pattern starting with ``/`` is matched against the path relative to
      *root_dir* only;
    * a pattern without any other ``/`` is matched against the basename, so
      it applies at every directory level;
    * any other pattern is matched against the path relative to *root_dir*.

    Note that ``fnmatch``'s ``*`` also matches ``/``, unlike real gitignore
    globbing.

    Args:
        path: File or directory to test. Directory-only patterns consult the
            file system (``os.path.isdir``) to decide whether they apply.
        patterns: Iterable of pattern strings, as returned by
            ``load_gitignore_patterns``.
        root_dir: Directory the patterns are relative to.

    Returns:
        ``True`` if at least one pattern matches, ``False`` otherwise. The
        root directory itself is never reported as ignored.
    """
    if not patterns:
        return False

    # Relative path with forward slashes, the form gitignore patterns use.
    rel_path = os.path.relpath(path, root_dir).replace(os.sep, '/')

    # The root itself shows up as '.', which patterns such as '.*' or '*'
    # would otherwise match by accident and hide the whole tree.
    if rel_path == '.':
        return False

    # Both values are loop-invariant; compute them once.
    is_dir = os.path.isdir(path)
    basename = os.path.basename(rel_path)

    for pattern in patterns:
        # Skip negation patterns (too complex for this script).
        if pattern.startswith('!'):
            continue

        anchored = pattern.startswith('/')

        # Directory pattern (ending with /).
        if pattern.endswith('/'):
            if not is_dir:
                continue
            # Strip both the trailing slash and a leading anchor slash;
            # leaving the anchor in place meant '/build/' could never match.
            dir_pattern = pattern.strip('/')
            if not dir_pattern:
                continue
            # Match the directory itself or anything below it.
            if (fnmatch.fnmatch(rel_path, dir_pattern)
                    or fnmatch.fnmatch(rel_path + '/', dir_pattern + '/*')):
                return True
            # An unanchored single-component pattern ('build/') applies at
            # any depth, so compare it with the directory name as well.
            if (not anchored and '/' not in dir_pattern
                    and fnmatch.fnmatch(basename, dir_pattern)):
                return True
            continue

        # Anchored pattern (starting with /) - match from root only.
        if anchored:
            if fnmatch.fnmatch(rel_path, pattern.lstrip('/')):
                return True
            continue

        if '/' not in pattern:
            # Pattern without slash - matches the basename at any level.
            if fnmatch.fnmatch(basename, pattern):
                return True
        elif fnmatch.fnmatch(rel_path, pattern):
            # Pattern with slash - relative path match.
            return True

    return False
|
|
|
|
def get_files_from_directory(directory, recursive=False, root_dir=None, gitignore_patterns=None):
    """Collect the files of *directory* that should go into the bundle.

    Hidden entries (names starting with ``.``), files rejected by
    ``should_exclude`` and anything matched by ``is_ignored`` are left out.
    In recursive mode hidden and ignored sub-directories are pruned, so
    nothing below them is visited.

    Args:
        directory: Directory to scan (absolute or relative to the current
            working directory).
        recursive: When true, descend into sub-directories; otherwise only
            the directory's direct children are considered.
        root_dir: Project root used for relative-path matching. Defaults to
            the current working directory.
        gitignore_patterns: Patterns as returned by
            ``load_gitignore_patterns``. Defaults to no patterns.

    Returns:
        A list of absolute file paths, in file-system enumeration order. The
        list is empty (after printing a warning) when *directory* does not
        exist or is not a directory, and empty when the directory itself is
        ignored.
    """
    if root_dir is None:
        root_dir = os.getcwd()
    if gitignore_patterns is None:
        gitignore_patterns = []

    abs_directory = os.path.abspath(directory)

    if not os.path.exists(abs_directory):
        print(f"Warning: Directory '{directory}' not found.")
        return []

    # A regular file would pass the existence check above and then make
    # os.listdir() raise NotADirectoryError in non-recursive mode.
    if not os.path.isdir(abs_directory):
        print(f"Warning: '{directory}' is not a directory.")
        return []

    # Skip if the directory itself is ignored.
    if is_ignored(abs_directory, gitignore_patterns, root_dir):
        return []

    def wanted(parent, name):
        """Return True when the entry *name* in *parent* should be collected."""
        if name.startswith('.'):
            return False
        full_path = os.path.join(parent, name)
        # isfile() also rejects sub-directories in non-recursive mode.
        return (os.path.isfile(full_path)
                and not should_exclude(full_path, root_dir)
                and not is_ignored(full_path, gitignore_patterns, root_dir))

    files_list = []

    if recursive:
        for dirpath, dirnames, filenames in os.walk(abs_directory):
            # Prune in place so os.walk does not descend into hidden or
            # gitignored directories.
            dirnames[:] = [
                d for d in dirnames
                if not d.startswith('.')
                and not is_ignored(os.path.join(dirpath, d), gitignore_patterns, root_dir)
            ]
            files_list.extend(
                os.path.join(dirpath, name)
                for name in filenames
                if wanted(dirpath, name)
            )
    else:
        files_list.extend(
            os.path.join(abs_directory, name)
            for name in os.listdir(abs_directory)
            if wanted(abs_directory, name)
        )

    return files_list
|
|
|
|
def main():
    """Bundle the project's text files into a single Markdown document.

    Files are collected from a fixed list of directories below the current
    working directory (honouring ``.gitignore``), de-duplicated and sorted.
    Each one is written to ``copy.md`` as a ``###`` heading holding its
    relative path, followed by a fenced code block with its content. This
    script and the output file are never included. Files that cannot be read
    as UTF-8 text are skipped with a warning.

    Exits with status 1 if the output file cannot be written.
    """
    root_dir = os.getcwd()
    script_path = os.path.abspath(__file__)
    output_file = "copy.md"
    output_path = os.path.abspath(output_file)
    codeblock = "```"

    # Load .gitignore patterns
    gitignore_patterns = load_gitignore_patterns(root_dir)
    if gitignore_patterns:
        print(f"Loaded {len(gitignore_patterns)} patterns from .gitignore")

    # Directories to process: (path, recursive)
    directories = [
        ("./", False),  # Root directory: top-level files only
        ("assets/", True),
        ("core/", True),
        ("strings/", True),
        ("windows/", True),
    ]

    # A set removes files reachable through more than one entry above.
    all_files = set()
    for directory, recursive in directories:
        for found in get_files_from_directory(directory, recursive, root_dir, gitignore_patterns):
            # Never bundle the generated output or this script itself.
            if os.path.abspath(found) not in (output_path, script_path):
                all_files.add(found)

    # Collect the pieces in a list and join once at the end; repeated string
    # concatenation re-copies the growing document for every file.
    parts = ["# Main website\n\n"]
    file_count = 0

    for file_path in sorted(all_files):
        try:
            rel_path = os.path.relpath(file_path, root_dir)
            language = get_language(file_path)

            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
        except UnicodeDecodeError:
            print(f"Warning: Could not read {file_path} as text. Skipping.")
            continue
        except Exception as e:
            print(f"Error processing {file_path}: {e}")
            continue

        # A file that itself contains the fence (e.g. a Markdown file with
        # code blocks) would terminate the block early. Lengthen the fence
        # until it no longer occurs in the content.
        fence = codeblock
        while fence in content:
            fence += "`"

        parts.append(f"### {rel_path.replace(os.sep, '/')}\n\n")
        parts.append(f"{fence}{language}\n")
        parts.append(content)
        parts.append(f"\n{fence}\n\n")
        file_count += 1

    parts.append(f"<!-- Processed {file_count} files -->\n")
    markdown_content = "".join(parts)

    try:
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(markdown_content)
        print(f"Successfully created {output_file} with {file_count} files.")
    except Exception as e:
        print(f"Error writing to {output_file}: {e}")
        sys.exit(1)
|
|
|
|
# Run the bundler only when executed as a script, not when imported.
if __name__ == "__main__":
    main()