import os
import re
import subprocess

def find_wp_installations(root_dir, filename_to_check):
    """Find directories under ``root_dir`` that contain ``filename_to_check``.

    The tree is traversed with ``os.walk``. For every directory that holds
    the file, two lines are printed, indented by four spaces per level
    below ``root_dir``: the directory name and a note that the file exists.

    Args:
        root_dir: Directory at which the search starts.
        filename_to_check: Exact file name to look for (e.g. ``wp-config.php``).

    Returns:
        The matching directory paths, as produced by ``os.walk``, in
        traversal order.
    """
    wp_directories = []
    for dirpath, _dirnames, filenames in os.walk(root_dir):
        if filename_to_check not in filenames:
            continue
        wp_directories.append(dirpath)

        # Measure the depth on the path *relative* to the root. Deleting
        # root_dir as a substring would also remove later repetitions of it
        # and gives a different depth when root_dir ends with a separator.
        relative = os.path.relpath(dirpath, root_dir)
        level = 0 if relative == os.curdir else relative.count(os.sep) + 1
        indent = ' ' * 4 * level

        # normpath drops a trailing separator, so the root directory itself
        # is still printed with a name instead of an empty string.
        print(f"{indent}{os.path.basename(os.path.normpath(dirpath))}/")
        print(f"{indent}{filename_to_check} exists in this directory")
    return wp_directories

def extract_db_credentials(config_file_path):
    """Extract database credentials from wp-config.php.

    Looks for the ``define()`` calls of ``DB_NAME``, ``DB_USER``,
    ``DB_PASSWORD`` and ``DB_HOST``. Whitespace inside the call, single or
    double quotes, and empty values (e.g. an empty password) are accepted.
    The first textual occurrence of each constant wins; the value is
    returned exactly as written in the file (PHP escapes are not decoded).

    Args:
        config_file_path: Path to the configuration file, read as UTF-8.

    Returns:
        A dict with the keys ``DB_NAME``, ``DB_USER``, ``DB_PASSWORD`` and
        ``DB_HOST`` mapped to their string values.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If one of the four constants is not defined in the file.
    """
    with open(config_file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    db_config = {}
    for key in ('DB_NAME', 'DB_USER', 'DB_PASSWORD', 'DB_HOST'):
        # \1 / \2 force the closing quote to match the opening one, so a
        # value may contain the other quote character. The non-greedy value
        # must be followed by the closing quote, ")" and ";".
        pattern = (
            r"define\(\s*(['\"])" + re.escape(key)
            + r"\1\s*,\s*(['\"])(.*?)\2\s*\)\s*;"
        )
        match = re.search(pattern, content)
        if match is None:
            # Fail with a message that names the problem instead of the
            # AttributeError raised by calling .group() on None.
            raise ValueError(
                f"{key} is not defined in {config_file_path}"
            )
        db_config[key] = match.group(3)
    return db_config

def _search_database(wp_dir, url_pattern, url_regex):
    """Return the pattern matches wp-cli reports for the site in wp_dir."""
    # --regex:  treat url_pattern as a regular expression, not a literal.
    # --log:    print the matched text to stdout; the default dry-run
    #           report only lists tables/columns and replacement counts.
    # --dry-run keeps the database untouched.
    # NOTE: wp-cli evaluates the pattern with PHP's regex engine while the
    # output is filtered with Python's `re`; keep the pattern valid in both.
    command = [
        'wp', 'search-replace', url_pattern, '', '--dry-run', '--regex',
        '--log', '--allow-root', '--path=' + wp_dir,
    ]
    try:
        result = subprocess.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            text=True, errors='replace',
        )
    except OSError as e:
        # Typically: the `wp` executable is not installed / not on PATH.
        print(f"Error running wp-cli for {wp_dir}: {e}")
        return []

    if result.returncode != 0:
        print(f"Error running wp-cli for {wp_dir}: {result.stderr}")
        return []

    return [match.group(0) for match in url_regex.finditer(result.stdout)]


def _search_files(wp_dir, url_regex, skip_path, scanned_files):
    """Return the pattern matches found in the files below wp_dir.

    ``skip_path`` (an absolute path) is never read. ``scanned_files`` is a
    set of absolute paths shared between calls; files already in it are
    skipped and every file visited here is added to it.
    """
    matches = []
    for dirpath, _dirnames, filenames in os.walk(wp_dir):
        for filename in filenames:
            file_path = os.path.join(dirpath, filename)
            absolute_path = os.path.abspath(file_path)
            if absolute_path == skip_path or absolute_path in scanned_files:
                continue
            scanned_files.add(absolute_path)
            try:
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
                    content = file.read()
            except OSError as e:
                print(f"Error reading {file_path}: {e}")
                continue
            # group(0) is the whole match even if the pattern has groups.
            matches.extend(match.group(0) for match in url_regex.finditer(content))
    return matches


def find_and_log_urls(wp_directories, url_pattern, log_file):
    """Search for URLs in the given directories and log them.

    For every directory the database is searched through wp-cli
    (``wp search-replace`` in dry-run mode) and all files below the
    directory are searched directly. A wp-cli failure is reported on stdout
    and does not prevent the file search. The log file itself is excluded
    from the file search, and a file reachable from several entries of
    ``wp_directories`` (nested installations) is only searched once.

    Args:
        wp_directories: Iterable of WordPress installation directories.
        url_pattern: Regular expression (as a string) describing the URLs.
        log_file: Path of the file that receives one match per line; it is
            overwritten on every call.

    Raises:
        re.error: If ``url_pattern`` is not a valid regular expression.
        OSError: If ``log_file`` cannot be written.
    """
    url_regex = re.compile(url_pattern)
    # A log left behind by an earlier run must not be counted as a finding.
    log_path = os.path.abspath(log_file)
    scanned_files = set()
    urls_found = []

    for wp_dir in wp_directories:
        urls_found.extend(_search_database(wp_dir, url_pattern, url_regex))
        urls_found.extend(
            _search_files(wp_dir, url_regex, log_path, scanned_files)
        )

    with open(log_file, 'w', encoding='utf-8') as log:
        log.writelines(url + '\n' for url in urls_found)

    if not urls_found:
        print("No matching URLs were found in any directory.")
    else:
        print(f"Found {len(urls_found)} URLs matching the pattern. Check {log_file} for details.")

if __name__ == "__main__":
    # Regex pattern for the URLs
    url_pattern = r"https://cdn\d+\.akmcdn\d+\.com/redirect\.aspx\?pid=\d+&bid=\d+"
    # Name of the file that receives the matches
    log_file = "found_urls.txt"
    # The search starts in the directory that contains this script
    root_directory = os.path.dirname(os.path.abspath(__file__))

    wp_directories = find_wp_installations(
        root_dir=root_directory,
        filename_to_check='wp-config.php',
    )
    find_and_log_urls(
        wp_directories=wp_directories,
        url_pattern=url_pattern,
        log_file=log_file,
    )
