Thank you all for sharing your ideas and suggestions. It is helping to jog my memory.
I'm wondering how to create a CUDA program. The goal of the program would be to generate every possible combination of the missing characters, given the characters that are already known. From each candidate, the program should produce both the compressed and the uncompressed form of the private key and derive the corresponding addresses. Finally, it should check the balances of those addresses against a public directory to find a match.
I stumbled upon a potential hint that I had penned down in a different section of the notebook. However, I'm struggling to decode its exact meaning. Given that the missing characters are expected to be of the HEX variety, I'm leaning towards the idea that creating a CUDA-based solution might be our most efficient route to uncover these characters.
This is the hint: The numbers in my note are separated by single, double, or even multiple vertical lines. For context, I've denoted these vertical lines using the character located directly below the backspace key on a standard keyboard.
5| | | 6| | | 7|9|8| 5| | | 6|7|8|9|10|11|12
2|3|4| 2|3|4|
1| 1|
I did a little research. Would a good starting template for the CUDA program look like this? Do you have any feedback?
// Including required header files
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
// Replaces every '?' found in text[0..len) with one uppercase hexadecimal
// digit taken from `value`. Scanning runs from the end of the buffer towards
// the beginning, so the last '?' receives the least-significant nibble.
// Characters other than '?' are left untouched. Callable from host and device
// so the digit-assignment logic can be checked without a GPU.
__host__ __device__ inline void fillHexWildcards(char *text, int len, unsigned long long value) {
    for (int i = len - 1; i >= 0; i--) {
        if (text[i] == '?') {
            const int nibble = (int)(value & 0xFULL);  // same as value % 16
            text[i] = (char)(nibble < 10 ? '0' + nibble : 'A' + (nibble - 10));
            value >>= 4;                               // same as value / 16
        }
    }
}

// CUDA kernel that generates one candidate string per thread.
//
// Launch layout: 1-D grid of 1-D blocks; no shared memory is used.
//   baseString       - device pointer to an 81-byte template (80 characters
//                      plus terminator) in which '?' marks an unknown digit
//   result           - device buffer of at least num_permutations * 81 bytes
//   num_permutations - number of candidates produced by THIS launch, i.e. the
//                      number of 81-byte slots available in `result`
//   offset           - index of the first candidate of this launch, so that
//                      successive launches can walk through the search space
//
// Thread `slot` writes candidate number `offset + slot` into result slot
// `slot`. The bounds guard is applied to the slot, not to the offset index:
// guarding on `slot + offset` would make every launch with a non-zero offset
// either produce nothing or let the surplus threads of the last block write
// past the end of `result`.
__global__ void generatePermutations(char *baseString, char *result, unsigned long long num_permutations, unsigned long long offset) {
    // Widen before multiplying: blockIdx.x * blockDim.x is a 32-bit product.
    const unsigned long long slot =
        (unsigned long long)blockIdx.x * blockDim.x + threadIdx.x;
    // The grid is rounded up to a whole number of blocks; drop the surplus.
    if (slot >= num_permutations) return;

    // Private working copy of the template for this thread.
    char localString[81];
    memcpy(localString, baseString, 81);

    // Only the 80 payload characters are scanned; byte 80 is the terminator.
    fillHexWildcards(localString, 80, slot + offset);

    // 64-bit slot keeps the byte offset from overflowing for large batches.
    memcpy(&result[slot * 81], localString, 81);
}
// Evaluates a CUDA runtime call and terminates with a readable message when
// it does not return cudaSuccess. Guarded so an existing definition wins.
#ifndef CUDA_CHECK
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t cudaCheckErr_ = (call);                                \
        if (cudaCheckErr_ != cudaSuccess) {                                \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,         \
                    __LINE__, cudaGetErrorString(cudaCheckErr_));          \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)
#endif

// Main program function: generates one batch of candidate strings on the GPU
// and copies them back to host memory for further processing.
// Returns 0 on success; exits with EXIT_FAILURE if a CUDA call or the host
// allocation fails.
int main() {
    // Placeholder template: replace it with the real 80-character string in
    // which each unknown hexadecimal digit is written as '?'. The array is
    // 81 bytes so the terminating NUL is stored as well.
    char baseString[81] = "BASE_STRING_WITH_14_?";

    // Device pointers for the template and the result array
    char *d_baseString = NULL;
    char *d_result = NULL;

    // Number of candidates generated per launch (bounded by GPU memory)
    const unsigned long long subset_size = 100000ULL;
    const size_t result_size = 81 * (size_t)subset_size;  // bytes

    // Allocate device memory and upload the template
    CUDA_CHECK(cudaMalloc((void**)&d_baseString, 81));
    CUDA_CHECK(cudaMalloc((void**)&d_result, result_size));
    CUDA_CHECK(cudaMemcpy(d_baseString, baseString, 81, cudaMemcpyHostToDevice));

    // One thread per candidate; round the block count up so the tail is covered
    const int threadsPerBlock = 256;
    const int blocksPerGrid =
        (int)((subset_size + threadsPerBlock - 1) / threadsPerBlock);

    // Launch the kernel with an explicit execution configuration
    generatePermutations<<<blocksPerGrid, threadsPerBlock>>>(d_baseString, d_result, subset_size, 0);
    // A launch does not return a status itself; fetch configuration errors here
    CUDA_CHECK(cudaGetLastError());

    // Host buffer that receives the generated candidates
    char *h_result = (char*) malloc(result_size);
    if (h_result == NULL) {
        fprintf(stderr, "Out of host memory (%zu bytes requested)\n", result_size);
        cudaFree(d_baseString);
        cudaFree(d_result);
        return EXIT_FAILURE;
    }
    // Blocking copy: waits for the kernel and reports any error raised while
    // the kernel was executing
    CUDA_CHECK(cudaMemcpy(h_result, d_result, result_size, cudaMemcpyDeviceToHost));

    // You can now process the result further or save it to a file

    // Free the allocated memory on the GPU and CPU
    CUDA_CHECK(cudaFree(d_baseString));
    CUDA_CHECK(cudaFree(d_result));
    free(h_result);
    return 0;
}
To compile the code:
nvcc filename.cu -o outputname
./outputname
Retrieve Permutations from GPU: After the CUDA kernel execution, there is a subset of permutations stored in a device array. Copy this array back to the host.
// Host buffer sized to hold every candidate produced by the kernel launch
char *h_result = (char*) malloc(result_size);
if (h_result == NULL) {
    fprintf(stderr, "Out of host memory (%zu bytes requested)\n", result_size);
    exit(EXIT_FAILURE);
}
// Blocking device-to-host copy; its status also reports errors raised while
// the kernel was executing
cudaError_t copy_status = cudaMemcpy(h_result, d_result, result_size, cudaMemcpyDeviceToHost);
if (copy_status != cudaSuccess) {
    fprintf(stderr, "Copying results from the GPU failed: %s\n", cudaGetErrorString(copy_status));
    exit(EXIT_FAILURE);
}
Initialize OpenSSL: Before using OpenSSL's functions, initialize the library.
// One-time library initialization, performed before any other OpenSSL
// function is used.
// NOTE(review): presumably only needed on OpenSSL releases older than 1.1.0;
// confirm against the installed version.
OpenSSL_add_all_algorithms();
Generate Private Key: For each permutation string, compute its SHA-256 hash; the resulting 32-byte digest serves as the Bitcoin private key.
unsigned char hash[SHA256_DIGEST_LENGTH];
SHA256_CTX sha256;
SHA256_Init(&sha256);
SHA256_Update(&sha256, permutation_string, strlen(permutation_string));
SHA256_Final(hash, &sha256);
Generate Public Key: Derive the uncompressed public key from the private key on the secp256k1 curve (the curve Bitcoin uses for ECDSA).
EC_KEY *eckey = EC_KEY_new_by_curve_name(NID_secp256k1);
BIGNUM *bn = BN_bin2bn(hash, 32, NULL);
EC_KEY_set_private_key(eckey, bn);
EC_KEY_generate_key(eckey);
Generate Compressed Public Key: The compressed public key is the X-coordinate of the public key point together with a one-byte prefix (0x02 or 0x03) that records the parity of the Y-coordinate, which is enough to recover it.
const EC_POINT *pub_key = EC_KEY_get0_public_key(eckey);
size_t size = EC_POINT_point2oct(group, pub_key, POINT_CONVERSION_COMPRESSED, NULL, 0, NULL);
unsigned char *compressed_key = malloc(size);
EC_POINT_point2oct(group, pub_key, POINT_CONVERSION_COMPRESSED, compressed_key, size, NULL);
Cleanup: Don't forget to free any dynamically allocated resources.
// Release everything in the reverse order of acquisition
free(compressed_key);  // serialized public key
BN_free(bn);           // private scalar
EC_KEY_free(eckey);    // key object
free(h_result);        // host copy of the generated strings
Compile and link C code with OpenSSL:
gcc your_file.c -o your_program -lcrypto
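Combined into a unified program:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <openssl/bn.h>
#include <openssl/ec.h>
#include <openssl/evp.h>
#include <openssl/obj_mac.h>
#include <openssl/sha.h>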
// Replaces every '?' found in text[0..len) with one uppercase hexadecimal
// digit taken from `value`, scanning from the end of the buffer so that the
// last '?' receives the least-significant nibble. Other characters are left
// untouched. Callable from host and device so it can be checked without a GPU.
__host__ __device__ inline void fillHexWildcards(char *text, int len, unsigned long long value) {
    for (int i = len - 1; i >= 0; i--) {
        if (text[i] == '?') {
            const int nibble = (int)(value & 0xFULL);  // same as value % 16
            text[i] = (char)(nibble < 10 ? '0' + nibble : 'A' + (nibble - 10));
            value >>= 4;                               // same as value / 16
        }
    }
}

// Kernel that generates one candidate string per thread.
//
// Launch layout: 1-D grid of 1-D blocks; no shared memory is used.
//   baseString       - device pointer to an 81-byte template in which '?'
//                      marks an unknown hexadecimal digit
//   result           - device buffer of at least num_permutations * 81 bytes
//   num_permutations - number of candidates produced by THIS launch (slots
//                      available in `result`)
//   offset           - index of the first candidate of this launch
//
// Thread `slot` writes candidate `offset + slot` into result slot `slot`.
// The guard is applied to the slot rather than to `slot + offset`; otherwise
// a non-zero offset would either suppress all output or let the surplus
// threads of the last block write past the end of `result`.
__global__ void generatePermutations(char *baseString, char *result, unsigned long long num_permutations, unsigned long long offset) {
    // Widen before multiplying: blockIdx.x * blockDim.x is a 32-bit product.
    const unsigned long long slot =
        (unsigned long long)blockIdx.x * blockDim.x + threadIdx.x;
    if (slot >= num_permutations) return;

    // Private working copy of the template for this thread.
    char localString[81];
    memcpy(localString, baseString, 81);

    // Only the 80 payload characters are scanned; byte 80 is the terminator.
    fillHexWildcards(localString, 80, slot + offset);

    // 64-bit slot keeps the byte offset from overflowing for large batches.
    memcpy(&result[slot * 81], localString, 81);
}
// Evaluates a CUDA runtime call and terminates with a readable message when
// it does not return cudaSuccess. Guarded so an existing definition wins.
#ifndef CUDA_CHECK
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t cudaCheckErr_ = (call);                                \
        if (cudaCheckErr_ != cudaSuccess) {                                \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,         \
                    __LINE__, cudaGetErrorString(cudaCheckErr_));          \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)
#endif

// Generates one batch of candidate strings on the GPU, then, on the host,
// hashes each candidate with SHA-256, treats the digest as a secp256k1
// private key and derives the matching compressed public key.
// Returns 0 on success; exits with EXIT_FAILURE when a CUDA call, a host
// allocation or the OpenSSL setup fails. A candidate whose key cannot be
// derived is reported on stderr and skipped.
int main() {
    // Placeholder template: replace it with the real 80-character string in
    // which each unknown hexadecimal digit is written as '?'.
    char baseString[81] = "BASE_STRING_WITH_14_?";
    char *d_baseString = NULL;
    char *d_result = NULL;

    // Candidates generated per launch and the matching buffer size in bytes
    const unsigned long long subset_size = 1000ULL;
    const size_t result_size = 81 * (size_t)subset_size;

    CUDA_CHECK(cudaMalloc((void**)&d_baseString, 81));
    CUDA_CHECK(cudaMalloc((void**)&d_result, result_size));
    CUDA_CHECK(cudaMemcpy(d_baseString, baseString, 81, cudaMemcpyHostToDevice));

    // One thread per candidate; round the block count up to cover the tail
    const int threadsPerBlock = 256;
    const int blocksPerGrid =
        (int)((subset_size + threadsPerBlock - 1) / threadsPerBlock);
    generatePermutations<<<blocksPerGrid, threadsPerBlock>>>(d_baseString, d_result, subset_size, 0);
    // A launch does not return a status itself; fetch configuration errors here
    CUDA_CHECK(cudaGetLastError());

    char *h_result = (char*) malloc(result_size);
    if (h_result == NULL) {
        fprintf(stderr, "Out of host memory (%zu bytes requested)\n", result_size);
        cudaFree(d_baseString);
        cudaFree(d_result);
        return EXIT_FAILURE;
    }
    // Blocking copy: waits for the kernel and reports execution errors
    CUDA_CHECK(cudaMemcpy(h_result, d_result, result_size, cudaMemcpyDeviceToHost));

    // Initialize OpenSSL
    OpenSSL_add_all_algorithms();

    // Curve objects are loop-invariant, so they are created once and reused
    EC_KEY *eckey = EC_KEY_new_by_curve_name(NID_secp256k1);
    const EC_GROUP *group = (eckey != NULL) ? EC_KEY_get0_group(eckey) : NULL;
    EC_POINT *pub_key = (group != NULL) ? EC_POINT_new(group) : NULL;
    if (pub_key == NULL) {
        fprintf(stderr, "Failed to set up the secp256k1 curve objects\n");
        EC_KEY_free(eckey);
        free(h_result);
        cudaFree(d_baseString);
        cudaFree(d_result);
        return EXIT_FAILURE;
    }

    // size_t index: avoids the signed/unsigned comparison and keeps i * 81
    // from overflowing an int for large batches
    for (size_t i = 0; i < subset_size; ++i) {
        const char *perm = &h_result[i * 81];

        // Generate SHA-256 Hash (terminating NUL excluded)
        unsigned char hash[SHA256_DIGEST_LENGTH];
        SHA256_CTX sha256;
        SHA256_Init(&sha256);
        SHA256_Update(&sha256, perm, strlen(perm));
        SHA256_Final(hash, &sha256);

        // Generate Public Key: pub = bn * G. EC_KEY_generate_key() is not
        // used because it would replace the private key with a random one.
        BIGNUM *bn = BN_bin2bn(hash, SHA256_DIGEST_LENGTH, NULL);
        const bool derived =
            bn != NULL &&
            EC_KEY_set_private_key(eckey, bn) == 1 &&
            EC_POINT_mul(group, pub_key, bn, NULL, NULL, NULL) == 1 &&
            EC_KEY_set_public_key(eckey, pub_key) == 1;

        // Generate Compressed Public Key: always 33 bytes on secp256k1
        // (1 prefix byte + 32-byte X coordinate), so no heap buffer is needed
        unsigned char compressed_key[33];
        const size_t size = derived
            ? EC_POINT_point2oct(group, pub_key, POINT_CONVERSION_COMPRESSED,
                                 compressed_key, sizeof(compressed_key), NULL)
            : 0;

        // eckey holds its own copy of the scalar; the temporary can go
        BN_free(bn);

        if (size != sizeof(compressed_key)) {
            fprintf(stderr, "Skipping candidate %zu: key derivation failed\n", i);
            continue;
        }

        // TODO: Store or process the keys
        // (hash = private key bytes, compressed_key = compressed public key,
        //  eckey = the complete key pair for this candidate)
    }

    // Final Cleanup
    EC_POINT_free(pub_key);
    EC_KEY_free(eckey);
    free(h_result);
    CUDA_CHECK(cudaFree(d_baseString));
    CUDA_CHECK(cudaFree(d_result));
    return 0;
}
Compile the program using both the CUDA and OpenSSL libraries:
nvcc -o combined_program combined_program.cu -lssl -lcrypto
Run the program:
./combined_program