Author

Topic: I found a way to implement real asic-resistant POW algorithm (Read 706 times)

newbie
Activity: 148
Merit: 0
The function described here is not asic resistant, in fact it looks like an outright scam. Buyer beware.

You don't understand what I'm saying. You just spray trash

If you go to github, download and compile askcoin's source code, you will understand how I achieved ASIC-resistant

https://github.com/lichuan/askcoin

here is the ASIC-resistant data file:

https://github.com/lichuan/askcoin/blob/master/src/asic_resistant.data



newbie
Activity: 148
Merit: 0
newbie
Activity: 148
Merit: 0
The function described here is not asic resistant, in fact it looks like an outright scam. Buyer beware.

SHA256 + huge memory access, It can greatly increase the difficulty of asic mine.
staff
Activity: 4242
Merit: 8672
The function described here is not asic resistant, in fact it looks like an outright scam. Buyer beware.
newbie
Activity: 148
Merit: 0
The mining process from askcoin project (www.askcoin.me) is:

Code:
void Blockchain::do_mine()
{
    while(!m_stop.load(std::memory_order_relaxed))
    {
        if(!m_need_remine.load(std::memory_order_acquire))
        {
            std::this_thread::sleep_for(std::chrono::milliseconds(10));
            continue;
        }
        
        std::list> mined_txs;
        uint64 cur_block_id, cur_block_utc;
        uint32 zero_bits;
        std::string cur_block_hash, miner_key;
        std::atomic mine_id_2 {0};
    remine:
        {
            std::lock_guard guard(m_mine_mutex);
            mined_txs = std::move(m_mined_txs);
            mine_id_2.store(m_mine_id_1.load(std::memory_order_relaxed), std::memory_order_relaxed);
            cur_block_id = m_mine_cur_block_id;
            cur_block_utc = m_mine_cur_block_utc;
            cur_block_hash = m_mine_cur_block_hash;
            zero_bits = m_mine_zero_bits;
            miner_key = m_miner_privkey;
            m_need_remine.store(false, std::memory_order_relaxed);
        }

        char privk[32];
        fly::base::base64_decode(miner_key.c_str(), miner_key.length(), privk, 32);
        CKey miner_priv_key;
        miner_priv_key.Set(privk, privk + 32, false);
        CPubKey miner_pub_key = miner_priv_key.GetPubKey();
        std::string miner_pub_key_b64 = fly::base::base64_encode(miner_pub_key.begin(), miner_pub_key.size());
        
        auto doc_ptr = std::make_shared();
        rapidjson::Document &doc = *doc_ptr;
        doc.SetObject();
        rapidjson::Document::AllocatorType &allocator = doc.GetAllocator();
        doc.AddMember("hash", "", allocator);
        doc.AddMember("sign", "", allocator);
        rapidjson::Value data(rapidjson::kObjectType);
        data.AddMember("id", cur_block_id + 1, allocator);
        data.AddMember("utc", time(NULL), allocator);
        data.AddMember("version", ASKCOIN_VERSION, allocator);
        data.AddMember("zero_bits", zero_bits, allocator);
        data.AddMember("pre_hash", rapidjson::Value(cur_block_hash.c_str(), allocator), allocator);
        data.AddMember("miner", rapidjson::Value(miner_pub_key_b64.c_str(), allocator), allocator);
        rapidjson::Value tx_ids(rapidjson::kArrayType);
        
        for(auto tx : mined_txs)
        {
            tx_ids.PushBack(rapidjson::Value(tx->m_id.c_str(), allocator), allocator);
        }

        data.AddMember("tx_ids", tx_ids, allocator);
        rapidjson::Value nonce(rapidjson::kArrayType);
        nonce.PushBack(0, allocator);
        nonce.PushBack(0, allocator);
        nonce.PushBack(0, allocator);
        nonce.PushBack(0, allocator);
        data.AddMember("nonce", nonce, allocator);
        char hash_raw[32];
        
        for(uint64 i = 0; i < (uint64)-1; ++i)
        {
            for(uint64 j = 0; j < (uint64)-1; ++j)
            {
                for(uint64 k = 0; k < (uint64)-1; ++k)
                {
                    for(uint64 m = 0; m < (uint64)-1; ++m)
                    {
                        if(m_stop.load(std::memory_order_relaxed))
                        {
                            return;
                        }
                        
                        if(m_need_remine.load(std::memory_order_acquire))
                        {
                            goto remine;
                        }
                        
                        uint64 utc = time(NULL);
                        
                        if(utc < cur_block_utc)
                        {
                            utc = cur_block_utc;
                        }
                        
                        data["utc"].SetUint64(utc);
                        data["nonce"][0] = m;
                        data["nonce"][1] = k;
                        data["nonce"][2] = j;
                        data["nonce"][3] = i;
                        rapidjson::StringBuffer buffer;
                        rapidjson::Writer writer(buffer);
                        data.Accept(writer);
                        uint32 buf[16] = {0};
                        char *p = (char*)buf;
                        coin_hash(buffer.GetString(), buffer.GetSize(), p);
                        char * ptr = buffer.Push(16);
                        memcpy(ptr, "another_32_bytes", 16);
                        coin_hash(buffer.GetString(), buffer.GetSize(), p + 32);
                        uint32 arr_16[16] = {0};
                            
                        for(uint32 i = 0; i < 16; ++i)
                        {
                            arr_16[i] = ntohl(buf[i]);
                        }
    
                        for(uint32 i = 0; i < ASIC_RESISTANT_DATA_NUM;)
                        {
                            for(int j = 0; j < 16; ++j)
                            {
                                arr_16[j] = (arr_16[j] + __asic_resistant_data__[i + j]) * (arr_16[j] ^ __asic_resistant_data__[i + j]);
                            }
        
                            i += 16;
                        }
    
                        for(uint32 i = 0; i < 16; ++i)
                        {
                            buf[i] = htonl(arr_16[i]);
                        }

                        ptr = buffer.Push(88);
                        std::string p_b64 = fly::base::base64_encode(p, 64);
                        memcpy(ptr, p_b64.data(), 88);
                        coin_hash(buffer.GetString(), buffer.GetSize(), hash_raw);
                        uint32 zero_char_num = zero_bits / 8;
                        uint32 zero_remain_bit = 0;

                        for(uint32 i = 0; i < zero_char_num; ++i)
                        {
                            if(hash_raw[i] != 0)
                            {
                                goto try_next;
                            }
                        }

                        zero_remain_bit = zero_bits % 8;
                        
                        if(zero_remain_bit == 0)
                        {
                            goto mine_success;
                        }
                            
                        if((uint8)hash_raw[zero_char_num] < 1 << 8 - zero_remain_bit)
                        {
                            goto mine_success;
                        }

                    try_next:
                        ;
                    }
                }
            }
        }
        
    mine_success:
        std::string hex_hash = fly::base::byte2hexstr(hash_raw, 32);
        std::string block_hash = fly::base::base64_encode(hash_raw, 32);
        LOG_DEBUG_INFO("mine successfully, zero_bits: %u, block_id: %lu, block_hash: %s (hex: %s)", \
                       zero_bits, cur_block_id + 1, block_hash.c_str(), hex_hash.c_str());
        doc["hash"].SetString(block_hash.c_str(), allocator);
        std::vector sign_vec;

        if(!miner_priv_key.Sign(uint256(std::vector(hash_raw, hash_raw + 32)), sign_vec))
        {
            ASKCOIN_EXIT(EXIT_FAILURE);
        }
        
        std::string block_sign = fly::base::base64_encode(&sign_vec[0], sign_vec.size());
        doc["sign"].SetString(block_sign.c_str(), allocator);
        doc.AddMember("data", data, allocator);
        rapidjson::Value doc_tx(rapidjson::kArrayType);
        
        for(auto tx : mined_txs)
        {
            rapidjson::Document &doc = *tx->m_doc;
            rapidjson::Value tx_node(rapidjson::kObjectType);
            tx_node.AddMember("sign", doc["sign"], allocator);
            tx_node.AddMember("data", doc["data"], allocator);
            doc_tx.PushBack(tx_node, allocator);
        }
        
        doc.AddMember("tx", doc_tx, allocator);
        {
            std::lock_guard guard(m_mine_mutex);
            m_mine_doc = doc_ptr;
            m_mine_id_2.store(mine_id_2.load(std::memory_order_relaxed), std::memory_order_relaxed);
        }
        
        m_mine_success.store(true, std::memory_order_release);
    }
}

The mainnet of askcoin is coming soon, the source code will be open when this project has passed its infancy.
FYI: https://bitcointalksearch.org/topic/annaskcoin-is-a-decentralized-real-time-qa-platform-3733771
legendary
Activity: 990
Merit: 1108
There is a reason ZCash paid the inventor of Yescrypt to evaluate their use of Equihash.  Unfortunately they didn't heed his advice and now a short time period later they are suffering for it.

There is a proof of work where the memory requirements can vary from 1KB to 1PB (a petabyte is a billion GB), where verification is instant, and where increasing the requirement is just a soft fork.
legendary
Activity: 990
Merit: 1108
The N value is required to be kept low in Scrypt to allow costeffectiev and fast validation that is a MUST in PoW.

Why is it such a MUST?  Cumbersome validation, big deal.  Lets say it takes even 20 seconds to validate a block, a blocktime of 10 minutes means  that the head-start the original broadcasting miner gets is negligible.

It is a MUST for at least two reasons:

1) because new nodes trying to identify the longest valid chain must process all proofs of work in the entire blockchain history. That's on the order of a million of them. Even if you allow an hour for downloading just the headers, that's only a few milliseconds you can spend on each pow.

and even more importantly:

2) a slow verification is an invitation for a Denial of Service attack where attackers keep feeding you bogus proofs-of-work which you need to spend significant time on to recognize as bogus. Verification should therefore take no more time than it takes to receive a header (well under a millisecond).
member
Activity: 322
Merit: 54
Consensus is Constitution
Quote
And there is very little difference between gpu and cpus in this context. Modern gpus are not optimized enough for I/O and typically are optimized for vector operations. PoW is naturally a good problem for gpus to solve and I don't believe it to be the inverse case, I mean designing algorithms for gpus.

Well lets look at scrypt for example.  Scrypt was tuned for GPU's with 1024 as the N value.  It was cracked by ASIC's.  If the N value was set to say 100,000 GPU's would likely struggle with it but CPU's would be able to do it pretty well.  To this day Scrypt would still be ASIC free in my view.  
The N value is required to be kept low in Scrypt to allow costeffectiev and fast validation that is a MUST in PoW.

As I have mentioned above it is not about making algorithms gpu friendly. I guess you have been somhow misinformed about the situation. On the contrary, any PoW designer wishes to resists gpu optimization, it is just impractical imo.

Why is it such a MUST?  Cumbersome validation, big deal.  Lets say it takes even 20 seconds to validate a block, a blocktime of 10 minutes means  that the head-start the original broadcasting miner gets is negligible.

And considering that the INVENTORS of Scrypt and Yescrypt say that they should have raised the N value speaks volumes to me.  Do you really think Charlie Lee or whoever's code he was copying knew how to implement Scrypt better for this application than the INVENTOR of Scrypt does?

There is a reason ZCash paid the inventor of Yescrypt to evaluate their use of Equihash.  Unfortunately they didn't heed his advice and now a short time period later they are suffering for it.
newbie
Activity: 148
Merit: 0
... because ASIC is good at sha256 computation but not memory access, ...

An ASIC is simply a chip designed for a specific purpose. There is no reason why an ASIC can't be designed to be good at memory access.

Unbelievable question from a decent bitcointalk member  Shocked

Accessing memory bus is already optimized in cpu/gpu tech and it does not make sense for a  designer/manufacturer to claim such a thing: An asic optimized for memory bus access!

Any algorithm, (I mean any algorithm) being truly memory hard by means of memory access is inherently ASIC resistant as long it has two important properties:
  • a large enough memory footprint
  • a complex enough function that is not feasible to be implemented directly on the memory bank
The latter is new and I have to explain a bit:
It suggests that the fetch operation MUST be necessary and not feasible to be bypassed by adding some extra gates to a special memory bank to perform an in-place hash (like what happened for Equihash).

 The problem with Equihash and Cryptonight is that they have not achieved the desired level of hardness but Ethash (a Dagger Hasihmoto variant) is performing well and Bitmain's E3 by no means deserves to be called an ASIC.

To put it straight:
Any educated enough computer engineer can easily confirm that a memory bound algorithm can never be optimized by means of optimization of processing power (by implementing the algorithm in ASIC for instance).

So, it is just a ridiculous and naive assumption that every PoW algorithm is vulnerable to ASIC attacks.
On the contrary, I believe that ASIC resistance (and behind, ASIC proof) is achievable no matter what Bitmain and its agents try to induce in the community.


You're right, the real memory-hard algorithm should require that each data designed to be accessed from memory must be fetched from real memory. If some data could be deduced from other part by design some extra gate circuits, then it is not really memory-hard algorithm.
legendary
Activity: 1456
Merit: 1174
Always remember the cause!
Quote
And there is very little difference between gpu and cpus in this context. Modern gpus are not optimized enough for I/O and typically are optimized for vector operations. PoW is naturally a good problem for gpus to solve and I don't believe it to be the inverse case, I mean designing algorithms for gpus.

Well lets look at scrypt for example.  Scrypt was tuned for GPU's with 1024 as the N value.  It was cracked by ASIC's.  If the N value was set to say 100,000 GPU's would likely struggle with it but CPU's would be able to do it pretty well.  To this day Scrypt would still be ASIC free in my view.  
The N value is required to be kept low in Scrypt to allow costeffectiev and fast validation that is a MUST in PoW.

As I have mentioned above it is not about making algorithms gpu friendly. I guess you have been somhow misinformed about the situation. On the contrary, any PoW designer wishes to resists gpu optimization, it is just impractical imo.
legendary
Activity: 990
Merit: 1108
Well lets look at scrypt for example.  Scrypt was tuned for GPU's with 1024 as the N value.  It was cracked by ASIC's.  If the N value was set to say 100,000 GPU's would likely struggle with it but CPU's would be able to do it pretty well.  To this day Scrypt would still be ASIC free in my view.  

The N value had to be set low so that verification wouldn't become cumbersome.

Quote
Reading Solar Designer's (inventor of Yescrypt) analysis of ZCash's choice of Equihash, he also noted that they chose a too low "N value" type setting because they wanted it to be asymetric and verify very fast and be "GPU friendly".

They chose a lower-than-desired memory setting because early solver implementations were comically inefficient and they worried miners would be too slow. They also lacked the patience to wait for miner improvements. Verification was never the issue.
member
Activity: 322
Merit: 54
Consensus is Constitution
Quote
And there is very little difference between gpu and cpus in this context. Modern gpus are not optimized enough for I/O and typically are optimized for vector operations. PoW is naturally a good problem for gpus to solve and I don't believe it to be the inverse case, I mean designing algorithms for gpus.

Well lets look at scrypt for example.  Scrypt was tuned for GPU's with 1024 as the N value.  It was cracked by ASIC's.  If the N value was set to say 100,000 GPU's would likely struggle with it but CPU's would be able to do it pretty well.  To this day Scrypt would still be ASIC free in my view.  

Reading Solar Designer's (inventor of Yescrypt) analysis of ZCash's choice of Equihash, he also noted that they chose a too low "N value" type setting because they wanted it to be asymetric and verify very fast and be "GPU friendly".  It didn't work out too well.
https://blog.z.cash/the-zcash-equihash-analysis/

Here is his analysis of Argon2 Vs Yescrypt.  Seems Yescrypt is more parallelization resistant:
http://discussions.password-hashing.narkive.com/pYOIcGhK/phc-argon2-cpu-gpu-benchmarks

Quote
And even if  it was purely just a simple, innocent game about redefining and extending words I don't think it would be counted as a good move to extend Application Specific Integrated Circuit (what Bitmain is proud to be specialized in designing it) to include re-programmable appliances that are designed for a specific range of applications.

Good point, I didn't think about the Integrated Circuit part since everything is integrated circuits now.  But yes if they can integrate multiple parts of a typical motherboard in a way that excludes it's use in commodity hardware then that could be a definition of ASIC as well.
legendary
Activity: 1456
Merit: 1174
Always remember the cause!
... because ASIC is good at sha256 computation but not memory access, ...

An ASIC is simply a chip designed for a specific purpose. There is no reason why an ASIC can't be designed to be good at memory access.

Unbelievable question from a decent bitcointalk member  Shocked

Accessing memory bus is already optimized in cpu/gpu tech and it does not make sense for a  designer/manufacturer to claim such a thing: An asic optimized for memory bus access!

Any algorithm, (I mean any algorithm) being truly memory hard by means of memory access is inherently ASIC resistant as long it has two important properties:
  • a large enough memory footprint
  • a complex enough function that is not feasible to be implemented directly on the memory bank
The latter is new and I have to explain a bit:
It suggests that the fetch operation MUST be necessary and not feasible to be bypassed by adding some extra gates to a special memory bank to perform an in-place hash (like what happened for Equihash).

 The problem with Equihash and Cryptonight is that they have not achieved the desired level of hardness but Ethash (a Dagger Hasihmoto variant) is performing well and Bitmain's E3 by no means deserves to be called an ASIC.

To put it straight:
Any educated enough computer engineer can easily confirm that a memory bound algorithm can never be optimized by means of optimization of processing power (by implementing the algorithm in ASIC for instance).

So, it is just a ridiculous and naive assumption that every PoW algorithm is vulnerable to ASIC attacks.
On the contrary, I believe that ASIC resistance (and behind, ASIC proof) is achievable no matter what Bitmain and its agents try to induce in the community.






I think he was just arguing semantics more than anything, just the definition of asic as application specific integrated circuit.  It is a tiring and unproductive argument though.  Technically I could make an asic for Yescrypt by removing usb ports and GPU ports and shrinking the board, maximizing copper for heat dissipation for 100% max CPU usage, etc.  The point is though it would still be a functioning computer, just designed for CPU mining.  So technically it would be an asic but not an asic by any real metric.  So basically ASIC just needs to be defined more closely to its real world form, say that to be defined as an asic, it must achieve at least an order of magnitude improvement in running a given application over general purpose computers.

But yes ASIC resistance is absolutely real; the problem is even algorithms that can be asic resistant are implemented in ways to benefit GPU's over CPU's. In this process of allowing for GPU acceleration, they make it significantly less resistant to real ASIC's.
Although it is totally acceptable that always you can improve systems to be more adequate you can't do it in 'an order of magnitude' more efficient and cost effective way, at least you can't do it 'always'.

Plus, it is not just about tautology and playing with words. It is a god damned multi billion dollars business and our heads are right in the mouth of the lion. From a sociological point of view, it is highly recommended to be aware of the consequences of declaring ASICs as the ultimate winner of the PoW paradigm.

And even if  it was purely just a simple, innocent game about redefining and extending words I don't think it would be counted as a good move to extend Application Specific Integrated Circuit (what Bitmain is proud to be specialized in designing it) to include re-programmable appliances that are designed for a specific range of applications.

And there is very little difference between gpu and cpus in this context. Modern gpus are not optimized enough for I/O and typically are optimized for vector operations. PoW is naturally a good problem for gpus to solve and I don't believe it to be the inverse case, I mean designing algorithms for gpus.
member
Activity: 322
Merit: 54
Consensus is Constitution
Whether it is a single chip or not is not really the point, is it?

To me that is really the point. A competitive single chip ASIC is a big hot-running chip requiring tons of design talent and access to the latest and greatest fabrication technology. This leads to manufacturing centralization and to noisy mining rigs that become obsolete within a year or two.

On the other hand, a chip that needs to interface to separate commodity memory chips can remain smaller, run cooler, and be implemented in a less than bleeding-edge technology, since it only needs to be fast enough to saturate the memory interface.

Quote
Now, I am not an EE so humor me, but aren't there RAMs that have access speeds that are an order of magnitude greater than standard DRAM, and wouldn't using that defeat the memory access bottleneck in an ASIC-resistant design?

Yes, there is SRAM, but it still has a limited bandwidth.

Right, if the optimum "ASIC" that can be developed for a given algorithm -- if it's main components can all be directly used in "PC's" or general commodity hardware, then the "ASIC resistance" has succeeded in my view.

However we still have centralization issues like "less than order of magnitude improvements" -- joe blow still can't really profitably compete.  I believe the only algorithm that can solve this is "factorization of large numbers" which I invented.  That algorithm will require a GPU + a CPU and therefore all modern phones/pc's/laptops/AI will be able to mine that, whereas Server farms, Multiple CPU rigs, and Botnet's likely wouldn't (or at least wouldn't be able to gain a significant advantage).

https://bitcointalk.org/index.php?topic=2575256.0;all
legendary
Activity: 990
Merit: 1108
Whether it is a single chip or not is not really the point, is it?

To me that is really the point. A competitive single chip ASIC is a big hot-running chip requiring tons of design talent and access to the latest and greatest fabrication technology. This leads to manufacturing centralization and to noisy mining rigs that become obsolete within a year or two.

On the other hand, a chip that needs to interface to separate commodity memory chips can remain smaller, run cooler, and be implemented in a less than bleeding-edge technology, since it only needs to be fast enough to saturate the memory interface.

Quote
Now, I am not an EE so humor me, but aren't there RAMs that have access speeds that are an order of magnitude greater than standard DRAM, and wouldn't using that defeat the memory access bottleneck in an ASIC-resistant design?

Yes, there is SRAM, but it still has a limited bandwidth.
legendary
Activity: 4438
Merit: 3387
An ASIC is simply a chip designed for a specific purpose. There is no reason why an ASIC can't be designed to be good at memory access.

Accessing memory bus is already optimized in cpu/gpu tech and it does not make sense for a  designer/manufacturer to claim such a thing: An asic optimized for memory bus access!

...

I think he was just arguing semantics more than anything, just the definition of asic as application specific integrated circuit.  It is a tiring and unproductive argument though.  Technically I could make an asic for Yescrypt by removing usb ports and GPU ports and shrinking the board, maximizing copper for heat dissipation for 100% max CPU usage, etc.  The point is though it would still be a functioning computer, just designed for CPU mining.  So technically it would be an asic but not an asic by any real metric.  So basically ASIC just needs to be defined more closely to its real world form, say that to be defined as an asic, it must achieve at least an order of magnitude improvement in running a given application over general purpose computers.
...

I'm not just arguing semantics or definitions. It seems to me that the goal of "ASIC-resistance" is really to prevent a specialized device whose performance is an order of magnitude improvement over a PC, but at the same cost. Whether it is a single chip or not is not really the point, is it?

Now, I am not an EE so humor me, but aren't there RAMs that have access speeds that are an order of magnitude greater than standard DRAM, and wouldn't using that defeat the memory access bottleneck in an ASIC-resistant design?
member
Activity: 322
Merit: 54
Consensus is Constitution
... because ASIC is good at sha256 computation but not memory access, ...

An ASIC is simply a chip designed for a specific purpose. There is no reason why an ASIC can't be designed to be good at memory access.

Unbelievable question from a decent bitcointalk member  Shocked

Accessing memory bus is already optimized in cpu/gpu tech and it does not make sense for a  designer/manufacturer to claim such a thing: An asic optimized for memory bus access!

Any algorithm, (I mean any algorithm) being truly memory hard by means of memory access is inherently ASIC resistant as long it has two important properties:
  • a large enough memory footprint
  • a complex enough function that is not feasible to be implemented directly on the memory bank
The latter is new and I have to explain a bit:
It suggests that the fetch operation MUST be necessary and not feasible to be bypassed by adding some extra gates to a special memory bank to perform an in-place hash (like what happened for Equihash).

 The problem with Equihash and Cryptonight is that they have not achieved the desired level of hardness but Ethash (a Dagger Hasihmoto variant) is performing well and Bitmain's E3 by no means deserves to be called an ASIC.

To put it straight:
Any educated enough computer engineer can easily confirm that a memory bound algorithm can never be optimized by means of optimization of processing power (by implementing the algorithm in ASIC for instance).

So, it is just a ridiculous and naive assumption that every PoW algorithm is vulnerable to ASIC attacks.
On the contrary, I believe that ASIC resistance (and behind, ASIC proof) is achievable no matter what Bitmain and its agents try to induce in the community.






I think he was just arguing semantics more than anything, just the definition of asic as application specific integrated circuit.  It is a tiring and unproductive argument though.  Technically I could make an asic for Yescrypt by removing usb ports and GPU ports and shrinking the board, maximizing copper for heat dissipation for 100% max CPU usage, etc.  The point is though it would still be a functioning computer, just designed for CPU mining.  So technically it would be an asic but not an asic by any real metric.  So basically ASIC just needs to be defined more closely to its real world form, say that to be defined as an asic, it must achieve at least an order of magnitude improvement in running a given application over general purpose computers.

But yes ASIC resistance is absolutely real; the problem is even algorithms that can be asic resistant are implemented in ways to benefit GPU's over CPU's. In this process of allowing for GPU acceleration, they make it significantly less resistant to real ASIC's.
legendary
Activity: 1456
Merit: 1174
Always remember the cause!
... because ASIC is good at sha256 computation but not memory access, ...

An ASIC is simply a chip designed for a specific purpose. There is no reason why an ASIC can't be designed to be good at memory access.

Unbelievable question from a decent bitcointalk member  Shocked

Accessing memory bus is already optimized in cpu/gpu tech and it does not make sense for a  designer/manufacturer to claim such a thing: An asic optimized for memory bus access!

Any algorithm, (I mean any algorithm) being truly memory hard by means of memory access is inherently ASIC resistant as long it has two important properties:
  • a large enough memory footprint
  • a complex enough function that is not feasible to be implemented directly on the memory bank
The latter is new and I have to explain a bit:
It suggests that the fetch operation MUST be necessary and not feasible to be bypassed by adding some extra gates to a special memory bank to perform an in-place hash (like what happened for Equihash).

 The problem with Equihash and Cryptonight is that they have not achieved the desired level of hardness but Ethash (a Dagger Hasihmoto variant) is performing well and Bitmain's E3 by no means deserves to be called an ASIC.

To put it straight:
Any educated enough computer engineer can easily confirm that a memory bound algorithm can never be optimized by means of optimization of processing power (by implementing the algorithm in ASIC for instance).

So, it is just a ridiculous and naive assumption that every PoW algorithm is vulnerable to ASIC attacks.
On the contrary, I believe that ASIC resistance (and behind, ASIC proof) is achievable no matter what Bitmain and its agents try to induce in the community.




newbie
Activity: 148
Merit: 0
So are you saying this can be implemented in a soft fork?  Would there be any way for miners using old sha256 to fool the algorithm to accept their old type sha256 values, or skip a step of the memory hard part if their values look correct?  In other words, how is doing the memory hard part enforced?

It need hard fork, but doesn't need any major code changes.
member
Activity: 322
Merit: 54
Consensus is Constitution
So are you saying this can be implemented in a soft fork?  Would there be any way for miners using old sha256 to fool the algorithm to accept their old type sha256 values, or skip a step of the memory hard part if their values look correct?  In other words, how is doing the memory hard part enforced?
newbie
Activity: 148
Merit: 0
In the above verify_hash() function, the coin_hash_b64 function is defined as:

Code:
std::string coin_hash_b64(const char *data, uint32 size)
{
    char h_256[CSHA256::OUTPUT_SIZE];
    CHash256().Write(data, size).Finalize(h_256);
    std::string b64 = fly::base::base64_encode(h_256, CSHA256::OUTPUT_SIZE);
   
    return b64;
}
newbie
Activity: 148
Merit: 0
If you want to develop a new POW coin, you can't use SHA256 directly, but you can use my method, because the existing ASIC machine is not good at process massive memory operation.

Again look into yescrypt for example.  Memory hard algorithms aren't new.  Even scrypt can be asic resistance with high enough N value set.

How is yours different than these?

My algorithm combine the advantages of both SHA256 and memory access, SHA256 have been proved in bitcoin for many years.

So basically what your saying is your algorithm can be a plug in replacement for sha256?  So you could plug this directly into bitcoin without any major code changes?  Soft fork, hard fork?

Here is Yescrypt pseudocode:
Code:
# ** Functions/symbols **
# ||                           Concatenate two strings
# HMAC(h, k, v)                HMAC with hash function h and key k over value v
# PBKDF2(prf, p, s, c, dklen)  PBKDF2 key derivation function
# substr(s, start, length)     Substring from start (zero-based) of length bytes
# le32dec(), le32enc()         32-bit little-endian decoding/encoding
# SIMD_[un]shuffle()           Salsa20 SIMD (un)shuffling of 32-bit words
# Integerify(B, r)             Parse B_{2r-1} as a little-endian integer
# p2floor(x)                   Largest power of 2 not greater than x
 
# ** Inputs **
string   password
string   salt
integer  t_cost
integer  m_cost
integer  outlen
 
# ** Algorithm **
N = 8 << m_cost
r = 8
 
# ** Settings hard-coded/assumed below in this pseudocode **
# p = 1
# g = 0
# flags = YESCRYPT_RW
# no ROM
 
# If m_cost is 16 MB per thread or more, pre-hash using 1/64th of m_cost first,
# to mitigate garbage collector attacks.  yescrypt_prehash() is almost the same
# as this function, but its personalization HMAC key is "yescrypt-prehash"
# rather than "yescrypt", it skips builtin SCRAM finalization, and it will not
# invoke another yescrypt_prehash().
if (N / p >= 0x100 && N / p * r >= 0x20000)
    password = yescrypt_prehash(password, salt, t_cost, m_cost / 64, 32)
 
password = HMAC(SHA-256, "yescrypt", password)
B = PBKDF2(HMAC-SHA-256, password, salt, 1, 128 * r)
password = substr(B, 0, 32)
 
# SMix1 invoked to initialize pwxform S-boxes
X = SIMD_shuffle(le32dec(B))
for i = 0 to Sbytes/128 - 1
    S[i] = X
    X = BlockMix_{Salsa20/8, 1}(X)
 
# SMix1 invoked "for real"
for i = 0 to N - 1
    V[i] = X
    if (i > 1)
        j = Wrap(Integerify(X, r), i)
        X = X xor V[j]
    X = BlockMix_pwxform{Salsa20/2, S, r}(X)
 
# SMix2
if (t_cost = 0)
    Nloop = (N + 2) / 3
else if (t_cost = 1)
    Nloop = (N * 2 + 2) / 3
else
    Nloop = N * (t - 1)
for i = 0 to Nloop - 1
    j = Integerify(X, r) mod N
    X = X xor V[j]
    V[j] = X
    X = BlockMix_pwxform{Salsa20/2, S, r}(X)
B = le32enc(SIMD_unshuffle(X))
 
out = PBKDF2(HMAC-SHA-256, password, B, 1, outlen)
 
# Builtin SCRAM (RFC 5802) support
clen = min(outlen, 32)
dk = PBKDF2(HMAC-SHA-256, password, B, 1, 32)
dk = SHA-256(HMAC(SHA-256, dk, "Client Key"))
out = substr(dk, 0, clen) || substr(out, clen, outlen - clen)
 
return out
 
# ** Helper functions **
 
# Wrap x to the range 0 to i-1
Wrap(x, i)
    n = p2floor(i)
    return (x mod n) + (i - n)

Yes, the result value of my algorithm is sha256d value too, please refer to the hash-verify code above.
member
Activity: 322
Merit: 54
Consensus is Constitution
If you want to develop a new POW coin, you can't use SHA256 directly, but you can use my method, because the existing ASIC machine is not good at process massive memory operation.

Again look into yescrypt for example.  Memory hard algorithms aren't new.  Even scrypt can be asic resistance with high enough N value set.

How is yours different than these?

My algorithm combine the advantages of both SHA256 and memory access, SHA256 have been proved in bitcoin for many years.

So basically what your saying is your algorithm can be a plug in replacement for sha256?  So you could plug this directly into bitcoin without any major code changes?  Soft fork, hard fork?

Here is Yescrypt pseudocode:
Code:
# ** Functions/symbols **
# ||                           Concatenate two strings
# HMAC(h, k, v)                HMAC with hash function h and key k over value v
# PBKDF2(prf, p, s, c, dklen)  PBKDF2 key derivation function
# substr(s, start, length)     Substring from start (zero-based) of length bytes
# le32dec(), le32enc()         32-bit little-endian decoding/encoding
# SIMD_[un]shuffle()           Salsa20 SIMD (un)shuffling of 32-bit words
# Integerify(B, r)             Parse B_{2r-1} as a little-endian integer
# p2floor(x)                   Largest power of 2 not greater than x
 
# ** Inputs **
string   password
string   salt
integer  t_cost
integer  m_cost
integer  outlen
 
# ** Algorithm **
N = 8 << m_cost
r = 8
 
# ** Settings hard-coded/assumed below in this pseudocode **
# p = 1
# g = 0
# flags = YESCRYPT_RW
# no ROM
 
# If m_cost is 16 MB per thread or more, pre-hash using 1/64th of m_cost first,
# to mitigate garbage collector attacks.  yescrypt_prehash() is almost the same
# as this function, but its personalization HMAC key is "yescrypt-prehash"
# rather than "yescrypt", it skips builtin SCRAM finalization, and it will not
# invoke another yescrypt_prehash().
if (N / p >= 0x100 && N / p * r >= 0x20000)
    password = yescrypt_prehash(password, salt, t_cost, m_cost / 64, 32)
 
password = HMAC(SHA-256, "yescrypt", password)
B = PBKDF2(HMAC-SHA-256, password, salt, 1, 128 * r)
password = substr(B, 0, 32)
 
# SMix1 invoked to initialize pwxform S-boxes
X = SIMD_shuffle(le32dec(B))
for i = 0 to Sbytes/128 - 1
    S[i] = X
    X = BlockMix_{Salsa20/8, 1}(X)
 
# SMix1 invoked "for real"
for i = 0 to N - 1
    V[i] = X
    if (i > 1)
        j = Wrap(Integerify(X, r), i)
        X = X xor V[j]
    X = BlockMix_pwxform{Salsa20/2, S, r}(X)
 
# SMix2
if (t_cost = 0)
    Nloop = (N + 2) / 3
else if (t_cost = 1)
    Nloop = (N * 2 + 2) / 3
else
    Nloop = N * (t - 1)
for i = 0 to Nloop - 1
    j = Integerify(X, r) mod N
    X = X xor V[j]
    V[j] = X
    X = BlockMix_pwxform{Salsa20/2, S, r}(X)
B = le32enc(SIMD_unshuffle(X))
 
out = PBKDF2(HMAC-SHA-256, password, B, 1, outlen)
 
# Builtin SCRAM (RFC 5802) support
clen = min(outlen, 32)
dk = PBKDF2(HMAC-SHA-256, password, B, 1, 32)
dk = SHA-256(HMAC(SHA-256, dk, "Client Key"))
out = substr(dk, 0, clen) || substr(out, clen, outlen - clen)
 
return out
 
# ** Helper functions **
 
# Wrap x to the range 0 to i-1
Wrap(x, i)
    n = p2floor(i)
    return (x mod n) + (i - n)
newbie
Activity: 148
Merit: 0
If you want to develop a new POW coin, you can't use SHA256 directly, but you can use my method, because the existing ASIC machine is not good at process massive memory operation.

Again look into yescrypt for example.  Memory hard algorithms aren't new.  Even scrypt can be asic resistance with high enough N value set.

How is yours different than these?

My algorithm combine the advantages of both SHA256 and memory access, SHA256 have been proved in bitcoin for many years.
member
Activity: 322
Merit: 54
Consensus is Constitution
If you want to develop a new POW coin, you can't use SHA256 directly, but you can use my method, because the existing ASIC machine is not good at process massive memory operation.

Again look into yescrypt for example.  Memory hard algorithms aren't new.  Even scrypt can be asic resistance with high enough N value set.

How is yours different than these?
newbie
Activity: 148
Merit: 0
If you want to develop a new POW coin, you can't use SHA256 directly, but you can use my method, because the existing ASIC machine is not good at process massive memory operation.
hero member
Activity: 568
Merit: 703
@tromp and @anonymint analyzed this over the past years.
It's not just the latency of memory access, but the power cost.
Memory can be optimized for the access patterns and power cost profile can be optimized.
The ASIC will always be at least an order-of-magnitude more efficient than the general purpose hardware:

https://gist.github.com/shelby3/67d990230e2dc9eb8be9e43e0b0b77a7
member
Activity: 322
Merit: 54
Consensus is Constitution
How is this different than Scrypt of Yescrypt which have high N values set?
newbie
Activity: 148
Merit: 0
Soon or later new ASICS will be developed to adapt your algorithm, unless your  algorithm can self evolution

No, because the amount of memory access is much more than L3 cache, so if a miner want to generate block, he/she must access data from memory, not from L3 cache or L4 (if have) cache.
newbie
Activity: 148
Merit: 0
... because ASIC is good at sha256 computation but not memory access, ...

An ASIC is simply a chip designed for a specific purpose. There is no reason why an ASIC can't be designed to be good at memory access.

It's harder to design a ASIC with very fast speed of memory access than pure computation, because the speed of memory access has many limitations.
newbie
Activity: 148
Merit: 0
I don't have full understanding about your code, but i wonder about :
1. CryptoNight needs very fast memory such as L3 cache and EtHash needs fast memory with big amount, but both algorithm have ASIC (or at least the manufacture claim that). Are you sure your algorithm really ASIC-resistant?
2. Is your algorithm require big memory amount, very fast memory or both?
You don't understand my code, in my algorithm, only need the time cost by one memory process is more than one sha256 computation in the while loop, and need the amount of memory is much more than L3 cache, this is very different from EtHash or CryptoNight.
legendary
Activity: 4438
Merit: 3387
... because ASIC is good at sha256 computation but not memory access, ...

An ASIC is simply a chip designed for a specific purpose. There is no reason why an ASIC can't be designed to be good at memory access.
newbie
Activity: 2
Merit: 0
Soon or later new ASICS will be developed to adapt your algorithm, unless your  algorithm can self evolution
newbie
Activity: 148
Merit: 0
Hi, everyone
    As we know, the mining process is like this:
Code:
while(hash_value > target)
{
    nonce = new_nonce()
    hash_value = sha256d(utc + version + pre_block_hash + nonce + miner_pubkey + merkle_root + something else)
}
   the ASIC's advantage over GPU or CPU is that ASIC can run sha256d more times under the same time condition, so in the loop, we can combine sha256 with a lot of memory access to resist ASIC, because ASIC is good at sha256 computation but not memory access, as following:
Code:
while(hash_value > target)
{
    nonce = new_nonce()
    memory_access_result = access_memory_process(utc + version + pre_block_hash + nonce + miner_pubkey + merkle_root + something else)
    hash_value = sha256d(utc + version + pre_block_hash + nonce + miner_pubkey + merkle_root + memory_access_result + something else)
}

    In my own askcoin projcect (www.askcoin.me), we name this asic-resistant sha256 algorithm as "SHA256-AR", the verify-hash code is something like this:
Code:
bool Blockchain::hash_pow(char hash_arr[32], uint32 zero_bits)
{
    uint32 zero_char_num = zero_bits / 8;

    for(uint32 i = 0; i < zero_char_num; ++i)
    {
        if(hash_arr[i] != 0)
        {
            return false;
        }
    }
    
    uint32 zero_remain_bit = zero_bits % 8;

    if(zero_remain_bit == 0)
    {
        return true;
    }
    
    return hash_arr[zero_char_num] < 1 << 8 - zero_remain_bit;
}

bool Blockchain::verify_hash(std::string block_hash, std::string block_data, uint32 zero_bits)
{
    char hash_raw[32];
    uint32 len = fly::base::base64_decode(block_hash.c_str(), block_hash.length(), hash_raw, 32);

    if(len != 32)
    {
        return false;
    }
    
    uint32 buf[16] = {0};
    char *p = (char*)buf;
    coin_hash(block_data.c_str(), block_data.length(), p);
    block_data += "another_32_bytes";
    coin_hash(block_data.c_str(), block_data.length(), p + 32);
    uint32 arr_16[16] = {0};
    
    for(uint32 i = 0; i < 16; ++i)
    {
        arr_16[i] = ntohl(buf[i]);
    }

    for(uint32 i = 0; i < ASIC_RESISTANT_DATA_NUM;)
    {
        for(int j = 0; j < 16; ++j)
        {
            arr_16[j] = (arr_16[j] + __asic_resistant_data__[i + j]) * (arr_16[j] ^ __asic_resistant_data__[i + j]);
        }
        
        i += 16;
    }
    
    for(uint32 i = 0; i < 16; ++i)
    {
        buf[i] = htonl(arr_16[i]);
    }

    std::string hash_data = block_data + fly::base::base64_encode(p, 64);
    std::string block_hash_verify = coin_hash_b64(hash_data.c_str(), hash_data.length());

    if(block_hash != block_hash_verify)
    {
        return false;
    }
    
    return hash_pow(hash_raw, zero_bits);
}
   the __asic_resistant_data__ is a big table contains predefined random uint32 data, the number of element is:
Code:
const uint32 ASIC_RESISTANT_DATA_NUM = 5 * 1024 * 1024;
Jump to: