I found a way to implement real asic-resistant POW algorithm

lichuan

newbie

Activity: 148

Merit: 0

Quote from: gmaxwell on November 09, 2018, 07:01:28 PM

The function described here is not asic resistant, in fact it looks like an outright scam. Buyer beware.

You don't understand what I'm saying. You just spray trash

If you go to github, download and compile askcoin's source code, you will understand how I achieved ASIC-resistant

https://github.com/lichuan/askcoin

here is the ASIC-resistant data file:

https://github.com/lichuan/askcoin/blob/master/src/asic_resistant.data

lichuan

newbie

Activity: 148

Merit: 0

The doc of askcoin's full node: https://github.com/lichuan/askcoin#askcoin

lichuan

newbie

Activity: 148

Merit: 0

Quote from: gmaxwell on November 09, 2018, 07:01:28 PM

The function described here is not asic resistant, in fact it looks like an outright scam. Buyer beware.

SHA256 + huge memory access, It can greatly increase the difficulty of asic mine.

gmaxwell

staff

Activity: 4242

Merit: 8672

The function described here is not asic resistant, in fact it looks like an outright scam. Buyer beware.

lichuan

newbie

Activity: 148

Merit: 0

The mining process from askcoin project (www.askcoin.me) is:

Code:

void Blockchain::do_mine()
{
    while(!m_stop.load(std::memory_order_relaxed))
    {
        if(!m_need_remine.load(std::memory_order_acquire))
        {
            std::this_thread::sleep_for(std::chrono::milliseconds(10));
            continue;
        }
        
        std::list> mined_txs;
        uint64 cur_block_id, cur_block_utc;
        uint32 zero_bits;
        std::string cur_block_hash, miner_key;
        std::atomic mine_id_2 {0};
    remine:
        {
            std::lock_guard guard(m_mine_mutex);
            mined_txs = std::move(m_mined_txs);
            mine_id_2.store(m_mine_id_1.load(std::memory_order_relaxed), std::memory_order_relaxed);
            cur_block_id = m_mine_cur_block_id;
            cur_block_utc = m_mine_cur_block_utc;
            cur_block_hash = m_mine_cur_block_hash;
            zero_bits = m_mine_zero_bits;
            miner_key = m_miner_privkey;
            m_need_remine.store(false, std::memory_order_relaxed);
        }

        char privk[32];
        fly::base::base64_decode(miner_key.c_str(), miner_key.length(), privk, 32);
        CKey miner_priv_key;
        miner_priv_key.Set(privk, privk + 32, false);
        CPubKey miner_pub_key = miner_priv_key.GetPubKey();
        std::string miner_pub_key_b64 = fly::base::base64_encode(miner_pub_key.begin(), miner_pub_key.size());
        
        auto doc_ptr = std::make_shared();
        rapidjson::Document &doc = *doc_ptr;
        doc.SetObject();
        rapidjson::Document::AllocatorType &allocator = doc.GetAllocator();
        doc.AddMember("hash", "", allocator);
        doc.AddMember("sign", "", allocator);
        rapidjson::Value data(rapidjson::kObjectType);
        data.AddMember("id", cur_block_id + 1, allocator);
        data.AddMember("utc", time(NULL), allocator);
        data.AddMember("version", ASKCOIN_VERSION, allocator);
        data.AddMember("zero_bits", zero_bits, allocator);
        data.AddMember("pre_hash", rapidjson::Value(cur_block_hash.c_str(), allocator), allocator);
        data.AddMember("miner", rapidjson::Value(miner_pub_key_b64.c_str(), allocator), allocator);
        rapidjson::Value tx_ids(rapidjson::kArrayType);
        
        for(auto tx : mined_txs)
        {
            tx_ids.PushBack(rapidjson::Value(tx->m_id.c_str(), allocator), allocator);
        }

        data.AddMember("tx_ids", tx_ids, allocator);
        rapidjson::Value nonce(rapidjson::kArrayType);
        nonce.PushBack(0, allocator);
        nonce.PushBack(0, allocator);
        nonce.PushBack(0, allocator);
        nonce.PushBack(0, allocator);
        data.AddMember("nonce", nonce, allocator);
        char hash_raw[32];
        
        for(uint64 i = 0; i < (uint64)-1; ++i)
        {
            for(uint64 j = 0; j < (uint64)-1; ++j)
            {
                for(uint64 k = 0; k < (uint64)-1; ++k)
                {
                    for(uint64 m = 0; m < (uint64)-1; ++m)
                    {
                        if(m_stop.load(std::memory_order_relaxed))
                        {
                            return;
                        }
                        
                        if(m_need_remine.load(std::memory_order_acquire))
                        {
                            goto remine;
                        }
                        
                        uint64 utc = time(NULL);
                        
                        if(utc < cur_block_utc)
                        {
                            utc = cur_block_utc;
                        }
                        
                        data["utc"].SetUint64(utc);
                        data["nonce"][0] = m;
                        data["nonce"][1] = k;
                        data["nonce"][2] = j;
                        data["nonce"][3] = i;
                        rapidjson::StringBuffer buffer;
                        rapidjson::Writer writer(buffer);
                        data.Accept(writer);
                        uint32 buf[16] = {0};
                        char *p = (char*)buf;
                        coin_hash(buffer.GetString(), buffer.GetSize(), p);
                        char * ptr = buffer.Push(16);
                        memcpy(ptr, "another_32_bytes", 16);
                        coin_hash(buffer.GetString(), buffer.GetSize(), p + 32);
                        uint32 arr_16[16] = {0};
                            
                        for(uint32 i = 0; i < 16; ++i)
                        {
                            arr_16[i] = ntohl(buf[i]);
                        }
    
                        for(uint32 i = 0; i < ASIC_RESISTANT_DATA_NUM;)
                        {
                            for(int j = 0; j < 16; ++j)
                            {
                                arr_16[j] = (arr_16[j] + __asic_resistant_data__[i + j]) * (arr_16[j] ^ __asic_resistant_data__[i + j]);
                            }
        
                            i += 16;
                        }
    
                        for(uint32 i = 0; i < 16; ++i)
                        {
                            buf[i] = htonl(arr_16[i]);
                        }

                        ptr = buffer.Push(88);
                        std::string p_b64 = fly::base::base64_encode(p, 64);
                        memcpy(ptr, p_b64.data(), 88);
                        coin_hash(buffer.GetString(), buffer.GetSize(), hash_raw);
                        uint32 zero_char_num = zero_bits / 8;
                        uint32 zero_remain_bit = 0;

                        for(uint32 i = 0; i < zero_char_num; ++i)
                        {
                            if(hash_raw[i] != 0)
                            {
                                goto try_next;
                            }
                        }

                        zero_remain_bit = zero_bits % 8;
                        
                        if(zero_remain_bit == 0)
                        {
                            goto mine_success;
                        }
                            
                        if((uint8)hash_raw[zero_char_num] < 1 << 8 - zero_remain_bit)
                        {
                            goto mine_success;
                        }

                    try_next:
                        ;
                    }
                }
            }
        }
        
    mine_success:
        std::string hex_hash = fly::base::byte2hexstr(hash_raw, 32);
        std::string block_hash = fly::base::base64_encode(hash_raw, 32);
        LOG_DEBUG_INFO("mine successfully, zero_bits: %u, block_id: %lu, block_hash: %s (hex: %s)", \
                       zero_bits, cur_block_id + 1, block_hash.c_str(), hex_hash.c_str());
        doc["hash"].SetString(block_hash.c_str(), allocator);
        std::vector sign_vec;

        if(!miner_priv_key.Sign(uint256(std::vector(hash_raw, hash_raw + 32)), sign_vec))
        {
            ASKCOIN_EXIT(EXIT_FAILURE);
        }
        
        std::string block_sign = fly::base::base64_encode(&sign_vec[0], sign_vec.size());
        doc["sign"].SetString(block_sign.c_str(), allocator);
        doc.AddMember("data", data, allocator);
        rapidjson::Value doc_tx(rapidjson::kArrayType);
        
        for(auto tx : mined_txs)
        {
            rapidjson::Document &doc = *tx->m_doc;
            rapidjson::Value tx_node(rapidjson::kObjectType);
            tx_node.AddMember("sign", doc["sign"], allocator);
            tx_node.AddMember("data", doc["data"], allocator);
            doc_tx.PushBack(tx_node, allocator);
        }
        
        doc.AddMember("tx", doc_tx, allocator);
        {
            std::lock_guard guard(m_mine_mutex);
            m_mine_doc = doc_ptr;
            m_mine_id_2.store(mine_id_2.load(std::memory_order_relaxed), std::memory_order_relaxed);
        }
        
        m_mine_success.store(true, std::memory_order_release);
    }
}

The mainnet of askcoin is coming soon, the source code will be open when this project has passed its infancy.
FYI: https://bitcointalksearch.org/topic/annaskcoin-is-a-decentralized-real-time-qa-platform-3733771

tromp

legendary

Activity: 990

Merit: 1108

Quote from: ir.hn on June 12, 2018, 12:06:43 AM

There is a reason ZCash paid the inventor of Yescrypt to evaluate their use of Equihash. Unfortunately they didn't heed his advice and now a short time period later they are suffering for it.

There is a proof of work where the memory requirements can vary from 1KB to 1PB (a petabyte is a billion GB), where verification is instant, and where increasing the requirement is just a soft fork.

tromp

legendary

Activity: 990

Merit: 1108

Quote from: ir.hn on June 12, 2018, 12:06:43 AM

Quote from: aliashraf on June 10, 2018, 06:58:27 AM

The N value is required to be kept low in Scrypt to allow costeffectiev and fast validation that is a MUST in PoW.

Why is it such a MUST? Cumbersome validation, big deal. Lets say it takes even 20 seconds to validate a block, a blocktime of 10 minutes means that the head-start the original broadcasting miner gets is negligible.

It is a MUST for at least two reasons:

1) because new nodes trying to identify the longest valid chain must process all proofs of work in the entire blockchain history. That's on the order of a million of them. Even if you allow an hour for downloading just the headers, that's only a few milliseconds you can spend on each pow.

and even more importantly:

2) a slow verification is an invitation for a Denial of Service attack where attackers keep feeding you bogus proofs-of-work which you need to spend significant time on to recognize as bogus. Verification should therefore take no more time than it takes to receive a header (well under a millisecond).

ir.hn

member

Activity: 322

Merit: 54

Consensus is Constitution

Quote from: aliashraf on June 10, 2018, 06:58:27 AM

Quote from: ir.hn on June 09, 2018, 06:52:03 PM

Quote

And there is very little difference between gpu and cpus in this context. Modern gpus are not optimized enough for I/O and typically are optimized for vector operations. PoW is naturally a good problem for gpus to solve and I don't believe it to be the inverse case, I mean designing algorithms for gpus.

Well lets look at scrypt for example. Scrypt was tuned for GPU's with 1024 as the N value. It was cracked by ASIC's. If the N value was set to say 100,000 GPU's would likely struggle with it but CPU's would be able to do it pretty well. To this day Scrypt would still be ASIC free in my view.

The N value is required to be kept low in Scrypt to allow costeffectiev and fast validation that is a MUST in PoW.

As I have mentioned above it is not about making algorithms gpu friendly. I guess you have been somhow misinformed about the situation. On the contrary, any PoW designer wishes to resists gpu optimization, it is just impractical imo.

Why is it such a MUST? Cumbersome validation, big deal. Lets say it takes even 20 seconds to validate a block, a blocktime of 10 minutes means that the head-start the original broadcasting miner gets is negligible.

And considering that the INVENTORS of Scrypt and Yescrypt say that they should have raised the N value speaks volumes to me. Do you really think Charlie Lee or whoever's code he was copying knew how to implement Scrypt better for this application than the INVENTOR of Scrypt does?

There is a reason ZCash paid the inventor of Yescrypt to evaluate their use of Equihash. Unfortunately they didn't heed his advice and now a short time period later they are suffering for it.

lichuan

newbie

Activity: 148

Merit: 0

Quote from: aliashraf on June 09, 2018, 01:02:48 PM

Quote from: odolvlobo on June 01, 2018, 02:39:39 AM

Quote from: lichuan on May 31, 2018, 11:46:00 PM

... because ASIC is good at sha256 computation but not memory access, ...

An ASIC is simply a chip designed for a specific purpose. There is no reason why an ASIC can't be designed to be good at memory access.

Unbelievable question from a decent bitcointalk member Shocked

Accessing memory bus is already optimized in cpu/gpu tech and it does not make sense for a designer/manufacturer to claim such a thing: An asic optimized for memory bus access!

Any algorithm, (I mean any algorithm) being truly memory hard by means of memory access is inherently ASIC resistant as long it has two important properties:

a large enough memory footprint
a complex enough function that is not feasible to be implemented directly on the memory bank

The latter is new and I have to explain a bit:
It suggests that the fetch operation MUST be necessary and not feasible to be bypassed by adding some extra gates to a special memory bank to perform an in-place hash (like what happened for Equihash).

The problem with Equihash and Cryptonight is that they have not achieved the desired level of hardness but Ethash (a Dagger Hasihmoto variant) is performing well and Bitmain's E3 by no means deserves to be called an ASIC.

To put it straight:
Any educated enough computer engineer can easily confirm that a memory bound algorithm can never be optimized by means of optimization of processing power (by implementing the algorithm in ASIC for instance).

So, it is just a ridiculous and naive assumption that every PoW algorithm is vulnerable to ASIC attacks.
On the contrary, I believe that ASIC resistance (and behind, ASIC proof) is achievable no matter what Bitmain and its agents try to induce in the community.

You're right, the real memory-hard algorithm should require that each data designed to be accessed from memory must be fetched from real memory. If some data could be deduced from other part by design some extra gate circuits, then it is not really memory-hard algorithm.

aliashraf

legendary

Activity: 1456

Merit: 1174

Always remember the cause!

Quote from: ir.hn on June 09, 2018, 06:52:03 PM

Quote

And there is very little difference between gpu and cpus in this context. Modern gpus are not optimized enough for I/O and typically are optimized for vector operations. PoW is naturally a good problem for gpus to solve and I don't believe it to be the inverse case, I mean designing algorithms for gpus.

Well lets look at scrypt for example. Scrypt was tuned for GPU's with 1024 as the N value. It was cracked by ASIC's. If the N value was set to say 100,000 GPU's would likely struggle with it but CPU's would be able to do it pretty well. To this day Scrypt would still be ASIC free in my view.

The N value is required to be kept low in Scrypt to allow costeffectiev and fast validation that is a MUST in PoW.

As I have mentioned above it is not about making algorithms gpu friendly. I guess you have been somhow misinformed about the situation. On the contrary, any PoW designer wishes to resists gpu optimization, it is just impractical imo.

tromp

legendary

Activity: 990

Merit: 1108

Quote from: ir.hn on June 09, 2018, 06:52:03 PM

Well lets look at scrypt for example. Scrypt was tuned for GPU's with 1024 as the N value. It was cracked by ASIC's. If the N value was set to say 100,000 GPU's would likely struggle with it but CPU's would be able to do it pretty well. To this day Scrypt would still be ASIC free in my view.

The N value had to be set low so that verification wouldn't become cumbersome.

Quote

Reading Solar Designer's (inventor of Yescrypt) analysis of ZCash's choice of Equihash, he also noted that they chose a too low "N value" type setting because they wanted it to be asymetric and verify very fast and be "GPU friendly".

They chose a lower-than-desired memory setting because early solver implementations were comically inefficient and they worried miners would be too slow. They also lacked the patience to wait for miner improvements. Verification was never the issue.

ir.hn

member

Activity: 322

Merit: 54

Consensus is Constitution

Quote

And there is very little difference between gpu and cpus in this context. Modern gpus are not optimized enough for I/O and typically are optimized for vector operations. PoW is naturally a good problem for gpus to solve and I don't believe it to be the inverse case, I mean designing algorithms for gpus.

Well lets look at scrypt for example. Scrypt was tuned for GPU's with 1024 as the N value. It was cracked by ASIC's. If the N value was set to say 100,000 GPU's would likely struggle with it but CPU's would be able to do it pretty well. To this day Scrypt would still be ASIC free in my view.

Reading Solar Designer's (inventor of Yescrypt) analysis of ZCash's choice of Equihash, he also noted that they chose a too low "N value" type setting because they wanted it to be asymetric and verify very fast and be "GPU friendly". It didn't work out too well.
https://blog.z.cash/the-zcash-equihash-analysis/

Here is his analysis of Argon2 Vs Yescrypt. Seems Yescrypt is more parallelization resistant:
http://discussions.password-hashing.narkive.com/pYOIcGhK/phc-argon2-cpu-gpu-benchmarks

Quote

And even if it was purely just a simple, innocent game about redefining and extending words I don't think it would be counted as a good move to extend Application Specific Integrated Circuit (what Bitmain is proud to be specialized in designing it) to include re-programmable appliances that are designed for a specific range of applications.

Good point, I didn't think about the Integrated Circuit part since everything is integrated circuits now. But yes if they can integrate multiple parts of a typical motherboard in a way that excludes it's use in commodity hardware then that could be a definition of ASIC as well.

aliashraf

legendary

Activity: 1456

Merit: 1174

Always remember the cause!

Quote from: ir.hn on June 09, 2018, 02:00:09 PM

Quote from: aliashraf on June 09, 2018, 01:02:48 PM

Quote from: odolvlobo on June 01, 2018, 02:39:39 AM

Quote from: lichuan on May 31, 2018, 11:46:00 PM

... because ASIC is good at sha256 computation but not memory access, ...

An ASIC is simply a chip designed for a specific purpose. There is no reason why an ASIC can't be designed to be good at memory access.

Unbelievable question from a decent bitcointalk member Shocked

Accessing memory bus is already optimized in cpu/gpu tech and it does not make sense for a designer/manufacturer to claim such a thing: An asic optimized for memory bus access!

Any algorithm, (I mean any algorithm) being truly memory hard by means of memory access is inherently ASIC resistant as long it has two important properties:

a large enough memory footprint
a complex enough function that is not feasible to be implemented directly on the memory bank

The latter is new and I have to explain a bit:
It suggests that the fetch operation MUST be necessary and not feasible to be bypassed by adding some extra gates to a special memory bank to perform an in-place hash (like what happened for Equihash).

The problem with Equihash and Cryptonight is that they have not achieved the desired level of hardness but Ethash (a Dagger Hasihmoto variant) is performing well and Bitmain's E3 by no means deserves to be called an ASIC.

To put it straight:
Any educated enough computer engineer can easily confirm that a memory bound algorithm can never be optimized by means of optimization of processing power (by implementing the algorithm in ASIC for instance).

So, it is just a ridiculous and naive assumption that every PoW algorithm is vulnerable to ASIC attacks.
On the contrary, I believe that ASIC resistance (and behind, ASIC proof) is achievable no matter what Bitmain and its agents try to induce in the community.

I think he was just arguing semantics more than anything, just the definition of asic as application specific integrated circuit. It is a tiring and unproductive argument though. Technically I could make an asic for Yescrypt by removing usb ports and GPU ports and shrinking the board, maximizing copper for heat dissipation for 100% max CPU usage, etc. The point is though it would still be a functioning computer, just designed for CPU mining. So technically it would be an asic but not an asic by any real metric. So basically ASIC just needs to be defined more closely to its real world form, say that to be defined as an asic, it must achieve at least an order of magnitude improvement in running a given application over general purpose computers.

But yes ASIC resistance is absolutely real; the problem is even algorithms that can be asic resistant are implemented in ways to benefit GPU's over CPU's. In this process of allowing for GPU acceleration, they make it significantly less resistant to real ASIC's.

Although it is totally acceptable that always you can improve systems to be more adequate you can't do it in 'an order of magnitude' more efficient and cost effective way, at least you can't do it 'always'.

Plus, it is not just about tautology and playing with words. It is a god damned multi billion dollars business and our heads are right in the mouth of the lion. From a sociological point of view, it is highly recommended to be aware of the consequences of declaring ASICs as the ultimate winner of the PoW paradigm.

And even if it was purely just a simple, innocent game about redefining and extending words I don't think it would be counted as a good move to extend Application Specific Integrated Circuit (what Bitmain is proud to be specialized in designing it) to include re-programmable appliances that are designed for a specific range of applications.

And there is very little difference between gpu and cpus in this context. Modern gpus are not optimized enough for I/O and typically are optimized for vector operations. PoW is naturally a good problem for gpus to solve and I don't believe it to be the inverse case, I mean designing algorithms for gpus.

ir.hn

member

Activity: 322

Merit: 54

Consensus is Constitution

Quote from: tromp on June 09, 2018, 06:07:57 PM

Quote from: odolvlobo on June 09, 2018, 05:22:03 PM

Whether it is a single chip or not is not really the point, is it?

To me that is really the point. A competitive single chip ASIC is a big hot-running chip requiring tons of design talent and access to the latest and greatest fabrication technology. This leads to manufacturing centralization and to noisy mining rigs that become obsolete within a year or two.

On the other hand, a chip that needs to interface to separate commodity memory chips can remain smaller, run cooler, and be implemented in a less than bleeding-edge technology, since it only needs to be fast enough to saturate the memory interface.

Quote

Now, I am not an EE so humor me, but aren't there RAMs that have access speeds that are an order of magnitude greater than standard DRAM, and wouldn't using that defeat the memory access bottleneck in an ASIC-resistant design?

Yes, there is SRAM, but it still has a limited bandwidth.

Right, if the optimum "ASIC" that can be developed for a given algorithm -- if it's main components can all be directly used in "PC's" or general commodity hardware, then the "ASIC resistance" has succeeded in my view.

However we still have centralization issues like "less than order of magnitude improvements" -- joe blow still can't really profitably compete. I believe the only algorithm that can solve this is "factorization of large numbers" which I invented. That algorithm will require a GPU + a CPU and therefore all modern phones/pc's/laptops/AI will be able to mine that, whereas Server farms, Multiple CPU rigs, and Botnet's likely wouldn't (or at least wouldn't be able to gain a significant advantage).

https://bitcointalk.org/index.php?topic=2575256.0;all

tromp

legendary

Activity: 990

Merit: 1108

Quote from: odolvlobo on June 09, 2018, 05:22:03 PM

Whether it is a single chip or not is not really the point, is it?

To me that is really the point. A competitive single chip ASIC is a big hot-running chip requiring tons of design talent and access to the latest and greatest fabrication technology. This leads to manufacturing centralization and to noisy mining rigs that become obsolete within a year or two.

On the other hand, a chip that needs to interface to separate commodity memory chips can remain smaller, run cooler, and be implemented in a less than bleeding-edge technology, since it only needs to be fast enough to saturate the memory interface.

Quote

Now, I am not an EE so humor me, but aren't there RAMs that have access speeds that are an order of magnitude greater than standard DRAM, and wouldn't using that defeat the memory access bottleneck in an ASIC-resistant design?

Yes, there is SRAM, but it still has a limited bandwidth.

odolvlobo

legendary

Activity: 4438

Merit: 3387

Quote from: ir.hn on June 09, 2018, 02:00:09 PM

Quote from: aliashraf on June 09, 2018, 01:02:48 PM

Quote from: odolvlobo on June 01, 2018, 02:39:39 AM

An ASIC is simply a chip designed for a specific purpose. There is no reason why an ASIC can't be designed to be good at memory access.

Accessing memory bus is already optimized in cpu/gpu tech and it does not make sense for a designer/manufacturer to claim such a thing: An asic optimized for memory bus access!

...

I think he was just arguing semantics more than anything, just the definition of asic as application specific integrated circuit. It is a tiring and unproductive argument though. Technically I could make an asic for Yescrypt by removing usb ports and GPU ports and shrinking the board, maximizing copper for heat dissipation for 100% max CPU usage, etc. The point is though it would still be a functioning computer, just designed for CPU mining. So technically it would be an asic but not an asic by any real metric. So basically ASIC just needs to be defined more closely to its real world form, say that to be defined as an asic, it must achieve at least an order of magnitude improvement in running a given application over general purpose computers.
...

I'm not just arguing semantics or definitions. It seems to me that the goal of "ASIC-resistance" is really to prevent a specialized device whose performance is an order of magnitude improvement over a PC, but at the same cost. Whether it is a single chip or not is not really the point, is it?

Now, I am not an EE so humor me, but aren't there RAMs that have access speeds that are an order of magnitude greater than standard DRAM, and wouldn't using that defeat the memory access bottleneck in an ASIC-resistant design?

ir.hn

member

Activity: 322

Merit: 54

Consensus is Constitution

Quote from: aliashraf on June 09, 2018, 01:02:48 PM

Quote from: odolvlobo on June 01, 2018, 02:39:39 AM

Quote from: lichuan on May 31, 2018, 11:46:00 PM

... because ASIC is good at sha256 computation but not memory access, ...

An ASIC is simply a chip designed for a specific purpose. There is no reason why an ASIC can't be designed to be good at memory access.

Unbelievable question from a decent bitcointalk member Shocked

Accessing memory bus is already optimized in cpu/gpu tech and it does not make sense for a designer/manufacturer to claim such a thing: An asic optimized for memory bus access!

Any algorithm, (I mean any algorithm) being truly memory hard by means of memory access is inherently ASIC resistant as long it has two important properties:

a large enough memory footprint
a complex enough function that is not feasible to be implemented directly on the memory bank

The latter is new and I have to explain a bit:
It suggests that the fetch operation MUST be necessary and not feasible to be bypassed by adding some extra gates to a special memory bank to perform an in-place hash (like what happened for Equihash).

The problem with Equihash and Cryptonight is that they have not achieved the desired level of hardness but Ethash (a Dagger Hasihmoto variant) is performing well and Bitmain's E3 by no means deserves to be called an ASIC.

To put it straight:
Any educated enough computer engineer can easily confirm that a memory bound algorithm can never be optimized by means of optimization of processing power (by implementing the algorithm in ASIC for instance).

So, it is just a ridiculous and naive assumption that every PoW algorithm is vulnerable to ASIC attacks.
On the contrary, I believe that ASIC resistance (and behind, ASIC proof) is achievable no matter what Bitmain and its agents try to induce in the community.

I think he was just arguing semantics more than anything, just the definition of asic as application specific integrated circuit. It is a tiring and unproductive argument though. Technically I could make an asic for Yescrypt by removing usb ports and GPU ports and shrinking the board, maximizing copper for heat dissipation for 100% max CPU usage, etc. The point is though it would still be a functioning computer, just designed for CPU mining. So technically it would be an asic but not an asic by any real metric. So basically ASIC just needs to be defined more closely to its real world form, say that to be defined as an asic, it must achieve at least an order of magnitude improvement in running a given application over general purpose computers.

But yes ASIC resistance is absolutely real; the problem is even algorithms that can be asic resistant are implemented in ways to benefit GPU's over CPU's. In this process of allowing for GPU acceleration, they make it significantly less resistant to real ASIC's.

aliashraf

legendary

Activity: 1456

Merit: 1174

Always remember the cause!

Quote from: odolvlobo on June 01, 2018, 02:39:39 AM

Quote from: lichuan on May 31, 2018, 11:46:00 PM

... because ASIC is good at sha256 computation but not memory access, ...

An ASIC is simply a chip designed for a specific purpose. There is no reason why an ASIC can't be designed to be good at memory access.

Unbelievable question from a decent bitcointalk member Shocked

Accessing memory bus is already optimized in cpu/gpu tech and it does not make sense for a designer/manufacturer to claim such a thing: An asic optimized for memory bus access!

Any algorithm, (I mean any algorithm) being truly memory hard by means of memory access is inherently ASIC resistant as long it has two important properties:

a large enough memory footprint
a complex enough function that is not feasible to be implemented directly on the memory bank

The latter is new and I have to explain a bit:
It suggests that the fetch operation MUST be necessary and not feasible to be bypassed by adding some extra gates to a special memory bank to perform an in-place hash (like what happened for Equihash).

The problem with Equihash and Cryptonight is that they have not achieved the desired level of hardness but Ethash (a Dagger Hasihmoto variant) is performing well and Bitmain's E3 by no means deserves to be called an ASIC.

To put it straight:
Any educated enough computer engineer can easily confirm that a memory bound algorithm can never be optimized by means of optimization of processing power (by implementing the algorithm in ASIC for instance).

So, it is just a ridiculous and naive assumption that every PoW algorithm is vulnerable to ASIC attacks.
On the contrary, I believe that ASIC resistance (and behind, ASIC proof) is achievable no matter what Bitmain and its agents try to induce in the community.

lichuan

newbie

Activity: 148

Merit: 0

Quote from: ir.hn on June 09, 2018, 09:15:30 AM

So are you saying this can be implemented in a soft fork? Would there be any way for miners using old sha256 to fool the algorithm to accept their old type sha256 values, or skip a step of the memory hard part if their values look correct? In other words, how is doing the memory hard part enforced?

It need hard fork, but doesn't need any major code changes.

ir.hn

member

Activity: 322

Merit: 54

Consensus is Constitution

So are you saying this can be implemented in a soft fork? Would there be any way for miners using old sha256 to fool the algorithm to accept their old type sha256 values, or skip a step of the memory hard part if their values look correct? In other words, how is doing the memory hard part enforced?

lichuan

newbie

Activity: 148

Merit: 0

In the above verify_hash() function, the coin_hash_b64 function is defined as:

Code:

std::string coin_hash_b64(const char *data, uint32 size)
{
char h_256[CSHA256::OUTPUT_SIZE];
CHash256().Write(data, size).Finalize(h_256);
std::string b64 = fly::base::base64_encode(h_256, CSHA256::OUTPUT_SIZE);

return b64;
}

lichuan

newbie

Activity: 148

Merit: 0

Quote from: ir.hn on June 08, 2018, 11:23:08 PM

Quote from: lichuan on June 08, 2018, 11:09:28 PM

Quote from: ir.hn on June 08, 2018, 12:20:03 PM

Quote from: lichuan on June 08, 2018, 01:08:01 AM

If you want to develop a new POW coin, you can't use SHA256 directly, but you can use my method, because the existing ASIC machine is not good at process massive memory operation.

Again look into yescrypt for example. Memory hard algorithms aren't new. Even scrypt can be asic resistance with high enough N value set.

How is yours different than these?

My algorithm combine the advantages of both SHA256 and memory access, SHA256 have been proved in bitcoin for many years.

So basically what your saying is your algorithm can be a plug in replacement for sha256? So you could plug this directly into bitcoin without any major code changes? Soft fork, hard fork?

Here is Yescrypt pseudocode:

Code:

# ** Functions/symbols **
# || Concatenate two strings
# HMAC(h, k, v) HMAC with hash function h and key k over value v
# PBKDF2(prf, p, s, c, dklen) PBKDF2 key derivation function
# substr(s, start, length) Substring from start (zero-based) of length bytes
# le32dec(), le32enc() 32-bit little-endian decoding/encoding
# SIMD_[un]shuffle() Salsa20 SIMD (un)shuffling of 32-bit words
# Integerify(B, r) Parse B_{2r-1} as a little-endian integer
# p2floor(x) Largest power of 2 not greater than x

# ** Inputs **
string password
string salt
integer t_cost
integer m_cost
integer outlen

# ** Algorithm **
N = 8 << m_cost
r = 8

# ** Settings hard-coded/assumed below in this pseudocode **
# p = 1
# g = 0
# flags = YESCRYPT_RW
# no ROM

# If m_cost is 16 MB per thread or more, pre-hash using 1/64th of m_cost first,
# to mitigate garbage collector attacks. yescrypt_prehash() is almost the same
# as this function, but its personalization HMAC key is "yescrypt-prehash"
# rather than "yescrypt", it skips builtin SCRAM finalization, and it will not
# invoke another yescrypt_prehash().
if (N / p >= 0x100 && N / p * r >= 0x20000)
password = yescrypt_prehash(password, salt, t_cost, m_cost / 64, 32)

password = HMAC(SHA-256, "yescrypt", password)
B = PBKDF2(HMAC-SHA-256, password, salt, 1, 128 * r)
password = substr(B, 0, 32)

# SMix1 invoked to initialize pwxform S-boxes
X = SIMD_shuffle(le32dec(B))
for i = 0 to Sbytes/128 - 1
S[i] = X
X = BlockMix_{Salsa20/8, 1}(X)

# SMix1 invoked "for real"
for i = 0 to N - 1
V[i] = X
if (i > 1)
j = Wrap(Integerify(X, r), i)
X = X xor V[j]
X = BlockMix_pwxform{Salsa20/2, S, r}(X)

# SMix2
if (t_cost = 0)
Nloop = (N + 2) / 3
else if (t_cost = 1)
Nloop = (N * 2 + 2) / 3
else
Nloop = N * (t - 1)
for i = 0 to Nloop - 1
j = Integerify(X, r) mod N
X = X xor V[j]
V[j] = X
X = BlockMix_pwxform{Salsa20/2, S, r}(X)
B = le32enc(SIMD_unshuffle(X))

out = PBKDF2(HMAC-SHA-256, password, B, 1, outlen)

# Builtin SCRAM (RFC 5802) support
clen = min(outlen, 32)
dk = PBKDF2(HMAC-SHA-256, password, B, 1, 32)
dk = SHA-256(HMAC(SHA-256, dk, "Client Key"))
out = substr(dk, 0, clen) || substr(out, clen, outlen - clen)

return out

# ** Helper functions **

# Wrap x to the range 0 to i-1
Wrap(x, i)
n = p2floor(i)
return (x mod n) + (i - n)

Yes, the result value of my algorithm is sha256d value too, please refer to the hash-verify code above.

ir.hn

member

Activity: 322

Merit: 54

Consensus is Constitution

Quote from: lichuan on June 08, 2018, 11:09:28 PM

Quote from: ir.hn on June 08, 2018, 12:20:03 PM

Quote from: lichuan on June 08, 2018, 01:08:01 AM

If you want to develop a new POW coin, you can't use SHA256 directly, but you can use my method, because the existing ASIC machine is not good at process massive memory operation.

Again look into yescrypt for example. Memory hard algorithms aren't new. Even scrypt can be asic resistance with high enough N value set.

How is yours different than these?

My algorithm combine the advantages of both SHA256 and memory access, SHA256 have been proved in bitcoin for many years.

So basically what your saying is your algorithm can be a plug in replacement for sha256? So you could plug this directly into bitcoin without any major code changes? Soft fork, hard fork?

Here is Yescrypt pseudocode:

Code:

# ** Functions/symbols **
# || Concatenate two strings
# HMAC(h, k, v) HMAC with hash function h and key k over value v
# PBKDF2(prf, p, s, c, dklen) PBKDF2 key derivation function
# substr(s, start, length) Substring from start (zero-based) of length bytes
# le32dec(), le32enc() 32-bit little-endian decoding/encoding
# SIMD_[un]shuffle() Salsa20 SIMD (un)shuffling of 32-bit words
# Integerify(B, r) Parse B_{2r-1} as a little-endian integer
# p2floor(x) Largest power of 2 not greater than x

# ** Inputs **
string password
string salt
integer t_cost
integer m_cost
integer outlen

# ** Algorithm **
N = 8 << m_cost
r = 8

# ** Settings hard-coded/assumed below in this pseudocode **
# p = 1
# g = 0
# flags = YESCRYPT_RW
# no ROM

# If m_cost is 16 MB per thread or more, pre-hash using 1/64th of m_cost first,
# to mitigate garbage collector attacks. yescrypt_prehash() is almost the same
# as this function, but its personalization HMAC key is "yescrypt-prehash"
# rather than "yescrypt", it skips builtin SCRAM finalization, and it will not
# invoke another yescrypt_prehash().
if (N / p >= 0x100 && N / p * r >= 0x20000)
password = yescrypt_prehash(password, salt, t_cost, m_cost / 64, 32)

password = HMAC(SHA-256, "yescrypt", password)
B = PBKDF2(HMAC-SHA-256, password, salt, 1, 128 * r)
password = substr(B, 0, 32)

# SMix1 invoked to initialize pwxform S-boxes
X = SIMD_shuffle(le32dec(B))
for i = 0 to Sbytes/128 - 1
S[i] = X
X = BlockMix_{Salsa20/8, 1}(X)

# SMix1 invoked "for real"
for i = 0 to N - 1
V[i] = X
if (i > 1)
j = Wrap(Integerify(X, r), i)
X = X xor V[j]
X = BlockMix_pwxform{Salsa20/2, S, r}(X)

# SMix2
if (t_cost = 0)
Nloop = (N + 2) / 3
else if (t_cost = 1)
Nloop = (N * 2 + 2) / 3
else
Nloop = N * (t - 1)
for i = 0 to Nloop - 1
j = Integerify(X, r) mod N
X = X xor V[j]
V[j] = X
X = BlockMix_pwxform{Salsa20/2, S, r}(X)
B = le32enc(SIMD_unshuffle(X))

out = PBKDF2(HMAC-SHA-256, password, B, 1, outlen)

# Builtin SCRAM (RFC 5802) support
clen = min(outlen, 32)
dk = PBKDF2(HMAC-SHA-256, password, B, 1, 32)
dk = SHA-256(HMAC(SHA-256, dk, "Client Key"))
out = substr(dk, 0, clen) || substr(out, clen, outlen - clen)

return out

# ** Helper functions **

# Wrap x to the range 0 to i-1
Wrap(x, i)
n = p2floor(i)
return (x mod n) + (i - n)

lichuan

newbie

Activity: 148

Merit: 0

Quote from: ir.hn on June 08, 2018, 12:20:03 PM

Quote from: lichuan on June 08, 2018, 01:08:01 AM

If you want to develop a new POW coin, you can't use SHA256 directly, but you can use my method, because the existing ASIC machine is not good at process massive memory operation.

Again look into yescrypt for example. Memory hard algorithms aren't new. Even scrypt can be asic resistance with high enough N value set.

How is yours different than these?

My algorithm combine the advantages of both SHA256 and memory access, SHA256 have been proved in bitcoin for many years.

ir.hn

member

Activity: 322

Merit: 54

Consensus is Constitution

Quote from: lichuan on June 08, 2018, 01:08:01 AM

If you want to develop a new POW coin, you can't use SHA256 directly, but you can use my method, because the existing ASIC machine is not good at process massive memory operation.

Again look into yescrypt for example. Memory hard algorithms aren't new. Even scrypt can be asic resistance with high enough N value set.

How is yours different than these?

lichuan

newbie

Activity: 148

Merit: 0

If you want to develop a new POW coin, you can't use SHA256 directly, but you can use my method, because the existing ASIC machine is not good at process massive memory operation.

Traxo

hero member

Activity: 568

Merit: 703

@tromp and @anonymint analyzed this over the past years.
It's not just the latency of memory access, but the power cost.
Memory can be optimized for the access patterns and power cost profile can be optimized.
The ASIC will always be at least an order-of-magnitude more efficient than the general purpose hardware:

https://gist.github.com/shelby3/67d990230e2dc9eb8be9e43e0b0b77a7

ir.hn

member

Activity: 322

Merit: 54

Consensus is Constitution

How is this different than Scrypt of Yescrypt which have high N values set?

lichuan

newbie

Activity: 148

Merit: 0

Quote from: johnny.jiang on June 01, 2018, 02:29:44 AM

Soon or later new ASICS will be developed to adapt your algorithm, unless your algorithm can self evolution

No, because the amount of memory access is much more than L3 cache, so if a miner want to generate block, he/she must access data from memory, not from L3 cache or L4 (if have) cache.

lichuan

newbie

Activity: 148

Merit: 0

Quote from: odolvlobo on June 01, 2018, 02:39:39 AM

Quote from: lichuan on May 31, 2018, 11:46:00 PM

... because ASIC is good at sha256 computation but not memory access, ...

An ASIC is simply a chip designed for a specific purpose. There is no reason why an ASIC can't be designed to be good at memory access.

It's harder to design a ASIC with very fast speed of memory access than pure computation, because the speed of memory access has many limitations.

lichuan

newbie

Activity: 148

Merit: 0

Quote from: ?? on ??

I don't have full understanding about your code, but i wonder about :
1. CryptoNight needs very fast memory such as L3 cache and EtHash needs fast memory with big amount, but both algorithm have ASIC (or at least the manufacture claim that). Are you sure your algorithm really ASIC-resistant?
2. Is your algorithm require big memory amount, very fast memory or both?

You don't understand my code, in my algorithm, only need the time cost by one memory process is more than one sha256 computation in the while loop, and need the amount of memory is much more than L3 cache, this is very different from EtHash or CryptoNight.

odolvlobo

legendary

Activity: 4438

Merit: 3387

Quote from: lichuan on May 31, 2018, 11:46:00 PM

... because ASIC is good at sha256 computation but not memory access, ...

An ASIC is simply a chip designed for a specific purpose. There is no reason why an ASIC can't be designed to be good at memory access.

johnny.jiang

newbie

Activity: 2

Merit: 0

Soon or later new ASICS will be developed to adapt your algorithm, unless your algorithm can self evolution

lichuan

newbie

Activity: 148

Merit: 0

Hi, everyone
As we know, the mining process is like this:

Code:

while(hash_value > target)
{
nonce = new_nonce()
hash_value = sha256d(utc + version + pre_block_hash + nonce + miner_pubkey + merkle_root + something else)
}

the ASIC's advantage over GPU or CPU is that ASIC can run sha256d more times under the same time condition, so in the loop, we can combine sha256 with a lot of memory access to resist ASIC, because ASIC is good at sha256 computation but not memory access, as following:

Code:

while(hash_value > target)
{
   nonce = new_nonce()
   memory_access_result = access_memory_process(utc + version + pre_block_hash + nonce + miner_pubkey + merkle_root + something else)
   hash_value = sha256d(utc + version + pre_block_hash + nonce + miner_pubkey + merkle_root + memory_access_result + something else)
}

In my own askcoin projcect (www.askcoin.me), we name this asic-resistant sha256 algorithm as "SHA256-AR", the verify-hash code is something like this:

Code:

bool Blockchain::hash_pow(char hash_arr[32], uint32 zero_bits)
{
   uint32 zero_char_num = zero_bits / 8;

   for(uint32 i = 0; i < zero_char_num; ++i)
   {
   if(hash_arr[i] != 0)
   {
   return false;
   }
   }

   uint32 zero_remain_bit = zero_bits % 8;

   if(zero_remain_bit == 0)
   {
   return true;
   }

   return hash_arr[zero_char_num] < 1 << 8 - zero_remain_bit;
}

bool Blockchain::verify_hash(std::string block_hash, std::string block_data, uint32 zero_bits)
{
   char hash_raw[32];
   uint32 len = fly::base::base64_decode(block_hash.c_str(), block_hash.length(), hash_raw, 32);

   if(len != 32)
   {
   return false;
   }

   uint32 buf[16] = {0};
   char *p = (char*)buf;
   coin_hash(block_data.c_str(), block_data.length(), p);
   block_data += "another_32_bytes";
   coin_hash(block_data.c_str(), block_data.length(), p + 32);
   uint32 arr_16[16] = {0};

   for(uint32 i = 0; i < 16; ++i)
   {
   arr_16[i] = ntohl(buf[i]);
   }

   for(uint32 i = 0; i < ASIC_RESISTANT_DATA_NUM;)
   {
   for(int j = 0; j < 16; ++j)
   {
   arr_16[j] = (arr_16[j] + __asic_resistant_data__[i + j]) * (arr_16[j] ^ __asic_resistant_data__[i + j]);
   }

   i += 16;
   }

   for(uint32 i = 0; i < 16; ++i)
   {
   buf[i] = htonl(arr_16[i]);
   }

   std::string hash_data = block_data + fly::base::base64_encode(p, 64);
   std::string block_hash_verify = coin_hash_b64(hash_data.c_str(), hash_data.length());

   if(block_hash != block_hash_verify)
   {
   return false;
   }

   return hash_pow(hash_raw, zero_bits);
}

the __asic_resistant_data__ is a big table contains predefined random uint32 data, the number of element is:

Code:

const uint32 ASIC_RESISTANT_DATA_NUM = 5 * 1024 * 1024;

Topic: I found a way to implement real asic-resistant POW algorithm (Read 706 times)