It was the Bitcointalk forum that inspired us to create Bitcointalksearch.org. Bitcointalk is an excellent site that should be the default page for anybody dealing in cryptocurrency, since it is a virtual gold mine of data. However, our own experience and user feedback led us to create our site: Bitcointalk's search is slow, and it is difficult to get the results you need because you have to log in first to find anything useful. Furthermore, its search functionality is rate-limited.
The aim of our project is to create a faster website that yields more results, without requiring you to create an account or log in. Your personal data will therefore never be in jeopardy, since we do not ask for any of it and you do not need to provide any to use our site with all of its capabilities.
We created this website with the sole purpose of letting users search quickly and efficiently in the field of cryptocurrency, so that they have access to the latest and most accurate information, thereby assisting the crypto-community at large.
#!/bin/bash
# Crawl parameters, optionally overridden from the command line; with no
# arguments the values are the same as the former hard-coded ones.
#   $1 - offset the countdown over board pages starts from (default 1760)
#   $2 - offset step between successive page requests (default 40)
total_articles="${1:-1760}"
decrement="${2:-40}"
# scrape URL
# Download URL with curl and print, one per line, a YYYY-MM-DD date taken
# from each line of the response; lines without a date print nothing.
#   $1 - URL to fetch
# Outputs: dates on stdout.
function scrape {
  # The pattern has exactly one capture group, so the replacement must be
  # \1; the former \2 made GNU sed abort with an "invalid reference" error.
  # The leading .* is greedy, so when a line holds several dates the last
  # one is the one printed.
  curl "$1" | sed -rn 's#.*([0-9]{4}-[0-1][0-9]-[0-3][0-9]).*#\1#p'
}
# Request the board index at every offset from total_articles down to
# decrement, then the board URL without an offset, and turn the stream of
# dates printed by scrape into "count,date" CSV rows on stdout.
{
  # x > 0 rather than x > 40: with the old bound the loop stopped after
  # offset 80, so the page at offset 40 was never requested.
  for ((x = total_articles; x > 0; x -= decrement)); do
    scrape "https://bitcointalk.org/index.php?board=77.$x"
    sleep 5 # This is here just to be kind to the server, remove for speedup.
  done
  scrape "https://bitcointalk.org/index.php?board=77"
} |
  sort |
  uniq -c |
  # uniq -c prints "<padding><count> <line>"; rewrite that as "<count>,<line>".
  sed -r 's/^ *([0-9]+) (.*)/\1,\2/'
# Positional overrides with built-in fallbacks:
#   $1 -> total_articles (1760 when absent or empty)
#   $2 -> decrement      (40 when absent or empty)
decrement=${2:-40}
total_articles=${1:-1760}
#!/bin/bash
# press_scraper.sh - scrape and collate bitcoin press articles, output csv.
# usage - ./press_scraper.sh
# This program is free software. It comes without any warranty, to
# the extent permitted by applicable law. You can redistribute it
# and/or modify it under the terms of the Do What The Fuck You Want
# To Public License, Version 2, as published by Sam Hocevar. See
# http://sam.zoy.org/wtfpl/COPYING for more details.
# Run configuration.
total_articles=1760 # offset the countdown over board pages starts from
decrement=40        # offset step between successive page requests
outfile=press_articles.csv

# Scratch file that accumulates the scraped dates.
# Check mktemp's own exit status as well as the file: the old unquoted
# `[ -f $tempfile ]` collapsed to `[ -f ]` when mktemp printed nothing, and
# that one-argument test is true, so the failure went unnoticed.
if ! tempfile=$(mktemp) || [ ! -f "$tempfile" ]; then
  echo "Error: Could not make temporary file. Exiting..." >&2
  exit 1
fi
# Delete the scratch file on every exit path instead of leaving it behind.
trap 'rm -f -- "$tempfile"' EXIT
# scrape URL
# Download URL with curl and print, one per line, a YYYY-MM-DD date taken
# from each line of the response; lines without a date print nothing.
#   $1 - URL to fetch
# Outputs: dates on stdout.
function scrape {
  # The pattern has exactly one capture group, so the replacement must be
  # \1; the former \2 made GNU sed abort with an "invalid reference" error.
  # The leading .* is greedy, so when a line holds several dates the last
  # one is the one printed.
  curl "$1" | sed -rn 's#.*([0-9]{4}-[0-1][0-9]-[0-3][0-9]).*#\1#p'
}
# Append the dates scrape prints for every board page to the scratch file,
# then write "count,date" rows, one per distinct date, to $outfile.
# x > 0 rather than x > 40: with the old bound the loop stopped after
# offset 80, so the page at offset 40 was never requested.
for ((x = total_articles; x > 0; x -= decrement)); do
  scrape "https://bitcointalk.org/index.php?board=77.$x" >> "$tempfile"
  sleep 5 # This is here just to be kind to the server, remove for speedup.
done
scrape "https://bitcointalk.org/index.php?board=77" >> "$tempfile"
# uniq -c prints "<padding><count> <line>"; sed rewrites it as "<count>,<line>".
sort "$tempfile" | uniq -c | sed -r 's/^ *([0-9]+) (.*)/\1,\2/' > "$outfile"
# The scratch file is no longer needed once the CSV has been written.
rm -f -- "$tempfile"
# Gnuplot script: plot press_articles.csv (rows of "count,date") as boxes
# and write the chart to press_hits.png.
reset
clear
# x values are dates, both parsed and printed as YYYY-MM-DD.
set xdata time
set format x "%Y-%m-%d"
set timefmt "%Y-%m-%d"
# Comma-separated input. This was previously set twice with the same value;
# once is enough.
set datafile separator ","
set style fill solid noborder
# Tic spacing 604800 (seconds on a time axis, i.e. 7 days), labels rotated.
set xtics rotate by -90 out nomirror 604800
set ytics out nomirror
set grid ytics
set ylabel "Press hits/day"
set xrange ["2012-04-07":"2013-04-26"]
set yrange [0:*]
# Box width 43200 in x units (seconds, i.e. half a day).
set boxwidth 43200 absolute
set term pngcairo truecolor font "Arial,11" size 1200,1200
set output "press_hits.png"
# Column 2 (date) supplies x, column 1 (count) supplies y.
plot "press_articles.csv" using 2:1 with boxes ti "Press Article Frequency" lt 1 linecolor rgb "#FF0000"