Wrote a little script that generates word clouds for a user's last 25 posts.
Each post gets a word cloud but I could easily change it to generate one for each week, month, whatever.
This is a tiny part of a hopefully larger project on shilling analysis and detection. Later versions may calculate and make use of sentiment.
Here are my last 25! If you want me to do one for you, send me your user ID. Requests for special formatting or additional features may or may not be granted, so feel free to ask. The Python script is included at the end of this post so you can do it yourself as well, though I only just wrote it, so like most code it could use some refactoring.
import requests
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from bs4 import BeautifulSoup
import time
def processPostsPage(pageNumber, userId):
    """Fetch one page of a user's bitcointalk posts and build a word cloud per post.

    Args:
        pageNumber: Page index. The request offset is ``pageNumber * 20``,
            so 0 requests offset 0.
        userId: Numeric bitcointalk user id placed in the profile URL.

    Returns:
        A list of ``WordCloud`` objects, one per ``<div class="post">`` found
        on the page, in document order. Empty when the page has no such divs.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.exceptions.RequestException: On connection failure or timeout.
    """
    # grab the next page of posts
    pageUrl = (
        "https://bitcointalk.org/index.php?action=profile;u=" + str(userId)
        + ";sa=showPosts;start=" + str(pageNumber * 20)
    )
    # A timeout keeps a stalled connection from hanging the crawl forever.
    response = requests.get(pageUrl, timeout=30)
    # Fail loudly on an error page instead of silently parsing it as "no posts".
    response.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(response.content, "html.parser")
    # grab divs with the 'post' class
    postsOnPage = soup.find_all("div", {"class": "post"})

    stopwords = {"post", "posts"}
    wordCloudsForPageOfPosts = []
    # convert posts to wordcloud images
    for div in postsOnPage:
        # drop the stopwords before handing the text to WordCloud
        words = [word for word in div.get_text().split() if word not in stopwords]
        wordCloudsForPageOfPosts.append(WordCloud().generate(" ".join(words)))
    return wordCloudsForPageOfPosts
def buildPlot(userId, numberOfPosts):
    """Draw word clouds for a user's most recent posts on one matplotlib figure.

    Pages of posts are fetched through ``processPostsPage`` starting at page 0
    and advancing one page at a time, until ``numberOfPosts`` clouds have been
    drawn or a page comes back empty.

    Args:
        userId: Numeric bitcointalk user id, passed through to
            ``processPostsPage``.
        numberOfPosts: How many posts to display. Up to 25 the layout is a
            5x5 grid; beyond that extra rows of 5 are added.
    """
    width = 25
    height = 15
    fig = plt.figure(figsize=(width, height))

    # Keep the 5x5 layout for up to 25 posts and grow by rows beyond that,
    # so add_subplot never receives an index outside the grid.
    columns = 5
    rows = max(5, -(-numberOfPosts // columns))

    currentPage = 0  # offset is page * 20, so 0 is the first page of posts
    count = 0
    while count < numberOfPosts:
        # sleep in between grabbing pages of posts for a stress free bitcointalk crawl
        if currentPage > 0:
            time.sleep(1)
        # grab the clouds for that page
        clouds = processPostsPage(currentPage, userId)
        if not clouds:
            # No posts on this page: stop rather than re-requesting forever.
            break
        currentPage += 1

        # display the clouds until we reach the right number
        for cloud in clouds:
            if count >= numberOfPosts:
                break
            count += 1
            axes = fig.add_subplot(rows, columns, count)
            # remove tick marks; only the new subplot needs touching
            axes.set_xticks([])
            axes.set_yticks([])
            axes.set_title("Post " + str(count))
            axes.imshow(cloud)
# Script entry: choose whose posts to chart and how many of them.
userID = 249526
numberOfPostToDisplay = 25

# Lay out the word clouds on a figure, then open the matplotlib window.
buildPlot(userId=userID, numberOfPosts=numberOfPostToDisplay)
plt.show()