In order to create the .Net application that will connect to the twitter API we will need a developer account. We can create the account at the twitter developer’s site (https://dev.twitter.com/).
In this link you can find a post that explains how to create a Twitter account and get the Consumer key, Consumer secret, Access token and Access token secret; these keys will allow us to connect to the Twitter API.
Now we need to create a console application, in my case I’m using Visual Studio 2012 and .Net 4.0
The Main method
This is the entry point of the application; it asks the user for the search criteria and the crawling time.
We must remember that the Twitter Search API will not return tweets older than 6 days; for that reason this application is designed to collect results in real time. Also, we will always get the most recent tweets, in groups of 100.
You can read more about the API limitations here https://dev.twitter.com/docs/using-search
/// <summary>
/// Entry point: asks for a search term and a crawl duration, records the crawl
/// in the database and keeps polling the Twitter search endpoint until the
/// requested time has elapsed.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Console.WriteLine("What's the search criteria?");
    string word = Console.ReadLine();
    Console.WriteLine("How long do you want to perform the crawl (in hours)?");
    string horas = Console.ReadLine();

    // Reject anything that is not a positive, finite number instead of letting
    // double.Parse crash the program before the error handling below is reached.
    double hours;
    if (!double.TryParse(horas, out hours) || !(hours > 0) || double.IsInfinity(hours))
    {
        Console.WriteLine("The crawl duration must be a positive number of hours.");
        Console.ReadLine();
        return;
    }

    query = word;
    Console.WriteLine("The magic is happening....");
    string url = "https://api.twitter.com/1.1/search/tweets.json";

    // Twitter documents a per-window request quota for search/tweets
    // (180 requests per 15 minutes with user authentication at the time of
    // writing), i.e. one request every 5 seconds. Polling without a pause
    // exhausts the quota and the resulting HTTP error would end the crawl.
    TimeSpan pauseBetweenRequests = TimeSpan.FromSeconds(5);

    try
    {
        DateTime EndDate = DateTime.Now.AddHours(hours);

        // using guarantees the connection and command are released even when
        // a request or a database call throws.
        using (SqlConnection conn = new SqlConnection("Server=server\\MSSQLSERVER2012;Database=TwitterSearchContent;Trusted_Connection=True;"))
        using (SqlCommand cmd = conn.CreateCommand())
        {
            conn.Open();
            Guid idCrewl = InsertCrawl(query, cmd);
            while (EndDate > DateTime.Now)
            {
                SearchTwitter(url, query, cmd, idCrewl);
                System.Threading.Thread.Sleep(pauseBetweenRequests);
            }
        }
    }
    catch (Exception ex)
    {
        Console.Write(ex.ToString());
        Console.ReadLine();
    }

    Console.Write("Magic is done, look at your data base");
    Console.ReadLine();
}
Now let’s see the SearchTwitter method
Here is where we connect to the Twitter API and download the information in JSON format; the query is passed through the q parameter. In this method we need to set the Consumer key, Consumer secret, Access token and Access token secret.
/// <summary>
/// Sends one OAuth 1.0a-signed GET request to the given Twitter search
/// endpoint and hands the JSON response to <c>ProcessResults</c>.
/// </summary>
/// <param name="resource_url">Endpoint URL without a query string.</param>
/// <param name="q">Search term; it is URL-encoded here.</param>
/// <param name="cmd">Open SQL command used to store the results.</param>
/// <param name="idCrewl">Identifier of the crawl the results belong to.</param>
/// <remarks>
/// Errors raised while sending the request or reading the response are not
/// caught here and propagate to the caller. Errors raised while parsing are
/// written to the console and the method then waits for Enter.
/// </remarks>
public static void SearchTwitter(string resource_url, string q, SqlCommand cmd, Guid idCrewl)
{
    // oauth application keys
    var oauth_token = "xxxxx"; //"insert here...";
    var oauth_token_secret = "xxxx"; //"insert here...";
    var oauth_consumer_key = "xxxx";// = "insert here...";
    var oauth_consumer_secret = "xxxxx";// = "insert here...";

    // oauth implementation details
    var oauth_version = "1.0";
    var oauth_signature_method = "HMAC-SHA1";

    // request parameters; API v1.1 uses "count" (the v1.0 "rpp" name is not
    // part of the v1.1 search/tweets parameters), with a maximum of 100
    var result_type = "recent";
    var count = "100";

    // unique request details
    // A GUID avoids repeating the nonce when two requests share a timestamp.
    var oauth_nonce = Guid.NewGuid().ToString("N");
    var timeSpan = DateTime.UtcNow - new DateTime(1970, 1, 1, 0, 0, 0, 0, DateTimeKind.Utc);
    var oauth_timestamp = Convert.ToInt64(timeSpan.TotalSeconds).ToString();

    // create oauth signature
    // The parameters must appear sorted by name, which is why "count" comes
    // first and "q" / "result_type" come last.
    // NOTE(review): Uri.EscapeDataString on .NET 4.0 may leave ! * ' ( )
    // unescaped, which OAuth expects to be encoded - confirm for queries that
    // contain those characters.
    var escapedQuery = Uri.EscapeDataString(q);
    var baseFormat = "count={0}&oauth_consumer_key={1}&oauth_nonce={2}&oauth_signature_method={3}" +
                     "&oauth_timestamp={4}&oauth_token={5}&oauth_version={6}&q={7}&result_type={8}";
    var baseString = string.Format(baseFormat,
        count,
        oauth_consumer_key,
        oauth_nonce,
        oauth_signature_method,
        oauth_timestamp,
        oauth_token,
        oauth_version,
        escapedQuery,
        result_type);
    baseString = string.Concat("GET&", Uri.EscapeDataString(resource_url), "&", Uri.EscapeDataString(baseString));

    var compositeKey = string.Concat(Uri.EscapeDataString(oauth_consumer_secret),
        "&", Uri.EscapeDataString(oauth_token_secret));
    string oauth_signature;
    using (HMACSHA1 hasher = new HMACSHA1(ASCIIEncoding.ASCII.GetBytes(compositeKey)))
    {
        oauth_signature = Convert.ToBase64String(
            hasher.ComputeHash(ASCIIEncoding.ASCII.GetBytes(baseString)));
    }

    // create the request header
    var headerFormat = "OAuth oauth_nonce=\"{0}\", oauth_signature_method=\"{1}\", " +
                       "oauth_timestamp=\"{2}\", oauth_consumer_key=\"{3}\", " +
                       "oauth_token=\"{4}\", oauth_signature=\"{5}\", " +
                       "oauth_version=\"{6}\"";
    var authHeader = string.Format(headerFormat,
        Uri.EscapeDataString(oauth_nonce),
        Uri.EscapeDataString(oauth_signature_method),
        Uri.EscapeDataString(oauth_timestamp),
        Uri.EscapeDataString(oauth_consumer_key),
        Uri.EscapeDataString(oauth_token),
        Uri.EscapeDataString(oauth_signature),
        Uri.EscapeDataString(oauth_version));
    ServicePointManager.Expect100Continue = false;

    // make the request; the query string must carry exactly the parameters
    // that were signed above
    resource_url += "?q=" + escapedQuery + "&result_type=" + result_type + "&count=" + count;
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(resource_url);
    request.Headers.Add("Authorization", authHeader);
    request.Method = "GET";
    request.ContentType = "application/x-www-form-urlencoded";

    // Dispose the response and reader so the underlying connection is
    // returned to the pool; this method is called repeatedly in a loop.
    string objText;
    using (var response = (HttpWebResponse)request.GetResponse())
    using (var reader = new StreamReader(response.GetResponseStream()))
    {
        objText = reader.ReadToEnd();
    }

    try
    {
        // Parsing first rejects a body that is not a JSON object before any
        // database work starts.
        JObject.Parse(objText);
        ProcessResults(objText, cmd, idCrewl);
    }
    catch (Exception twit_error)
    {
        Console.Write(twit_error.ToString());
        Console.ReadLine();
    }
}

The next thing we need to do is process the JSON result, for this purpose we have the method ProcessResults

/// <summary>
/// Converts the JSON search response to XML and stores the user, tweet and
/// hashtags of every child node except <c>search_metadata</c>.
/// </summary>
/// <param name="jsonData">JSON text of the response; <c>ToString()</c> is called on it.</param>
/// <param name="cmd">Open SQL command used for the inserts.</param>
/// <param name="idCrewl">Identifier of the crawl the results belong to.</param>
/// <returns>"success" when every node was processed, otherwise "ERROR".</returns>
public static string ProcessResults(object jsonData, SqlCommand cmd, Guid idCrewl)
{
    try
    {
        XmlDocument xd = (XmlDocument)JsonConvert.DeserializeXmlNode(jsonData.ToString(), "jsonData");
        XmlNode Xn = xd.SelectSingleNode("/jsonData");
        if (Xn == null)
        {
            // Nothing to store when the converted document has no root node.
            return "success";
        }

        foreach (XmlNode Xnn in Xn.ChildNodes)
        {
            if (Xnn.Name == "search_metadata")
            {
                continue;
            }

            XmlNode XnUser = Xnn.SelectSingleNode("user");
            if (XnUser == null)
            {
                // A node without a user cannot be stored as a tweet; report
                // it and keep going instead of failing on a null reference.
                Console.WriteLine("Skipping node '" + Xnn.Name + "' because it has no user element.");
                continue;
            }

            int userid = InsertUser(XnUser, cmd);
            XmlNode XnHasthags = Xnn.SelectSingleNode("entities");
            InsertTweet(Xnn, idCrewl, userid, cmd, XnHasthags);
        }

        return "success";
    }
    catch (Exception e)
    {
        Console.Write(e.ToString());
        Console.ReadLine();
        return "ERROR";
    }
}
And finally we have the rest of the methods, which execute the database operations
/// <summary>
/// Points the shared command at a new statement and drops the parameters of
/// the previous one, because every method here reuses the same command.
/// </summary>
private static void PrepareCommand(SqlCommand cmd, string sql)
{
    cmd.CommandText = sql;
    cmd.Parameters.Clear();
}

/// <summary>
/// Inserts a row into Crawls for a new crawl of <paramref name="query"/>.
/// </summary>
/// <param name="query">Search term entered by the user.</param>
/// <param name="cmd">Command bound to an open connection.</param>
/// <returns>The newly generated crawl identifier.</returns>
public static Guid InsertCrawl(String query, SqlCommand cmd)
{
    Guid id = Guid.NewGuid();

    // Parameters keep the user-typed query out of the SQL text and make the
    // date independent of the machine's culture and the session date format.
    PrepareCommand(cmd, "Insert into Crawls (IDCrawl,Query,CrawlDate) values (@IDCrawl,@Query,@CrawlDate)");
    cmd.Parameters.AddWithValue("@IDCrawl", id);
    cmd.Parameters.AddWithValue("@Query", query ?? string.Empty);
    cmd.Parameters.AddWithValue("@CrawlDate", DateTime.Now);
    cmd.ExecuteNonQuery();
    return id;
}

/// <summary>
/// Inserts the user described by <paramref name="Xn"/>, or updates the row
/// when a user with the same id already exists.
/// </summary>
/// <param name="Xn">XML node of the user; values are read by child position.</param>
/// <param name="cmd">Command bound to an open connection.</param>
/// <returns>The user id parsed from the first child node.</returns>
/// <remarks>
/// NOTE(review): the id is handled as a 32-bit integer because the return
/// type is <c>int</c>; an id outside that range throws OverflowException.
/// NOTE(review): the child positions presumably follow the field order of the
/// Twitter user object - confirm against a current response.
/// </remarks>
public static int InsertUser(XmlNode Xn, SqlCommand cmd)
{
    string IdUser = Xn.ChildNodes[0].InnerText;
    string Name = Xn.ChildNodes[2].InnerText;
    string screen_name = Xn.ChildNodes[3].InnerText;
    string location = Xn.ChildNodes[4].InnerText;
    string followers = Xn.ChildNodes[9].InnerText;
    string friends = Xn.ChildNodes[10].InnerText;
    string time_zone = Xn.ChildNodes[15].InnerText;
    string verified = Xn.ChildNodes[17].InnerText;
    string statuses_count = Xn.ChildNodes[18].InnerText;
    string lang = Xn.ChildNodes[19].InnerText;

    int userId = int.Parse(IdUser, CultureInfo.InvariantCulture);

    // UserExist resets the shared command, so it must run before the
    // parameters of the insert/update are added.
    bool exists = UserExist(IdUser, cmd);
    if (exists)
    {
        PrepareCommand(cmd,
            "update Users " +
            " set Name=@Name, screen_name=@screen_name, location=@location, followers=@followers" +
            ", friends=@friends, time_zone=@time_zone, verified=@verified" +
            ", statuses_count=@statuses_count, lang=@lang where IdUser=@IdUser");
    }
    else
    {
        PrepareCommand(cmd,
            "Insert into Users " +
            " (IdUser,Name,screen_name,location,followers,friends,time_zone,verified,statuses_count,lang)" +
            " values (@IdUser,@Name,@screen_name,@location,@followers,@friends,@time_zone,@verified,@statuses_count,@lang)");
    }

    // With parameters the text is stored as received; apostrophes no longer
    // have to be stripped to keep the statement valid.
    cmd.Parameters.AddWithValue("@IdUser", userId);
    cmd.Parameters.AddWithValue("@Name", Name);
    cmd.Parameters.AddWithValue("@screen_name", screen_name);
    cmd.Parameters.AddWithValue("@location", location);
    cmd.Parameters.AddWithValue("@followers", int.Parse(followers, CultureInfo.InvariantCulture));
    cmd.Parameters.AddWithValue("@friends", int.Parse(friends, CultureInfo.InvariantCulture));
    cmd.Parameters.AddWithValue("@time_zone", time_zone);
    cmd.Parameters.AddWithValue("@verified", verified);
    cmd.Parameters.AddWithValue("@statuses_count", int.Parse(statuses_count, CultureInfo.InvariantCulture));
    cmd.Parameters.AddWithValue("@lang", lang);
    cmd.ExecuteNonQuery();
    return userId;
}

/// <summary>
/// Tells whether a row with the given user id exists in the users table.
/// </summary>
/// <param name="IdUse">User id as text; it must parse as a 32-bit integer.</param>
/// <param name="cmd">Command bound to an open connection.</param>
/// <returns><c>true</c> when at least one matching row exists.</returns>
public static bool UserExist(string IdUse, SqlCommand cmd)
{
    PrepareCommand(cmd, "Select count(1) from users where IdUser=@IdUser");
    cmd.Parameters.AddWithValue("@IdUser", int.Parse(IdUse, CultureInfo.InvariantCulture));
    return Convert.ToInt32(cmd.ExecuteScalar()) > 0;
}

/// <summary>
/// Tells whether a row with the given tweet id exists in the Tweets table.
/// </summary>
/// <param name="IdTweet">Tweet id as text; it must parse as a 64-bit integer.</param>
/// <param name="cmd">Command bound to an open connection.</param>
/// <returns><c>true</c> when at least one matching row exists.</returns>
public static bool TweetExist(string IdTweet, SqlCommand cmd)
{
    PrepareCommand(cmd, "Select count(1) from Tweets where IdTweet=@IdTweet");
    cmd.Parameters.AddWithValue("@IdTweet", Int64.Parse(IdTweet, CultureInfo.InvariantCulture));
    return Convert.ToInt32(cmd.ExecuteScalar()) > 0;
}

/// <summary>
/// Stores a tweet and its hashtags unless a tweet with the same id is
/// already present.
/// </summary>
/// <param name="Xn">XML node of the tweet.</param>
/// <param name="IdCrawl">Crawl the tweet was collected in.</param>
/// <param name="iduser">Id of the author, as returned by <c>InsertUser</c>.</param>
/// <param name="cmd">Command bound to an open connection.</param>
/// <param name="XnHasthags">The tweet's <c>entities</c> node; may be null.</param>
/// <returns>The tweet id.</returns>
public static long InsertTweet(XmlNode Xn, Guid IdCrawl, int iduser, SqlCommand cmd, XmlNode XnHasthags)
{
    string IdTwitt = Xn.ChildNodes[2].InnerText;
    long tweetId = long.Parse(IdTwitt, CultureInfo.InvariantCulture);

    if (!TweetExist(IdTwitt, cmd))
    {
        string created_at = Xn.ChildNodes[1].InnerText;
        DateTime Twitt_Created = ParseTwitterTime(created_at);
        string Source = Xn.ChildNodes[5].InnerText;
        string retweet_count = Xn.SelectSingleNode("retweet_count").InnerText;
        string favorite_count = Xn.SelectSingleNode("favorite_count").InnerText;
        string text = Xn.ChildNodes[4].InnerText;

        // Tweet text and source are untrusted input; parameters store them
        // verbatim and the date no longer depends on the current culture.
        PrepareCommand(cmd,
            "Insert into Tweets (IdTweet,IdCrawl,IdUser,created_at,Source,retweet_count,favorite_count,text)" +
            " values (@IdTweet,@IdCrawl,@IdUser,@created_at,@Source,@retweet_count,@favorite_count,@text)");
        cmd.Parameters.AddWithValue("@IdTweet", tweetId);
        cmd.Parameters.AddWithValue("@IdCrawl", IdCrawl);
        cmd.Parameters.AddWithValue("@IdUser", iduser);
        cmd.Parameters.AddWithValue("@created_at", Twitt_Created);
        cmd.Parameters.AddWithValue("@Source", Source);
        cmd.Parameters.AddWithValue("@retweet_count", int.Parse(retweet_count, CultureInfo.InvariantCulture));
        cmd.Parameters.AddWithValue("@favorite_count", int.Parse(favorite_count, CultureInfo.InvariantCulture));
        cmd.Parameters.AddWithValue("@text", text);
        cmd.ExecuteNonQuery();

        InsertHasthags(XnHasthags, tweetId, cmd);
    }

    return tweetId;
}

/// <summary>
/// Inserts one Hasthag row per hashtag found under the tweet's entities node.
/// </summary>
/// <param name="Xn">The tweet's <c>entities</c> node; nothing happens when it is null.</param>
/// <param name="idTwitt">Id of the tweet the hashtags belong to.</param>
/// <param name="cmd">Command bound to an open connection.</param>
public static void InsertHasthags(XmlNode Xn, long idTwitt, SqlCommand cmd)
{
    if (Xn == null)
    {
        return;
    }

    foreach (XmlNode Xnn in Xn.ChildNodes)
    {
        // The entities node also holds other kinds of children; storing the
        // first value of each would record them as hashtags.
        // NOTE(review): assumes hashtag elements are named "hashtags" and
        // carry a "text" child, as in the Twitter v1.1 entities object.
        if (Xnn.Name != "hashtags")
        {
            continue;
        }

        XmlNode textNode = Xnn.SelectSingleNode("text");
        if (textNode == null)
        {
            continue;
        }

        PrepareCommand(cmd, "Insert into Hasthag (IdTweet,Text) values (@IdTweet,@Text)");
        cmd.Parameters.AddWithValue("@IdTweet", idTwitt);
        cmd.Parameters.AddWithValue("@Text", textNode.InnerText);
        cmd.ExecuteNonQuery();
    }
}

/// <summary>
/// Parses a timestamp in the "ddd MMM dd HH:mm:ss zzzz yyyy" layout using the
/// invariant culture.
/// </summary>
/// <param name="date">Timestamp text, e.g. the tweet's created_at value.</param>
/// <returns>The parsed date and time.</returns>
/// <remarks>
/// NOTE(review): the <c>this</c> modifier makes this an extension method,
/// which only compiles inside a static class - confirm the enclosing class.
/// </remarks>
public static DateTime ParseTwitterTime(this string date)
{
    const string format = "ddd MMM dd HH:mm:ss zzzz yyyy";
    return DateTime.ParseExact(date, format, CultureInfo.InvariantCulture);
}
This is all the code of our solution. In the next post we are going to see how to create a multidimensional database solution with a cube that allows users to analyze the information collected by our .NET application.
After running the application, your tweets table should look like this:
