Not that this is going to compete with a full-blown keyword or tag service like the ones Google offers, but I needed a very quick way to suggest keywords and tags as users filled out a blog form. There’s no fancy thesaurus or library of data, but it gives surprisingly good results. At the very least, it gives the user a few hints and ideas about what they should put in as keywords.

/// <summary>
/// Builds a comma-separated list of suggested keywords for a piece of text.
/// </summary>
/// <param name="text">The text to analyse; passed unchanged to <c>getKeywords</c>.</param>
/// <param name="minCount">Minimum number of occurrences a word needs in order to be suggested.</param>
/// <param name="minLength">Minimum number of characters a word needs in order to be suggested.</param>
/// <param name="additionalWords">
/// Comma-separated words appended verbatim after the detected keywords.
/// May be null or empty, in which case nothing is appended.
/// </param>
/// <returns>
/// The detected keywords followed by <paramref name="additionalWords"/>, joined with commas,
/// with any leading or trailing commas removed.
/// </returns>
public string GetKeywords(string text, int minCount, int minLength, string additionalWords)
{
    // Only the keys are needed here, so the dictionary's value type is left to inference.
    var keywordCounts = getKeywords(text, minCount, minLength);

    string keywords = string.Join(",", keywordCounts.Keys);

    // A null argument is treated the same as "no additional words" rather than throwing.
    if (!string.IsNullOrEmpty(additionalWords))
    {
        keywords += "," + additionalWords;
    }

    // Removes the separator left over when no keywords were detected, as well as
    // stray commas at either end of additionalWords.
    return keywords.Trim(',');
}

// Common English words that are never suggested as keywords.
// Held in a static set so it is built once and each lookup is a hash probe
// instead of a scan over the whole list.
private static readonly HashSet<string> s_stopWords = new HashSet<string>
{
    "about", "above", "across", "after", "again", "against", "all", "almost", "alone", "along",
    "already", "also", "although", "always", "among", "an", "and", "another", "any", "anybody",
    "anyone", "anything", "anywhere", "are", "area", "areas", "around", "as", "ask", "asked",
    "asking", "asks", "at", "away", "back", "backed", "backing", "backs", "be", "became",
    "because", "become", "becomes", "been", "before", "began", "behind", "being", "beings", "best",
    "better", "between", "big", "both", "but", "by", "came", "can", "cannot", "case",
    "cases", "certain", "certainly", "clear", "clearly", "come", "could", "did", "differ", "different",
    "differently", "do", "does", "done", "down", "downed", "downing", "downs", "during", "each",
    "early", "either", "end", "ended", "ending", "ends", "enough", "even", "evenly", "ever",
    "every", "everybody", "everyone", "everything", "everywhere", "face", "faces", "fact", "facts", "far",
    "felt", "few", "find", "finds", "first", "for", "four", "from", "full", "fully",
    "further", "furthered", "furthering", "furthers", "gave", "general", "generally", "get", "gets", "give",
    "given", "gives", "go", "going", "good", "goods", "got", "great", "greater", "greatest",
    "group", "grouped", "grouping", "groups", "had", "has", "have", "having", "he", "her",
    "here", "herself", "high", "higher", "highest", "him", "himself", "his", "how", "however",
    "if", "important", "in", "interest", "interested", "interesting", "interests", "into", "is", "it",
    "its", "itself", "just", "keep", "keeps", "kind", "knew", "know", "known", "knows",
    "large", "largely", "last", "later", "latest", "least", "less", "let", "lets", "like",
    "likely", "long", "longer", "longest", "made", "make", "making", "man", "many", "may",
    "me", "member", "members", "men", "might", "more", "most", "mostly", "mr", "mrs",
    "much", "must", "my", "myself", "necessary", "need", "needed", "needing", "needs", "never",
    "new", "newer", "newest", "next", "no", "nobody", "non", "noone", "not", "nothing",
    "now", "nowhere", "number", "numbers", "of", "off", "often", "old", "older", "oldest",
    "on", "once", "one", "only", "open", "opened", "opening", "opens", "or", "order",
    "ordered", "ordering", "orders", "other", "others", "our", "out", "over", "part", "parted",
    "parting", "parts", "per", "perhaps", "place", "places", "point", "pointed", "pointing", "points",
    "possible", "present", "presented", "presenting", "presents", "problem", "problems", "put", "puts", "quite",
    "rather", "really", "right", "room", "rooms", "said", "same", "saw", "say", "says",
    "second", "seconds", "see", "seem", "seemed", "seeming", "seems", "sees", "several", "shall",
    "she", "should", "show", "showed", "showing", "shows", "side", "sides", "since", "small",
    "smaller", "smallest", "so", "some", "somebody", "someone", "something", "somewhere", "state", "states",
    "still", "such", "sure", "take", "taken", "than", "that", "the", "their", "them",
    "then", "there", "therefore", "these", "they", "thing", "things", "think", "thinks", "this",
    "those", "though", "thought", "thoughts", "three", "through", "thus", "to", "today", "together",
    "too", "took", "toward", "turn", "turned", "turning", "turns", "two", "under", "until",
    "up", "upon", "us", "use", "used", "uses", "very", "want", "wanted", "wanting",
    "wants", "was", "way", "ways", "we", "well", "wells", "went", "were", "what",
    "when", "where", "whether", "which", "while", "who", "whole", "whose", "why", "will",
    "with", "within", "without", "work", "worked", "working", "works", "would", "year", "years",
    "yet", "you", "young", "younger", "youngest", "your", "yours"
};

/// <summary>
/// Counts how often each non-stop-word occurs in <paramref name="text"/> and returns the
/// words that are both frequent enough and long enough.
/// </summary>
/// <param name="text">
/// Raw text to analyse. It is passed through <c>StripHTML</c> and <c>StripCrap</c> first
/// (helpers defined elsewhere; presumably they remove markup and unwanted characters, to be
/// confirmed against their definitions), then lower-cased.
/// </param>
/// <param name="minCount">A word is kept only if it occurs at least this many times.</param>
/// <param name="minLength">A word is kept only if it has at least this many characters.</param>
/// <returns>
/// A dictionary mapping each qualifying lower-cased word to its occurrence count.
/// Empty when <paramref name="text"/> is null or empty.
/// </returns>
private Dictionary<string, int> getKeywords(string text, int minCount, int minLength)
{
    if (string.IsNullOrEmpty(text))
    {
        return new Dictionary<string, int>();
    }

    text = StripHTML(text);
    text = StripCrap(text);

    // Invariant casing keeps the comparison with the lower-case ASCII stop words
    // independent of the current culture.
    text = text.ToLowerInvariant();

    // Drop the listed punctuation, then split on any whitespace. A null separator means
    // "all white-space characters"; empty tokens produced by repeated whitespace are discarded.
    string[] words = Regex.Replace(text, @"[,.?\/;:\(\)]", string.Empty)
        .Split((char[])null, System.StringSplitOptions.RemoveEmptyEntries);

    // One grouping pass replaces re-counting the whole word list for every distinct word.
    return words
        .Where(word => !s_stopWords.Contains(word))
        .GroupBy(word => word)
        .Select(group => new { Word = group.Key, Count = group.Count() })
        .Where(wo => wo.Count >= minCount && wo.Word.Length >= minLength)
        .ToDictionary(wo => wo.Word, wo => wo.Count);
}

 

minCount is the minimum number of times a word must occur before it is caught.
minLength is the minimum length of word to work on.
additionalWords is a set of comma-separated words you want tagged(!) onto the end of the string. I just use this to force certain keywords and tags.

So taking the content of this page (less the code snippet) like this:

// Suggest words of at least 4 characters that occur at least twice, and always append "dave".
string tags = GetKeywords(articletext, 2, 4, "dave");

results in:

“keywords,tags,minimum,word,dave” which isn’t a lot different from what WordPress generated for me automatically, so I used it for my post tags.

Advertisements