)>}]
شركة التطبيقات المتكاملة لتصميم وبرمجة البرمجيات الخاصة ش.ش.و.
Integrated Applications Programming Company
Home » Code Library » Language (Ia.Cl.Model)

Public general use code classes and xml files that we've compiled and used over the years:

Language related support class including language list and codes.

    1: using Microsoft.AspNetCore.Http;
    2: using System;
    3: using System.Collections;
    4: using System.Collections.Generic;
    5: using System.Globalization;
    6: using System.IO;
    7: using System.Linq;
    8: using System.Reflection;
    9: using System.Text;
   10: using System.Text.RegularExpressions;
   11: using System.Xml.Linq;
   12:  
   13: namespace Ia.Cl.Model
   14: {
   15:     ////////////////////////////////////////////////////////////////////////////
   16:  
   17:     /// <summary publish="true">
   18:     /// Language related support class including language list and codes.
   19:     /// </summary>
   20:     /// <value> 
   21:     /// The Arabic part is built upon "The Unicode Standard, Version 5.2" with plain, accented, and koranic chars.
   22:     /// 
   23:     /// For language codes see: ISO 639-2 Language Code List - Codes for the representation of names of languages (Library of Congress)
   24:     /// See: href="http://www-01.sil.org/iso639-3/codes.asp?order=639_1"
   25:     /// See: href="http://en.wikipedia.org/wiki/List_of_ISO_639-2_codes"
   26:     /// </value>
   27:     /// <remarks> 
   28:     /// Copyright © 2001-2015 Jasem Y. Al-Shamlan (info@ia.com.kw), Integrated Applications - Kuwait. All Rights Reserved.
   29:     ///
   30:     /// This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by
   31:     /// the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
   32:     ///
   33:     /// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   34:     /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
   35:     /// 
   36:     /// You should have received a copy of the GNU General Public License along with this library. If not, see http://www.gnu.org/licenses.
   37:     /// 
   38:     /// Copyright notice: This notice may not be removed or altered from any source distribution.
   39:     /// </remarks>
   40:     public class Language
   41:     {
   42:         private static XDocument xDocument;
   43:         private static List<Language> languageList;
   44:  
   45:         private const string latinPlainUpper = "\u0041-\u005a"; // ABCDEFGHIJKLMNOPQRSTUVWXYZ
   46:         private const string latinPlainLower = "\u0061-\u007a"; // abcdefghijklmnopqrstuvwxyz
   47:         private const string latinAccent = "\u00c0-\u00fc"; // ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûü
   48:  
   49:         // http://en.wikipedia.org/wiki/Cyrillic_script_in_Unicode
   50:         private const string cyrillicPlain = "\u0400–\u04ff";
   51:         private const string cyrillicSupplement = "\u0500–\u052f";
   52:         private const string cyrillicExtendedA = "\u2de0–\u2dff";
   53:         private const string cyrillicExtendedB = "\ua640–\ua69f";
   54:         private const string choneticExtensions = "\u1d2b|\u1d78";
   55:  
   56:         private const string arabicPlain = "\u0621-\u063a|\u0641-\u064a"; // ءآأؤإئابةتثجحخدذرزسشصضطظعغـفقكلمنهوىي...
   57:         private const string arabicAccent = "\u064b-\u0652"; //  ًٌٍَُِّْٕٖٜٓٔٗ٘ٙٚٛٝٞ
   58:         private const string arabicDigit = "\u0660-\u0669";
   59:         private const string arabicKoran = "\u0617-\u061a|\u06d6-\u06ed"; //  ۖۗۘۙۚۛۜ۝۞ۣ۟۠ۡۢۤۥۦۧۨ۩۪ۭ۫۬
   60:         private const string arabicPoint = "\u0670";
   61:         private const string arabicKoranExtended = "\u0671";
   62:         private const string arabicExtended = "\u0671-\u06d3";
   63:         //arabicJoined = "\ufe81-\ufefc";
   64:  
   65:         private const string hiragana = "\u3041-\u309f";
   66:         private const string katakana = "\u30a0-\u30ff";
   67:         private const string katakanaPhonecticExtensions = "\u31f0-\u31ff";
   68:         private const string katakanaHalfwidth = "\uff65-\uff9f";
   69:  
   70:         private const string cjkUnifiedIdeographs = "\u4e00-\u9fbb";
   71:         private const string cjkUnifiedIdeographsExtentionA = "\u3400-\u4dbf";
   72:         private const string cjkUnifiedIdeographsExtentionB = "\u20000-\u200ff";
   73:         private const string cjkCompatibilityIdeographs = "\f900-\uf9ff";
   74:         private const string cjkCompatibilityIdeographsSupplement = "\u2f800-\u2f8bf";
   75:  
   76:         private const string hangulSyllables = "\uac00-\ud7af";
   77:         private const string hangulJamo = "\u1100-\u11ff";
   78:         private const string hangulCompatibilityJamo = "\u3130-\u318f";
   79:         private const string hangulHalfwidth = "\uffa0-\uffdc";
   80:  
   81:         private const string latin = latinPlainLower + "|" + latinPlainUpper + "|" + latinAccent;
   82:  
   83:         private const string cyrillic = @"\w+"; //cyrillic_plain + "|" + cyrillicSupplement + "|" + cyrillicExtendedA + "|" + cyrillicExtendedB + "|" + choneticExtensions;
   84:  
   85:         private const string arabic = arabicPlain + "|" + arabicAccent + "|" + arabicDigit + "|" + arabicKoran + "|" + arabicPoint + "|" + arabicKoranExtended;
   86:         private const string arabicNonWord = arabicAccent + "|" + arabicDigit + "|" + arabicKoran + "|" + arabicPoint + "|" + arabicKoranExtended;
   87:         private const string kana = hiragana + "|" + katakana + "|" + katakanaPhonecticExtensions + "|" + katakanaHalfwidth;
   88:         private const string hangul = hangulSyllables + "|" + hangulJamo + "|" + hangulCompatibilityJamo + "|" + hangulHalfwidth;
   89:  
   90:         //word = latin + "|" + arabic + "|" + kana + "|" + hangul;
   91:         //ideograph = cjk_unified_ideographs + "|" + cjk_unified_ideographs_extention_a + "|" + cjk_unified_ideographs_extention_b + "|" + cjk_compatibility_ideographs + "|" + cjk_compatibility_ideographs_supplement;
   92:  
   93:         //
   94:         // below: convert occasional "أرز بسمتي أبيض عضوى" to UTF8 Arabic
   95:         //name = Encoding.UTF8.GetString(Encoding.Default.GetBytes(name));
   96:  
   97:         /// <summary>Language identifier; LanguageByIso6391 fills it with the iso_639_1 attribute value.</summary>
   98:         public string Id { get; set; }
   99:  
  100:         /// <summary>Language symbol; LanguageByIso6391 fills it with the same iso_639_1 attribute value as Id.</summary>
  101:         public string Symbol { get; set; }
  102:  
  103:         /// <summary>Language name, read from the "name" attribute by LanguageByIso6391.</summary>
  104:         public string Name { get; set; }
  105:  
  106:         /// <summary>English name of the language, read from the "englishName" attribute by LanguageByIso6391.</summary>
  107:         public string EnglishName { get; set; }
  108:  
  109:         /// <summary>Arabic name of the language, read from the "arabicName" attribute by LanguageByIso6391.</summary>
  110:         public string ArabicName { get; set; }
  111:  
  112:         /// <summary>Presumably the ISO 639-1 code; not assigned anywhere in the visible code (TODO confirm).</summary>
  113:         public string Iso6391 { get; set; }
  114:  
  115:         /// <summary>NOTE(review): meaning unclear from the name, possibly an ISO 639-2 or 639-5 code; not assigned anywhere in the visible code (TODO confirm).</summary>
  116:         public string Iso63925 { get; set; }
  117:  
  118:         /// <summary>Presumably the ISO 639-3 code; not assigned anywhere in the visible code (TODO confirm).</summary>
  119:         public string Iso6393 { get; set; }
  120:  
  121:         ////////////////////////////////////////////////////////////////////////////
  122:  
  123:         /// <summary>
  124:         /// Creates an empty Language instance; no property is initialized.
  125:         /// </summary>
  126:         public Language()
  127:         {
  128:         }
  129:  
  130:         ////////////////////////////////////////////////////////////////////////////
  131:  
  132:         /// <summary>
  133:         /// 
  134:         /// </summary>
  135:         public Language(string iso_639_1)
  136:         {
  137:             Language language;
  138:  
  139:             language = LanguageByIso6391(iso_639_1);
  140:  
  141:             this.Id = language.Id;
  142:             this.Name = language.Name;
  143:             this.Symbol = language.Symbol;
  144:             this.EnglishName = language.EnglishName;
  145:             this.ArabicName = language.ArabicName;
  146:         }
  147:  
  148:         ////////////////////////////////////////////////////////////////////////////
  149:  
  150:         /// <summary>
  151:         /// 
  152:         /// </summary>
  153:         public static bool HasArabicLetter(string line)
  154:         {
  155:             bool has;
  156:  
  157:             if (line != null)
  158:             {
  159:                 has = Regex.IsMatch(line, "[" + arabic + "]");
  160:             }
  161:             else has = false;
  162:  
  163:             return has;
  164:         }
  165:  
  166:         ////////////////////////////////////////////////////////////////////////////
  167:  
  168:         /// <summary>
  169:         /// 
  170:         /// </summary>
  171:         public static ArrayList ListOfAllArabicWords
  172:         {
  173:             get
  174:             {
  175:                 string u;
  176:                 ArrayList wordArrayList;
  177:                 Assembly _assembly;
  178:                 //StreamReader streamReader;
  179:  
  180:                 wordArrayList = null;
  181:                 _assembly = Assembly.GetExecutingAssembly();
  182:  
  183:                 try
  184:                 {
  185:                     using (var streamReader = new StreamReader(_assembly.GetManifestResourceStream("Ia.Cl.model.data.language.List of all Arabic words.txt")))
  186:                     {
  187:                         wordArrayList = new ArrayList(100000);
  188:  
  189:                         if (streamReader.Peek() != -1)
  190:                         {
  191:                             while (!streamReader.EndOfStream)
  192:                             {
  193:                                 u = streamReader.ReadLine();
  194:                                 if (u.Length > 0) wordArrayList.Add(u.Trim());
  195:                             }
  196:                         }
  197:                     }
  198:                 }
  199:                 catch (Exception)
  200:                 {
  201:                     wordArrayList = null;
  202:                 }
  203:                 finally
  204:                 {
  205:                 }
  206:  
  207:                 return wordArrayList;
  208:             }
  209:         }
  210:  
  211:         ////////////////////////////////////////////////////////////////////////////
  212:  
  213:         /// <summary>
  214:         ///
  215:         /// </summary>
  216:         public static Language LanguageByIso6391(string iso_639_1)
  217:         {
  218:             Language language;
  219:  
  220:             language = (from q in XDocument.Elements("languageList").Elements("iso").Elements("language")
  221:                         where q.Attribute("iso_639_1").Value == iso_639_1
  222:                         select new Language
  223:                         {
  224:                             Id = q.Attribute("iso_639_1").Value,
  225:                             Symbol = q.Attribute("iso_639_1").Value,
  226:                             Name = q.Attribute("name").Value,
  227:                             EnglishName = q.Attribute("englishName").Value,
  228:                             ArabicName = q.Attribute("arabicName").Value
  229:                         }
  230:             ).First<Language>();
  231:  
  232:             return language;
  233:         }
  234:  
  235:         ////////////////////////////////////////////////////////////////////////////
  236:  
  237:         /// <summary>
  238:         ///
  239:         /// </summary>
  240:         public static string Ideograph(string language)
  241:         {
  242:             string s;
  243:  
  244:             s = "";
  245:  
  246:             if (language == "en") s = "";
  247:             else if (language == "es") s = "";
  248:             else if (language == "fr") s = "";
  249:             else if (language == "de") s = "";
  250:             else if (language == "nl") s = "";
  251:             else if (language == "ja") s = cjkUnifiedIdeographs + "|" + cjkUnifiedIdeographsExtentionA + "|" + cjkUnifiedIdeographsExtentionB + "|" + cjkCompatibilityIdeographs + "|" + cjkCompatibilityIdeographsSupplement;
  252:             else if (language == "ko") s = cjkUnifiedIdeographs + "|" + cjkUnifiedIdeographsExtentionA + "|" + cjkUnifiedIdeographsExtentionB + "|" + cjkCompatibilityIdeographs + "|" + cjkCompatibilityIdeographsSupplement;
  253:  
  254:             else if (language == "zh_traditional") s = cjkUnifiedIdeographs + "|" + cjkUnifiedIdeographsExtentionA + "|" + cjkUnifiedIdeographsExtentionB + "|" + cjkCompatibilityIdeographs + "|" + cjkCompatibilityIdeographsSupplement;
  255:             else if (language == "zh_simplified") s = cjkUnifiedIdeographs + "|" + cjkUnifiedIdeographsExtentionA + "|" + cjkUnifiedIdeographsExtentionB + "|" + cjkCompatibilityIdeographs + "|" + cjkCompatibilityIdeographsSupplement;
  256:             else if (language == "zh") s = cjkUnifiedIdeographs + "|" + cjkUnifiedIdeographsExtentionA + "|" + cjkUnifiedIdeographsExtentionB + "|" + cjkCompatibilityIdeographs + "|" + cjkCompatibilityIdeographsSupplement;
  257:  
  258:             else if (language == "ar") s = "";
  259:  
  260:             return s;
  261:         }
  262:  
  263:         ////////////////////////////////////////////////////////////////////////////
  264:  
  265:         /// <summary>
  266:         ///
  267:         /// </summary>
  268:         public static string WordCharacters(string language)
  269:         {
  270:             string s;
  271:  
  272:             s = "";
  273:  
  274:             if (language == "en") s = latin;
  275:             else if (language == "es") s = latin;
  276:             else if (language == "fr") s = latin;
  277:             else if (language == "de") s = latin;
  278:             else if (language == "nl") s = latin;
  279:             else if (language == "ru") s = cyrillic;
  280:             else if (language == "ja") s = kana;
  281:             else if (language == "ko") s = hangul;
  282:             else if (language == "zh_traditional") s = null;
  283:             else if (language == "zh_simplified") s = null;
  284:             else if (language == "ar") s = arabic;
  285:  
  286:             return s;
  287:         }
  288:  
  289:         ////////////////////////////////////////////////////////////////////////////
  290:  
  291:         /// <summary>
  292:         ///
  293:         /// </summary>
  294:         public static string WordsRegularExpression(string language)
  295:         {
  296:             string s;
  297:  
  298:             if (language == "ja") s = "[" + hiragana + "]+|[" + katakana + "]+|[" + katakanaPhonecticExtensions + "]+|[" + katakanaHalfwidth + "]+";
  299:             else
  300:             {
  301:                 s = "[" + WordCharacters(language) + "]+";
  302:             }
  303:  
  304:             return s;
  305:         }
  306:  
  307:         ////////////////////////////////////////////////////////////////////////////
  308:  
  309:         /// <summary>
  310:         ///
  311:         /// </summary>
  312:         public static string BasicWord(string language)
  313:         {
  314:             string s;
  315:  
  316:             s = "";
  317:  
  318:             if (language == "en") s = latinPlainLower;
  319:             else if (language == "es") s = latinPlainLower;
  320:             else if (language == "fr") s = latinPlainLower;
  321:             else if (language == "de") s = latinPlainLower;
  322:             else if (language == "nl") s = latinPlainLower;
  323:             else if (language == "ru") s = cyrillic;
  324:             else if (language == "ja") s = kana;
  325:             else if (language == "ko") s = hangul;
  326:             else if (language == "zh_traditional") s = null;
  327:             else if (language == "zh_simplified") s = null;
  328:             else if (language == "ar") s = arabicPlain;
  329:  
  330:             return s;
  331:         }
  332:  
  333:         ////////////////////////////////////////////////////////////////////////////
  334:  
  335:         /// <summary>
  336:         ///
  337:         /// </summary>
  338:         public static string BasicWordsRegularExpression(string language)
  339:         {
  340:             string s;
  341:  
  342:             if (language == "ja") s = "[" + hiragana + "]+|[" + katakana + "]+|[" + katakanaPhonecticExtensions + "]+|[" + katakanaHalfwidth + "]+";
  343:             else
  344:             {
  345:                 s = "[" + BasicWord(language) + "]+";
  346:             }
  347:  
  348:             return s;
  349:         }
  350:  
  351:         ////////////////////////////////////////////////////////////////////////////
  352:  
  353:         /// <summary>
  354:         ///
  355:         /// </summary>
  356:         public static string NonWord(string language)
  357:         {
  358:             string s;
  359:  
  360:             s = "";
  361:  
  362:             if (language == "en") s = "";
  363:             else if (language == "es") s = "";
  364:             else if (language == "fr") s = "";
  365:             else if (language == "de") s = "";
  366:             else if (language == "nl") s = "";
  367:             else if (language == "ja") s = "";
  368:             else if (language == "ko") s = "";
  369:             else if (language == "zh_traditional") s = "";
  370:             else if (language == "zh_simplified") s = "";
  371:             else if (language == "ar") s = arabicNonWord;
  372:  
  373:             return s;
  374:         }
  375:  
  376:         ////////////////////////////////////////////////////////////////////////////
  377:  
  378:         /// <summary>
  379:         ///
  380:         /// </summary>
  381:         public static string BasicForm(string word)
  382:         {
  383:             // for Western languages, this function takes in a word and returns a copy of the word with all capital letters changed to small, and all
  384:             // accent letters to standard ASCII ones. For Japanese and Korean, on the other hand, this function is not yet defined. It will just return the
  385:             // same argument unchanged, for now.
  386:  
  387:             word = word.Replace("ß", "ss");
  388:             word = word.ToLowerInvariant();
  389:  
  390:             word = word.Replace("ٱ", "ا");
  391:             word = Regex.Replace(word, "[" + Ia.Cl.Models.Language.NonWord("ar") + "]", "");
  392:  
  393:             word = RemoveDiacritics(word);
  394:  
  395:             return word;
  396:         }
  397:  
  398:         ////////////////////////////////////////////////////////////////////////////
  399:  
  400:         /// <summary>
  401:         /// Remove punctuation marks
  402:         /// http://stackoverflow.com/questions/18830813/how-can-i-remove-punctuation-from-input-text-in-java
  403:         /// </summary>
  404:         public static string RemovePunctuationMarks(string text)
  405:         {
  406:             text = Regex.Replace(text, "\\p{P}", "");
  407:  
  408:             return text;
  409:         }
  410:  
  411:         ////////////////////////////////////////////////////////////////////////////
  412:  
  413:         /// <summary>
  414:         ///
  415:         /// </summary>
  416:         public static string RemoveDiacritics(string text)
  417:         {
  418:             // http://stackoverflow.com/questions/249087/how-do-i-remove-diacritics-accents-from-a-string-in-net
  419:  
  420:             var normalizedString = text.Normalize(NormalizationForm.FormD);
  421:             var stringBuilder = new StringBuilder();
  422:  
  423:             foreach (var c in normalizedString)
  424:             {
  425:                 var unicodeCategory = CharUnicodeInfo.GetUnicodeCategory(c);
  426:                 if (unicodeCategory != UnicodeCategory.NonSpacingMark)
  427:                 {
  428:                     stringBuilder.Append(c);
  429:                 }
  430:             }
  431:  
  432:             return stringBuilder.ToString().Normalize(NormalizationForm.FormC);
  433:         }
  434:  
  435:         ////////////////////////////////////////////////////////////////////////////
  436:  
  437:         /// <summary>
  438:         /// Generate an array of "similar" Arabic pronouciations of a word, like أحمد and احمد. 
  439:         /// </summary>
  440:         public static List<string> ProduceSimilarArabicWords(string word)
  441:         {
  442:             List<string> list;
  443:             Hashtable hashtable;
  444:  
  445:             hashtable = new Hashtable(20);
  446:             list = new List<string>();
  447:  
  448:             // add words to Hashtable:
  449:             hashtable[word] = 1;
  450:             hashtable[word.Replace("ـ", "")] = 1;
  451:  
  452:             hashtable[Regex.Replace(word, "\\bو", "و ")] = 1;
  453:             hashtable[Regex.Replace(word, "\\bو\\s+", "و")] = 1;
  454:  
  455:             ProduceSimilarArabicWordsAddKeepBothVariationsToArrayList(word, "أ", "ا", ref hashtable);
  456:             ProduceSimilarArabicWordsAddKeepBothVariationsToArrayList(word, "إ", "ا", ref hashtable);
  457:  
  458:             ProduceSimilarArabicWordsAddKeepBothVariationsToArrayList(word, "ى", "ي", ref hashtable);
  459:             ProduceSimilarArabicWordsAddKeepBothVariationsToArrayList(word, "ة", "ه", ref hashtable);
  460:  
  461:             ProduceSimilarArabicWordsAddKeepBothVariationsToArrayList(word, "و", "ؤ", ref hashtable);
  462:             ProduceSimilarArabicWordsAddKeepBothVariationsToArrayList(word, "عبد ", "عبد", ref hashtable);
  463:  
  464:             ProduceSimilarArabicWordsAddKeepBothVariationsToArrayList(word, "ابو ", "ابو", ref hashtable);
  465:             ProduceSimilarArabicWordsAddKeepBothVariationsToArrayList(word, "داود ", "داوود", ref hashtable);
  466:  
  467:             ProduceSimilarArabicWordsAddKeepBothVariationsToArrayList(word, "آ", "ءا", ref hashtable);
  468:             ProduceSimilarArabicWordsAddKeepBothVariationsToArrayList(word, "آ", "ا", ref hashtable);
  469:  
  470:  
  471:  
  472:             foreach (string s in hashtable.Keys) if (s.Length > 0) list.Add(s);
  473:  
  474:             return list;
  475:         }
  476:  
  477:         ////////////////////////////////////////////////////////////////////////////
  478:  
  479:         /// <summary>
  480:         ///
  481:         /// </summary>
  482:         private static void ProduceSimilarArabicWordsAddKeepBothVariationsToArrayList(string word, string variant1, string variant2, ref Hashtable hashtable)
  483:         {
  484:             hashtable[word.Replace(variant1, variant2)] = 1;
  485:             hashtable[word.Replace(variant2, variant1)] = 1;
  486:         }
  487:  
  488:         ////////////////////////////////////////////////////////////////////////////
  489:  
  490:         /// <summary>
  491:         /// Convert Arabic numerals ١٢٣٤٥٦٧٨٩٠ to Latin 1234567890
  492:         /// </summary>
  493:         /// <param name="s">Arabic number in string format</param>
  494:         /// <returns>Latin equivalent</returns>
  495:         public static string ConvertArabicNumbersToLatin(string s)
  496:         {
  497:             s = s.Replace("١", "1");
  498:             s = s.Replace("٢", "2");
  499:             s = s.Replace("٣", "3");
  500:             s = s.Replace("٤", "4");
  501:             s = s.Replace("٥", "5");
  502:             s = s.Replace("٦", "6");
  503:             s = s.Replace("٧", "7");
  504:             s = s.Replace("٨", "8");
  505:             s = s.Replace("٩", "9");
  506:             s = s.Replace("٠", "0");
  507:  
  508:             return s;
  509:         }
  510:  
  511:         ////////////////////////////////////////////////////////////////////////////
  512:  
  513:         /// <summary>
  514:         /// Convert Latin numerals 1234567890 to Arabic ١٢٣٤٥٦٧٨٩٠
  515:         /// </summary>
  516:         /// <param name="s">Latin number in string format</param>
  517:         /// <returns>Arabic equivalent</returns>
  518:         public static string ConvertLatinNumbersToArabic(string s)
  519:         {
  520:             s = s.Replace("1", "١");
  521:             s = s.Replace("2", "٢");
  522:             s = s.Replace("3", "٣");
  523:             s = s.Replace("4", "٤");
  524:             s = s.Replace("5", "٥");
  525:             s = s.Replace("6", "٦");
  526:             s = s.Replace("7", "٧");
  527:             s = s.Replace("8", "٨");
  528:             s = s.Replace("9", "٩");
  529:             s = s.Replace("0", "٠");
  530:  
  531:             return s;
  532:         }
  533:  
  534:         ////////////////////////////////////////////////////////////////////////////
  535:  
  536:         /// <summary>
  537:         /// Correct an Arabic string to the proper format of Arabic
  538:         /// </summary>
  539:         /// <param name="name">Name to be examined</param>
  540:         /// <returns>String of correct format</returns>
  541:         public static string CorrectArabicNameNounStringFormat(string name)
  542:         {
  543:             name = Regex.Replace(name, @"\s+", @" ");
  544:             name = name.Trim();
  545:  
  546:             // remove all 'ـ' chars
  547:             name = name.Replace("ـ", "");
  548:  
  549:             // last 'ه' to 'ة' (on word border)
  550:             // exceptions: 'الله' ...etc.
  551:             if (!Regex.IsMatch(name, "\\bشاه\\b")) name = Regex.Replace(name, "ه\\b", "ة");
  552:             name = name.Replace("اللة", "الله");
  553:  
  554:             // remove first 'دكتور' 'د' 'الدكتور'
  555:             name = Regex.Replace(name, "\\bدكتور\\b", "");
  556:             name = Regex.Replace(name, "\\bالدكتور\\b", "");
  557:             name = Regex.Replace(name, "\\bدكتورة\\b", "");
  558:             name = Regex.Replace(name, "\\bالدكتورة\\b", "");
  559:             name = Regex.Replace(name, "\\bد\\b", "");
  560:  
  561:             name = Regex.Replace(name, @"\bعبد\s+", "عبد");
  562:             name = Regex.Replace(name, @"\s+و\s+", " و");
  563:  
  564:             // first ''last 'ى' to 'ي' (on word border)
  565:             if (!name.Contains("يسرى")
  566:                 && !name.Contains("يحيى")
  567:                 && !name.Contains("هدى")
  568:                 && !name.Contains("سلمى")
  569:                 && !name.Contains("منى")
  570:                 && !name.Contains("منتهى")
  571:                 && !name.Contains("ليلى")
  572:                 && !name.Contains("عيسى")
  573:                 && !name.Contains("موسى")
  574:                 && !name.Contains("سلوى")
  575:                 && !name.Contains("بشرى")
  576:                 && !name.Contains("صغرى")
  577:                 && !name.Contains("صدى")
  578:                 && !name.Contains("كبرى")
  579:                 && !name.Contains("مصطفى")
  580:                 && !name.Contains("ندى")
  581:                 && !name.Contains("يسرى")
  582:                 && !name.Contains("يمنى")
  583:                 && !name.Contains("مستشفى")
  584:                 && !name.Contains("تقوى")
  585:                 && !name.Contains("ذكرى")
  586:                 && !name.Contains("بشرى")
  587:                 && !name.Contains("موسيقى")
  588:                 && !name.Contains("ذكرى")
  589:                 && !name.Contains("ضحى")
  590:                 && !name.Contains("لبنى")
  591:                 && !name.Contains("ذكرى")
  592:                 && !name.Contains("مقتدى")
  593:                 && !name.Contains("مقهى")
  594:                 && !name.Contains("ملهى")
  595:                 && !name.Contains("منتدى")
  596:                 && !name.Contains("منتهى")
  597:                 && !name.Contains("يمنى")
  598:                 && !name.Contains("مرتضى")
  599:                 ) name = Regex.Replace(name, "ى\\b", "ي");
  600:  
  601:             // 
  602:             name = Regex.Replace(name, "\\bاحمد", "أحمد");
  603:             name = Regex.Replace(name, "\\bازياء", "أزياء");
  604:             name = Regex.Replace(name, "\\bاوكسجين", "أوكسجين");
  605:             name = Regex.Replace(name, "\\bاقبال", "إقبال");
  606:             name = Regex.Replace(name, "\\bابيار", "أبيار");
  607:             name = Regex.Replace(name, "اسنان", "أسنان");
  608:             name = Regex.Replace(name, "[أ|ا]براهيم", "إبراهيم");
  609:             name = Regex.Replace(name, "[أ|ا]سماعيل", "إسماعيل");
  610:             name = Regex.Replace(name, "اجياد", "أجياد");
  611:             name = Regex.Replace(name, "\\bامل\\b", "أمل");
  612:             name = Regex.Replace(name, "\\bايوب\\b", "أيوب");
  613:             name = Regex.Replace(name, "\\bايهاب\\b", "إيهاب");
  614:             name = Regex.Replace(name, "\\bايمن\\b", "أيمن");
  615:             name = Regex.Replace(name, "\\bايمان\\b", "إيمان");
  616:             name = Regex.Replace(name, "\\bاياد\\b", "أياد");
  617:             name = Regex.Replace(name, "\\bانيسة\\b", "أنيسة");
  618:             name = Regex.Replace(name, "\\bانيس\\b", "أنيس");
  619:             name = Regex.Replace(name, "\\bانور\\b", "أنور");
  620:             name = Regex.Replace(name, "\\bانوار\\b", "أنوار");
  621:             name = Regex.Replace(name, "\\bامينة\\b", "أمينة");
  622:             name = Regex.Replace(name, "\\bامين\\b", "أمين");
  623:             name = Regex.Replace(name, "\\bاميمة\\b", "أميمة");
  624:             name = Regex.Replace(name, "\\bامير\\b", "أمير");
  625:             name = Regex.Replace(name, "\\bاميرة\\b", "أميرة");
  626:             name = Regex.Replace(name, "\\bامنة\\b", "آمنة");
  627:             name = Regex.Replace(name, "\\bامثال\\b", "أمثال");
  628:             name = Regex.Replace(name, "\\bاماني\\b", "أماني");
  629:             name = Regex.Replace(name, "\\bامان\\b", "أمان");
  630:             name = Regex.Replace(name, "\\bامال\\b", "آمال");
  631:             name = Regex.Replace(name, "\\bام\\b", "أم");
  632:             name = Regex.Replace(name, "\\bالهام\\b", "إلهام");
  633:  
  634:             // 'أل to 'ال'
  635:             name = Regex.Replace(name, "\\bأل", "ال");
  636:  
  637:             // 'اا' at begining to 'ا'
  638:             name = name.Replace("\\bاا", "ا");
  639:  
  640:             // reduce any 3 concecutive similar arabic letters to only 2
  641:             name = name.Replace(@"(\d)\1\1", @"\1\1");
  642:  
  643:             name = Regex.Replace(name, @"\s+", @" ");
  644:             name = name.Trim();
  645:  
  646:             return name;
  647:         }
  648:  
  649:         ////////////////////////////////////////////////////////////////////////////
  650:  
  651:         /// <summary>
  652:         /// Remove non latin characters
  653:         /// </summary>
  654:         /// <param name="line">Line to be cleaned</param>
  655:         /// <returns>String of name cleaned</returns>
  656:         public static string RemoveNonLatinCharacters(string line)
  657:         {
  658:             line = Regex.Replace(line, "[^ " + latin + "]", "");
  659:  
  660:             return line;
  661:         }
  662:  
  663:         ////////////////////////////////////////////////////////////////////////////
  664:  
  665:         /// <summary>
  666:         /// Remove non Arabic and non Arabic-Extended letters and digits
  667:         /// </summary>
  668:         /// <param name="iso6393"></param>
  669:         /// <param name="line">Line to filter</param>
  670:         /// <returns>Filtered line</returns>
  671:         public static string RemoveNonNativeAndNonNativeExtendedLettersAndDigitsAccordingToLanguage(string iso6393, string line)
  672:         {
  673:             if (iso6393 == "ar")
  674:             {
  675:                 line = Regex.Replace(line, "[^ " + arabicPlain + "|" + arabicDigit + "|" + arabicExtended + "]", "");
  676:             }
  677:             else
  678:             {
  679:  
  680:             }
  681:  
  682:             return line;
  683:         }
  684:  
  685:         ////////////////////////////////////////////////////////////////////////////
  686:  
  687:         /// <summary>
  688:         /// Convert single latin digits to equivalent native word digits according to language
  689:         /// </summary>
  690:         /// <param name="iso6393"></param>
  691:         /// <param name="line">String to process</param>
  692:         /// <returns>Filtered string</returns>
  693:         public static string ConvertSingleLatinDigitsToNativeWordEquivalentAccordingToLanguage(string iso6393, string line)
  694:         {
  695:             if (iso6393 == "ar")
  696:             {
  697:                 line = ConvertArabicNumbersToLatin(line);
  698:  
  699:                 line = line.Replace("1", "واحد");
  700:                 line = line.Replace("2", "إثنين");
  701:                 line = line.Replace("3", "ثلاثة");
  702:                 line = line.Replace("4", "أربعة");
  703:                 line = line.Replace("5", "خمسة");
  704:                 line = line.Replace("6", "ستة");
  705:                 line = line.Replace("7", "سبعة");
  706:                 line = line.Replace("8", "ثمانية");
  707:                 line = line.Replace("9", "تسعة");
  708:                 line = line.Replace("0", "صفر");
  709:             }
  710:             else
  711:             {
  712:  
  713:             }
  714:  
  715:             return line;
  716:         }
  717:  
  718:         ////////////////////////////////////////////////////////////////////////////
  719:  
  720:         /// <summary>
  721:         /// Removes the latin transliterations of subject references of native words according to language. For example "الصيد" could be transliterated to "al sayid", and we want to remove the "al" from the transliteration.
  722:         /// </summary>
  723:         /// <param name="iso6393"></param>
  724:         /// <param name="line">Line to be probed and cleaned</param>
  725:         /// <returns>String of line cleaned</returns>
  726:         public static string RemoveLatinTransliterationsOfSubjectReferencesOfNativeWordTransliterationAccordingToLanguagee(string iso6393, string line)
  727:         {
  728:             // below: Arabic words
  729:             if (iso6393 == "ar")
  730:             {
  731:                 line = Regex.Replace(line, "\\bas\\b", "", RegexOptions.IgnoreCase);
  732:                 line = Regex.Replace(line, "\\bal\\b", "", RegexOptions.IgnoreCase);
  733:                 line = Regex.Replace(line, "\\bash\\b", "", RegexOptions.IgnoreCase);
  734:                 line = Regex.Replace(line, "\\bat\\b", "", RegexOptions.IgnoreCase);
  735:  
  736:                 line = Regex.Replace(line, "\\bad\\b", "", RegexOptions.IgnoreCase);
  737:                 line = Regex.Replace(line, "\\bar\\b", "", RegexOptions.IgnoreCase);
  738:                 line = Regex.Replace(line, "\\ban\\b", "", RegexOptions.IgnoreCase);
  739:                 line = Regex.Replace(line, "\\bath\\b", "", RegexOptions.IgnoreCase);
  740:  
  741:                 line = Regex.Replace(line, "\\baz\\b", "", RegexOptions.IgnoreCase);
  742:                 line = Regex.Replace(line, "\\baz̧( |\\b)", "", RegexOptions.IgnoreCase); // different than above
  743:             }
  744:             else
  745:             {
  746:  
  747:             }
  748:  
  749:             return line;
  750:         }
  751:  
  752:         ////////////////////////////////////////////////////////////////////////////
  753:  
  754:         /// <summary>
  755:         /// Produce Arabic Text of Latin Transliterations of Arabic Word Definit Article
  756:         /// </summary>
  757:         /// <param name="name">Latin Transliteration of Arabic word</param>
  758:         /// <returns>Arabic text</returns>
  759:         public static string ProduceArabicTextOfLatinTransliterationsOfArabicWordDefinitArticle(string name)
  760:         {
  761:             name = Regex.Replace(name, "\\bas\\s*\\b", "ال", RegexOptions.IgnoreCase);
  762:             name = Regex.Replace(name, "\\bal\\s*\\b", "ال", RegexOptions.IgnoreCase);
  763:             name = Regex.Replace(name, "\\bash\\s*\\b", "ال", RegexOptions.IgnoreCase);
  764:             name = Regex.Replace(name, "\\bat\\s*\\b", "ال", RegexOptions.IgnoreCase);
  765:             name = Regex.Replace(name, "\\bad\\s*\\b", "ال", RegexOptions.IgnoreCase);
  766:             name = Regex.Replace(name, "\\bar\\s*\\b", "ال", RegexOptions.IgnoreCase);
  767:             name = Regex.Replace(name, "\\ban\\s*\\b", "ال", RegexOptions.IgnoreCase);
  768:             name = Regex.Replace(name, "\\bath\\s*\\b", "ال", RegexOptions.IgnoreCase);
  769:             name = Regex.Replace(name, "\\baz\\s*\\b", "ال", RegexOptions.IgnoreCase);
  770:  
  771:             name = Regex.Replace(name, "\\baş\\s*\\b", "ال", RegexOptions.IgnoreCase);
  772:             name = Regex.Replace(name, "\\baţ\\s*\\b", "ال", RegexOptions.IgnoreCase);
  773:             name = Regex.Replace(name, "\\baḑ\\s*\\b", "ال", RegexOptions.IgnoreCase);
  774:             name = Regex.Replace(name, "\\badh\\s*\\b", "ال", RegexOptions.IgnoreCase);
  775:  
  776:             name = Regex.Replace(name, "\\baz̧\\s*\\b", "ال", RegexOptions.IgnoreCase);
  777:             name = Regex.Replace(name, "\\baş\\s*\\b", "ال", RegexOptions.IgnoreCase);
  778:  
  779:             return name;
  780:         }
  781:  
  782:         ////////////////////////////////////////////////////////////////////////////
  783:  
  784:         /// <summary>
  785:         /// Remove space after the latin transliteration of Arabic word's definit article
  786:         /// </summary>
  787:         /// <param name="name">Latin Transliteration of Arabic word with article space</param>
  788:         /// <returns>Latin transliteration without article space</returns>
  789:         public static string RemoveSpaceAfterLatinTransliterationsOfArabicWordsDefinitArticle(string name)
  790:         {
  791:             name = Regex.Replace(name, "\\b(as|al|ash|at|ad|ar|an|ath|az|aş|aţ|aḑ|adh|az̧)\\s*\\b", "$1", RegexOptions.IgnoreCase);
  792:             // note two different z and z̧
  793:  
  794:             return name;
  795:         }
  796:  
  797:         ////////////////////////////////////////////////////////////////////////////
  798:  
  799:         /// <summary>
  800:         /// Remove the wrong space between a native definit article and its word.
  801:         /// </summary>
  802:         /// <param name="iso6393"></param>
  803:         /// <param name="name">Native text</param>
  804:         /// <returns>Natieve text</returns>
  805:         public static string RemoveWrongSpaceBetweenNativeDefinitArticleAndItsWord(string iso6393, string name)
  806:         {
  807:             if (iso6393 == "ar")
  808:             {
  809:                 name = Regex.Replace(name, "\\b[أ|ا]ت ت", "الت", RegexOptions.IgnoreCase);
  810:                 name = Regex.Replace(name, "\\b[أ|ا]ث ث", "الث", RegexOptions.IgnoreCase);
  811:                 name = Regex.Replace(name, "\\b[أ|ا]د د", "الد", RegexOptions.IgnoreCase);
  812:                 name = Regex.Replace(name, "\\b[أ|ا]ذ ذ", "الذ", RegexOptions.IgnoreCase);
  813:                 name = Regex.Replace(name, "\\b[أ|ا]ر ر", "الر", RegexOptions.IgnoreCase);
  814:  
  815:                 name = Regex.Replace(name, "\\b[أ|ا]ز ز", "الز", RegexOptions.IgnoreCase);
  816:                 name = Regex.Replace(name, "\\b[أ|ا]س س", "الس", RegexOptions.IgnoreCase);
  817:                 name = Regex.Replace(name, "\\b[أ|ا]ش ش", "الش", RegexOptions.IgnoreCase);
  818:                 name = Regex.Replace(name, "\\b[أ|ا]ص ص", "الص", RegexOptions.IgnoreCase);
  819:                 name = Regex.Replace(name, "\\b[أ|ا]ض ض", "الض", RegexOptions.IgnoreCase);
  820:  
  821:                 name = Regex.Replace(name, "\\b[أ|ا]ط ط", "الط", RegexOptions.IgnoreCase);
  822:                 name = Regex.Replace(name, "\\b[أ|ا]ظ ظ", "الظ", RegexOptions.IgnoreCase);
  823:                 name = Regex.Replace(name, "\\b[أ|ا]ل ل", "الل", RegexOptions.IgnoreCase);
  824:                 name = Regex.Replace(name, "\\b[أ|ا]ن ن", "الن", RegexOptions.IgnoreCase);
  825:  
  826:                 name = Regex.Replace(name, "\\b[أ|ا]ل ", "ال", RegexOptions.IgnoreCase);
  827:             }
  828:             else
  829:             {
  830:  
  831:             }
  832:  
  833:             return name;
  834:         }
  835:  
  836:         ////////////////////////////////////////////////////////////////////////////
  837:  
  838:         /// <summary>
  839:         /// Produce approximate native text of latin transliterations of native words
  840:         /// </summary>
  841:         /// <param name="iso6393"></param>
  842:         /// <param name="name">Latin transliteration of native word</param>
  843:         /// <returns>Approximate native text</returns>
  844:         public static string ProduceApproximateNativeTextOfLatinTransliterationsOfNativeWordsAccrodingToLanguage(string iso6393, string name)
  845:         {
  846:             /*
  847:              * To match:
  848:              * '’' use '\u2019'
  849:              * '‘' use '\u2018'
  850:              */
  851:  
  852:             if (iso6393 == "ar")
  853:             {
  854:                 name = name.ToLower();
  855:  
  856:                 // for some reason I can not match "'\b"
  857:                 // does not work: name = Regex.Replace(name, "i\u2018\\b", "ع");
  858:                 // works(?): name = Regex.Replace(name, "i\u2018(\\b|$)", "ع");
  859:  
  860:                 name = Regex.Replace(name, "ayyā", "يا");
  861:                 name = Regex.Replace(name, "iyah\\b", "ية");
  862:                 name = name.Replace("dhdh", "ذ");
  863:                 name = name.Replace("thth", "ث");
  864:                 name = name.Replace("shsh", "ش");
  865:                 name = Regex.Replace(name, "deid", "ديد");
  866:  
  867:                 name = Regex.Replace(name, "lay", "لي");
  868:                 name = Regex.Replace(name, "way", "وي");
  869:                 name = Regex.Replace(name, "ain", "ين");
  870:                 name = Regex.Replace(name, "llá\\b", "لا");
  871:                 name = Regex.Replace(name, "iyā", "يا");
  872:                 name = name.Replace("yya", "ي");
  873:                 name = Regex.Replace(name, "\\bAya", "أيا");
  874:  
  875:                 name = name.Replace("mm", "م");
  876:                 name = name.Replace("bb", "ب");
  877:                 name = name.Replace("dd", "د");
  878:                 name = name.Replace("ff", "ف");
  879:                 name = name.Replace("ss", "س");
  880:                 name = name.Replace("ll", "ل");
  881:                 name = name.Replace("rr", "ر");
  882:                 name = name.Replace("zz", "ز");
  883:                 name = name.Replace("nn", "ن");
  884:                 name = name.Replace("jj", "ج");
  885:                 name = name.Replace("ww", "و");
  886:                 name = name.Replace("qq", "ق");
  887:                 name = name.Replace("tt", "ت");
  888:                 name = name.Replace("ḩḩ", "ح");
  889:                 name = name.Replace("kk", "ك");
  890:                 name = name.Replace("ţţ", "ط");
  891:                 name = name.Replace("şş", "ص");
  892:                 name = name.Replace("ḑḑ", "ض"); // not same
  893:                 name = name.Replace("ḍḍ", "ض");
  894:                 name = name.Replace("ay", "ي");
  895:                 name = name.Replace("au", "و");
  896:                 name = name.Replace("āy", "اي");
  897:                 name = name.Replace("kh", "خ");
  898:                 name = name.Replace("sh", "ش");
  899:                 name = name.Replace("th", "ث");
  900:                 name = name.Replace("dh", "ض");
  901:                 name = name.Replace("gh", "غ");
  902:  
  903:                 name = Regex.Replace(name, "ah\\b", "ة");
  904:                 name = Regex.Replace(name, "āt\\b", "ات");
  905:                 name = Regex.Replace(name, "at\\b", "ات");
  906:                 name = Regex.Replace(name, "ā\u2019i", "ائ");
  907:                 name = Regex.Replace(name, "ā\u2019(\\b|$)", "اء");
  908:                 name = Regex.Replace(name, "\u2018ā(\\b|$)", "عا");
  909:                 name = Regex.Replace(name, "\u2018a", "ع");
  910:                 name = Regex.Replace(name, "\u2018u", "ع");
  911:                 name = Regex.Replace(name, "\u2018ū", "عو");
  912:                 name = Regex.Replace(name, "\u2018ī", "عي");
  913:                 name = Regex.Replace(name, "i\u2018(\\b|$)", "ع");
  914:                 name = Regex.Replace(name, "i\u2018", "ئ");
  915:                 name = Regex.Replace(name, "i\u2019", "ئ");
  916:                 name = Regex.Replace(name, "ay\u2018\\b", "يع");
  917:                 name = Regex.Replace(name, "ay\\b", "ي");
  918:                 name = Regex.Replace(name, "ei", "ي");
  919:                 name = Regex.Replace(name, "yā", "يا");
  920:                 name = Regex.Replace(name, "ya", "ي");
  921:                 name = Regex.Replace(name, "īt\\b", "يت");
  922:                 name = Regex.Replace(name, "ḩá\\b", "حا");
  923:  
  924:                 name = Regex.Replace(name, "t\\b", "ة");
  925:                 name = name.Replace("ş", "ص");
  926:                 name = name.Replace("ḑ", "ض"); // not same
  927:                 name = name.Replace("ḍ", "ض");
  928:                 name = name.Replace("ţ", "ط");
  929:                 name = name.Replace("ā", "ا");
  930:                 name = name.Replace("a’", "ا");
  931:                 name = name.Replace("ī", "ي");
  932:                 name = name.Replace("ū", "و");
  933:                 name = name.Replace("ḩ", "ح");
  934:  
  935:                 name = Regex.Replace(name, "\\ba", "أ");
  936:                 name = Regex.Replace(name, "\\bu", "أ");
  937:                 name = Regex.Replace(name, "\\bi", "أ");
  938:                 name = Regex.Replace(name, "i\\b", "ي");
  939:                 name = Regex.Replace(name, "y\\b", "ي");
  940:  
  941:                 name = name.Replace("a", "");
  942:                 name = name.Replace("b", "ب");
  943:                 name = name.Replace("t", "ت");
  944:                 //name = name.Replace("c", "");
  945:                 name = name.Replace("j", "ج");
  946:                 name = name.Replace("g", "ج");
  947:                 name = name.Replace("d", "د");
  948:                 name = name.Replace("r", "ر");
  949:                 name = name.Replace("r", "ر");
  950:                 name = name.Replace("s", "س");
  951:                 name = name.Replace("t", "ت");
  952:                 name = name.Replace("d", "د");
  953:                 name = name.Replace("e", "");
  954:                 name = name.Replace("h", "ه");
  955:                 name = name.Replace("i", "");
  956:                 name = name.Replace("j", "ج");
  957:                 name = name.Replace("p", "ب");
  958:                 name = name.Replace("f", "ف");
  959:                 name = name.Replace("q", "ق");
  960:                 name = name.Replace("k", "ك");
  961:                 name = name.Replace("l", "ل");
  962:                 name = name.Replace("m", "م");
  963:                 name = name.Replace("n", "ن");
  964:                 name = name.Replace("h", "ه");
  965:                 name = name.Replace("u", "");
  966:                 name = name.Replace("v", "ف");
  967:                 name = name.Replace("o", "");
  968:                 name = name.Replace("w", "و");
  969:                 name = name.Replace("y", "");
  970:                 name = name.Replace("w", "و");
  971:                 name = name.Replace("z̧", "ظ"); // note this is not 'z'. KEEP THIS ORDER.
  972:                 name = name.Replace("z", "ز");
  973:                 name = name.Replace("á", "اء");
  974:  
  975:                 name = Regex.Replace(name, "\\b\u2018", "ع");
  976:  
  977:                 //name = name.Replace("7", "ح");
  978:                 //name = name.Replace("3", "ع");
  979:                 //name = name.Replace("6", "ط");
  980:  
  981:                 // name = name.Replace("", "");
  982:                 // name = Regex.Replace(name, "\\b\\b", "");
  983:             }
  984:             else
  985:             {
  986:  
  987:             }
  988:  
  989:             return name;
  990:         }
  991:  
  992:         ////////////////////////////////////////////////////////////////////////////
  993:  
  994:         /// <summary>
  995:         ///
  996:         /// </summary>
  997:         public static string ProduceExactNativeTextOfSingleLatinTransliterationsOfNativeWordsAccordingToLanguage(string iso6393, string name)
  998:         {
  999:             /*
 1000:              * Remove english worlds like Island, Airport, Ridge, North, Ridge, Customs, Long, South, East, Inner,
 1001:              * Channel, Reef Islet...etc.
 1002:              * 
 1003:              * To match:
 1004:              * '’' use '\u2019'
 1005:              * '‘' use "\u2018'
 1006:              */
 1007:  
 1008:             if (iso6393 == "ar")
 1009:             {
 1010:                 // kw
 1011:                 name = Regex.Replace(name, "\\bKhalij\\b", "خليج", RegexOptions.IgnoreCase);
 1012:                 // name = Regex.Replace(name, "\\bAl Kuwait\\b", "الكويت", RegexOptions.IgnoreCase);
 1013:                 //name = Regex.Replace(name, "\\bDā’irī\\b", "دائري", RegexOptions.IgnoreCase);
 1014:  
 1015:                 name = Regex.Replace(name, "\\bSha\u2018īb\\b", "شعيب", RegexOptions.IgnoreCase);
 1016:                 name = Regex.Replace(name, "\\bSha\u2018ib\\b", "شعيب", RegexOptions.IgnoreCase);
 1017:                 name = Regex.Replace(name, "\\bShi\u2018b\\b", "شعب", RegexOptions.IgnoreCase);
 1018:  
 1019:                 name = Regex.Replace(name, "\\bAbū\\b", "أبو", RegexOptions.IgnoreCase);
 1020:                 name = Regex.Replace(name, "\\bBū\\b", "أبو", RegexOptions.IgnoreCase);
 1021:                 name = Regex.Replace(name, "\\bḨisū\\b", "حسو", RegexOptions.IgnoreCase);
 1022:  
 1023:                 name = Regex.Replace(name, "\\bSayḩ\\b", "سيح", RegexOptions.IgnoreCase);
 1024:                 name = Regex.Replace(name, "\\bKharmat\\b", "خرمة", RegexOptions.IgnoreCase);
 1025:  
 1026:                 name = Regex.Replace(name, "\\bŢawī\\b", "طوي", RegexOptions.IgnoreCase);
 1027:                 name = Regex.Replace(name, "\\bKhaţmat\\b", "خطمة", RegexOptions.IgnoreCase);
 1028:                 name = Regex.Replace(name, "\\bYadd\\b", "يد", RegexOptions.IgnoreCase);
 1029:                 name = Regex.Replace(name, "\\bQāriḩat\\b", "قارحات", RegexOptions.IgnoreCase);
 1030:                 name = Regex.Replace(name, "\\bGhāfat\\b", "غافة", RegexOptions.IgnoreCase);
 1031:                 name = Regex.Replace(name, "\\bWara\\b", "وارة", RegexOptions.IgnoreCase);
 1032:                 name = Regex.Replace(name, "\\bYabbah\\b", "يبة", RegexOptions.IgnoreCase);
 1033:                 name = Regex.Replace(name, "\\bBidud\\b", "بدود", RegexOptions.IgnoreCase);
 1034:                 name = Regex.Replace(name, "\\bMayy\\b", "مي", RegexOptions.IgnoreCase);
 1035:                 name = Regex.Replace(name, "\\bYarīrah\\b", "يريرة", RegexOptions.IgnoreCase);
 1036:                 name = Regex.Replace(name, "\\bNā\u2019if\\b", "نايف", RegexOptions.IgnoreCase);
 1037:                 name = Regex.Replace(name, "\\bBarqat\\b", "برقة", RegexOptions.IgnoreCase);
 1038:                 name = Regex.Replace(name, "\\bHaz\u2019", "هزع", RegexOptions.IgnoreCase);
 1039:                 name = Regex.Replace(name, "\\bDhi\u2019b", "ذئب", RegexOptions.IgnoreCase);
 1040:                 name = Regex.Replace(name, "\\bSuyūl\\b", "سيول", RegexOptions.IgnoreCase);
 1041:                 name = Regex.Replace(name, "\\bFā\u2019iyah\\b", "فائية", RegexOptions.IgnoreCase);
 1042:                 name = Regex.Replace(name, "Khawr Fakkān", "خورفكان", RegexOptions.IgnoreCase);
 1043:  
 1044:                 name = Regex.Replace(name, "\\bMusá\\b", "موسى", RegexOptions.IgnoreCase);
 1045:  
 1046:                 name = Regex.Replace(name, "\\bMuhammad\\b", "محمد", RegexOptions.IgnoreCase);
 1047:                 name = Regex.Replace(name, "\\bMohammad\\b", "محمد", RegexOptions.IgnoreCase);
 1048:                 name = Regex.Replace(name, "\\bMuhammed\\b", "محمد", RegexOptions.IgnoreCase);
 1049:                 name = Regex.Replace(name, "\\bMohammed\\b", "محمد", RegexOptions.IgnoreCase);
 1050:                 name = Regex.Replace(name, "\\bAhmad\\b", "أحمد", RegexOptions.IgnoreCase);
 1051:                 name = Regex.Replace(name, "\\bAhmed\\b", "أحمد", RegexOptions.IgnoreCase);
 1052:  
 1053:                 name = Regex.Replace(name, "\\bWādī\\b", "وادي", RegexOptions.IgnoreCase);
 1054:                 name = Regex.Replace(name, "\\bWadi\\b", "وادي", RegexOptions.IgnoreCase);
 1055:                 name = Regex.Replace(name, "\\bWâdi\\b", "وادي", RegexOptions.IgnoreCase);
 1056:                 name = Regex.Replace(name, "\\bWād\\b", "وادي", RegexOptions.IgnoreCase);
 1057:                 name = Regex.Replace(name, "\\bWudayy\\b", "ودي", RegexOptions.IgnoreCase);
 1058:  
 1059:                 name = Regex.Replace(name, "\\bJaww\\b", "جو", RegexOptions.IgnoreCase);
 1060:                 name = Regex.Replace(name, "\\bDarb\\b", "درب", RegexOptions.IgnoreCase);
 1061:                 name = Regex.Replace(name, "\\bManākh\\b", "مناخ", RegexOptions.IgnoreCase);
 1062:                 name = Regex.Replace(name, "\\bJibāl\\b", "جبال", RegexOptions.IgnoreCase);
 1063:                 name = Regex.Replace(name, "\\bJibal\\b", "جبال", RegexOptions.IgnoreCase);
 1064:                 name = Regex.Replace(name, "\\bRawḑat\\b", "روضة", RegexOptions.IgnoreCase);
 1065:  
 1066:                 name = Regex.Replace(name, "\\bNufūd\\b", "نفود", RegexOptions.IgnoreCase);
 1067:                 name = Regex.Replace(name, "\\bNafūd\\b", "نفود", RegexOptions.IgnoreCase);
 1068:  
 1069:                 name = Regex.Replace(name, "\\bMazra\u2018at\\b", "مزرعة", RegexOptions.IgnoreCase);
 1070:                 name = Regex.Replace(name, "\\bFayḑat\\b", "فيضة", RegexOptions.IgnoreCase);
 1071:                 name = Regex.Replace(name, "\\bKhashm\\b", "خشم", RegexOptions.IgnoreCase);
 1072:  
 1073:                 name = Regex.Replace(name, "\\bĀbār\\b", "آبار", RegexOptions.IgnoreCase);
 1074:                 name = Regex.Replace(name, "\\bBi\u2019r\\b", "بئر", RegexOptions.IgnoreCase);
 1075:                 name = Regex.Replace(name, "\\bBi\u2018r\\b", "بئر", RegexOptions.IgnoreCase);
 1076:  
 1077:                 name = Regex.Replace(name, "\\bQalīb\\b", "قليب", RegexOptions.IgnoreCase);
 1078:                 name = Regex.Replace(name, "\\bQulayyib\\b", "قليب", RegexOptions.IgnoreCase);
 1079:                 name = Regex.Replace(name, "\\bQulbān\\b", "قلبان", RegexOptions.IgnoreCase);
 1080:  
 1081:                 name = Regex.Replace(name, "\\bḐulay\u2018\\b", "ضليع", RegexOptions.IgnoreCase);
 1082:                 name = Regex.Replace(name, "\\bḐulū\u2018\\b", "ضلوع", RegexOptions.IgnoreCase);
 1083:  
 1084:                 name = Regex.Replace(name, "\\bḨaşāt\\b", "حصاة", RegexOptions.IgnoreCase);
 1085:  
 1086:                 name = Regex.Replace(name, "\\bJabal\\b", "جبل", RegexOptions.IgnoreCase);
 1087:                 name = Regex.Replace(name, "\\bJubayl\\b", "جبيل", RegexOptions.IgnoreCase);
 1088:  
 1089:  
 1090:                 name = Regex.Replace(name, "\\bHijrat\\b", "هجرة", RegexOptions.IgnoreCase);
 1091:                 name = Regex.Replace(name, "\\bḤazm\\b", "حزم", RegexOptions.IgnoreCase);
 1092:                 name = Regex.Replace(name, "\\bḨazm\\b", "حزم", RegexOptions.IgnoreCase);
 1093:                 name = Regex.Replace(name, "\\bḨuzūm\\b", "حزوم", RegexOptions.IgnoreCase);
 1094:  
 1095:                 name = Regex.Replace(name, "\\bKhabrā\u2019\\b", "خبراء", RegexOptions.IgnoreCase);
 1096:                 name = Regex.Replace(name, "\\bKhubayrā\u2019\\b", "خبيراء", RegexOptions.IgnoreCase);
 1097:                 name = Regex.Replace(name, "\\bKhabrat\\b", "خبرة", RegexOptions.IgnoreCase);
 1098:  
 1099:                 name = Regex.Replace(name, "\\bFāj\\b", "فج", RegexOptions.IgnoreCase);
 1100:                 name = Regex.Replace(name, "\\bSabkhat\\b", "صبخة", RegexOptions.IgnoreCase);
 1101:                 name = Regex.Replace(name, "\\bQā\u2018\\b", "قاع", RegexOptions.IgnoreCase);
 1102:                 name = Regex.Replace(name, "\\bḨalāt\\b", "حالة", RegexOptions.IgnoreCase);
 1103:  
 1104:                 name = Regex.Replace(name, "\u2018Irq\\b", "عرق", RegexOptions.IgnoreCase); // like that
 1105:                 name = Regex.Replace(name, "\u2018Ays\\b", "عيس", RegexOptions.IgnoreCase); // like that
 1106:                 name = Regex.Replace(name, "\u2018Ayb\\b", "عيب", RegexOptions.IgnoreCase); // like that
 1107:  
 1108:                 name = Regex.Replace(name, "\\bFiyāḑ\\b", "فياض", RegexOptions.IgnoreCase);
 1109:                 name = Regex.Replace(name, "\\bHaḑb\\b", "هدب", RegexOptions.IgnoreCase);
 1110:                 name = Regex.Replace(name, "\\bḨarrat\\b", "حرة", RegexOptions.IgnoreCase);
 1111:  
 1112:                 name = Regex.Replace(name, "\\bKhawr\\b", "خور", RegexOptions.IgnoreCase);
 1113:                 name = Regex.Replace(name, "\\bAin\\b", "عين", RegexOptions.IgnoreCase);
 1114:                 name = Regex.Replace(name, "\\b\u2018Ayn\\b", "عين", RegexOptions.IgnoreCase);
 1115:                 name = Regex.Replace(name, "\\bḨijl\\b", "حجل", RegexOptions.IgnoreCase);
 1116:  
 1117:                 name = Regex.Replace(name, "\\bḨattá\\b", "حتا", RegexOptions.IgnoreCase);
 1118:  
 1119:                 name = Regex.Replace(name, "\\bShiqqat\\b", "شقة", RegexOptions.IgnoreCase);
 1120:                 name = Regex.Replace(name, "\\b\u2018Urūq\\b", "عروق", RegexOptions.IgnoreCase);
 1121:  
 1122:                 name = Regex.Replace(name, "\\bQalamat\\b", "قلمة", RegexOptions.IgnoreCase);
 1123:                 name = Regex.Replace(name, "\\bQulumat\\b", "قلمة", RegexOptions.IgnoreCase);
 1124:  
 1125:                 name = Regex.Replace(name, "\\bHadh\\b", "حد", RegexOptions.IgnoreCase);
 1126:                 name = Regex.Replace(name, "\\bUmm\\b", "أم", RegexOptions.IgnoreCase);
 1127:                 name = Regex.Replace(name, "\\bUmmahāt\\b", "أمهات", RegexOptions.IgnoreCase);
 1128:  
 1129:                 name = Regex.Replace(name, "\\bRimāl\\b", "رمال", RegexOptions.IgnoreCase);
 1130:                 name = Regex.Replace(name, "\\bRamlat\\b", "رملة", RegexOptions.IgnoreCase);
 1131:  
 1132:                 name = Regex.Replace(name, "\\bKhayţ\\b", "خيط", RegexOptions.IgnoreCase);
 1133:  
 1134:                 name = Regex.Replace(name, "\\b\u2018Irq\\b", "عرق", RegexOptions.IgnoreCase);
 1135:                 name = Regex.Replace(name, "\\b\u2018Urayq\\b", "عريق", RegexOptions.IgnoreCase);
 1136:  
 1137:                 name = Regex.Replace(name, "\\b\u2018Alam\\b", "علم", RegexOptions.IgnoreCase);
 1138:                 name = Regex.Replace(name, "\\bJazīrat\\b", "جزيرة", RegexOptions.IgnoreCase);
 1139:                 name = Regex.Replace(name, "\\bJezîret\\b", "جزيرة", RegexOptions.IgnoreCase);
 1140:  
 1141:                 name = Regex.Replace(name, "\\bJāl\\b", "جال", RegexOptions.IgnoreCase);
 1142:  
 1143:                 name = Regex.Replace(name, "\\bRās\\b", "رأس", RegexOptions.IgnoreCase);
 1144:                 name = Regex.Replace(name, "\\bRa\u2019s", "رأس", RegexOptions.IgnoreCase); // like this
 1145:  
 1146:                 name = Regex.Replace(name, "\\bQal\u2018at\\b", "قلعة", RegexOptions.IgnoreCase);
 1147:                 name = Regex.Replace(name, "\\bSharm\\b", "شرم", RegexOptions.IgnoreCase);
 1148:                 name = Regex.Replace(name, "\\bQit\u2018at\\b", "قطعة", RegexOptions.IgnoreCase);
 1149:                 name = Regex.Replace(name, "\\bTell\\b", "تل", RegexOptions.IgnoreCase);
 1150:                 name = Regex.Replace(name, "\\bTall\\b", "تل", RegexOptions.IgnoreCase);
 1151:                 name = Regex.Replace(name, "\\bTulūl\\b", "تلول", RegexOptions.IgnoreCase);
 1152:  
 1153:                 name = Regex.Replace(name, "\\bZibārat\\b", "زبارة", RegexOptions.IgnoreCase);
 1154:  
 1155:                 name = Regex.Replace(name, "\\bMiqnāt\\b", "مقناة", RegexOptions.IgnoreCase);
 1156:                 name = Regex.Replace(name, "\\bQārat\\b", "قارة", RegexOptions.IgnoreCase);
 1157:  
 1158:                 name = Regex.Replace(name, "\\bMadīnat\\b", "مدينة", RegexOptions.IgnoreCase);
 1159:                 name = Regex.Replace(name, "\\bMaqbarat\\b", "مقبرة", RegexOptions.IgnoreCase);
 1160:                 name = Regex.Replace(name, "\\bRuḩayyat\\b", "رحية", RegexOptions.IgnoreCase);
 1161:                 name = Regex.Replace(name, "\\bDawḩat\\b", "دوحة", RegexOptions.IgnoreCase);
 1162:                 name = Regex.Replace(name, "\\bQaşr\\b", "قصر", RegexOptions.IgnoreCase);
 1163:                 name = Regex.Replace(name, "\\bNuhaydayn\\b", "نهيدين", RegexOptions.IgnoreCase);
 1164:                 name = Regex.Replace(name, "\\bBaţn\\b", "بطن", RegexOptions.IgnoreCase);
 1165:                 name = Regex.Replace(name, "\\bKhullat\\b", "خلات", RegexOptions.IgnoreCase);
 1166:                 name = Regex.Replace(name, "\\bGhars\\b", "غرس", RegexOptions.IgnoreCase);
 1167:  
 1168:                 name = Regex.Replace(name, "\\bMuqayrāt\\b", "مقيرات", RegexOptions.IgnoreCase);
 1169:                 name = Regex.Replace(name, "\\bGhadīr\\b", "غدير", RegexOptions.IgnoreCase);
 1170:                 name = Regex.Replace(name, "\\b\u2018Uwaynat\\b", "عوينات", RegexOptions.IgnoreCase);
 1171:                 name = Regex.Replace(name, "\\b\u2018Uqlat\\b", "عقلة", RegexOptions.IgnoreCase);
 1172:                 name = Regex.Replace(name, "\\bḨālat\\b", "حالة", RegexOptions.IgnoreCase);
 1173:                 name = Regex.Replace(name, "\\bKhalīj\\b", "خليج", RegexOptions.IgnoreCase);
 1174:  
 1175:                 name = Regex.Replace(name, "\\bDaḩl\\b", "دحل", RegexOptions.IgnoreCase);
 1176:                 name = Regex.Replace(name, "\\bDaḩal\\b", "دحل", RegexOptions.IgnoreCase);
 1177:                 name = Regex.Replace(name, "\\bDaḩlat\\b", "دحلة", RegexOptions.IgnoreCase);
 1178:  
 1179:                 name = Regex.Replace(name, "\\bDil\u2018ān\\b", "دلعان", RegexOptions.IgnoreCase);
 1180:                 name = Regex.Replace(name, "\\bBanī\\b", "بني", RegexOptions.IgnoreCase);
 1181:                 name = Regex.Replace(name, "\\bQurūn\\b", "قرون", RegexOptions.IgnoreCase);
 1182:                 name = Regex.Replace(name, "\\bMīnā\u2019\\b", "ميناء", RegexOptions.IgnoreCase);
 1183:                 name = Regex.Replace(name, "\\bKhuyūţ\\b", "خيوط", RegexOptions.IgnoreCase);
 1184:                 name = Regex.Replace(name, "\\bḐil\u2018ān\\b", "دلعان", RegexOptions.IgnoreCase);
 1185:  
 1186:                 name = Regex.Replace(name, "\\bRijm\\b", "رجم", RegexOptions.IgnoreCase);
 1187:                 name = Regex.Replace(name, "\\bRujm\\b", "رجم", RegexOptions.IgnoreCase);
 1188:                 name = Regex.Replace(name, "\\bRujūm\\b", "رجوم", RegexOptions.IgnoreCase);
 1189:  
 1190:                 name = Regex.Replace(name, "\\bHaḑabat\\b", "حدبة", RegexOptions.IgnoreCase);
 1191:                 name = Regex.Replace(name, "\\bAbraq\\b", "أبرق", RegexOptions.IgnoreCase);
 1192:                 name = Regex.Replace(name, "\\bSayḩ\\b", "سيح", RegexOptions.IgnoreCase);
 1193:                 name = Regex.Replace(name, "\\Sāqiyat\\b", "ساقية", RegexOptions.IgnoreCase);
 1194:                 name = Regex.Replace(name, "\\bQurayy\\b", "قري", RegexOptions.IgnoreCase);
 1195:                 name = Regex.Replace(name, "\\bSuḩūl\\b", "سهول", RegexOptions.IgnoreCase);
 1196:                 name = Regex.Replace(name, "\\bQuwīd\\b", "قويد", RegexOptions.IgnoreCase);
 1197:  
 1198:                 name = Regex.Replace(name, "\\bMishāsh\\b", "مشاش", RegexOptions.IgnoreCase);
 1199:                 name = Regex.Replace(name, "\\bMushāsh\\b", "مشاش", RegexOptions.IgnoreCase);
 1200:  
 1201:                 name = Regex.Replace(name, "\\bKhubb\\b", "خب", RegexOptions.IgnoreCase);
 1202:                 name = Regex.Replace(name, "\\bKhabb\\b", "خب", RegexOptions.IgnoreCase);
 1203:  
 1204:                 name = Regex.Replace(name, "\\bŞayhad\\b", "صيهد", RegexOptions.IgnoreCase);
 1205:  
 1206:                 name = Regex.Replace(name, "\\bSumr\\b", "سمر", RegexOptions.IgnoreCase);
 1207:                 name = Regex.Replace(name, "\\bSamrat\\b", "سمرة", RegexOptions.IgnoreCase);
 1208:                 name = Regex.Replace(name, "\\bSamrā\u2019\\b", "سمراء", RegexOptions.IgnoreCase);
 1209:  
 1210:                 name = Regex.Replace(name, "\\bQuşmat\\b", "قصمة", RegexOptions.IgnoreCase);
 1211:                 name = Regex.Replace(name, "\\bKhafs\\b", "خفس", RegexOptions.IgnoreCase);
 1212:                 name = Regex.Replace(name, "\\bJawb\\b", "جوب", RegexOptions.IgnoreCase);
 1213:                 name = Regex.Replace(name, "\\b\u2018Alāt\\b", "علة", RegexOptions.IgnoreCase);
 1214:                 name = Regex.Replace(name, "\\bQawz\\b", "قوز", RegexOptions.IgnoreCase);
 1215:                 name = Regex.Replace(name, "\\bRijlat\\b", "رجلة", RegexOptions.IgnoreCase);
 1216:                 name = Regex.Replace(name, "\\bŞafrā\u2019\\b", "صفراء", RegexOptions.IgnoreCase);
 1217:                 name = Regex.Replace(name, "\\bQarn\\b", "قرن", RegexOptions.IgnoreCase);
 1218:                 name = Regex.Replace(name, "\\bDibdibat\\b", "دبدبة", RegexOptions.IgnoreCase);
 1219:                 name = Regex.Replace(name, "\\bThulmat\\b", "ثلمة", RegexOptions.IgnoreCase);
 1220:                 name = Regex.Replace(name, "\\bBarqā\u2019\\b", "برقاء", RegexOptions.IgnoreCase);
 1221:                 name = Regex.Replace(name, "\\bGhār\\b", "غار", RegexOptions.IgnoreCase);
 1222:                 name = Regex.Replace(name, "\\bJisrat\\b", "جسرة", RegexOptions.IgnoreCase);
 1223:                 name = Regex.Replace(name, "\\bMadārat\\b", "مدارة", RegexOptions.IgnoreCase);
 1224:                 name = Regex.Replace(name, "\\bSāddat\\b", "صعدة", RegexOptions.IgnoreCase);
 1225:                 name = Regex.Replace(name, "\\bŢu\u2018ūs\\b", "طعوس", RegexOptions.IgnoreCase);
 1226:                 name = Regex.Replace(name, "\\bNāziyat\\b", "نازية", RegexOptions.IgnoreCase);
 1227:                 name = Regex.Replace(name, "\\bQi\u2018r\\b", "قعر", RegexOptions.IgnoreCase);
 1228:                 name = Regex.Replace(name, "\\bḨabl\\b", "حبل", RegexOptions.IgnoreCase);
 1229:                 name = Regex.Replace(name, "\\bBinīyat\\b", "بنية", RegexOptions.IgnoreCase);
 1230:                 name = Regex.Replace(name, "\\bSahlat\\b", "سحلة", RegexOptions.IgnoreCase);
 1231:                 name = Regex.Replace(name, "\\bŢa\u2018aysāt\\b", "طعيسة", RegexOptions.IgnoreCase);
 1232:                 name = Regex.Replace(name, "\\bQaţ\u2018at\\b", "قطعة", RegexOptions.IgnoreCase);
 1233:                 name = Regex.Replace(name, "\\bJadhmā\u2019\\b", "جدماء", RegexOptions.IgnoreCase);
 1234:  
 1235:                 name = Regex.Replace(name, "\\bHashm\\b", "هشم", RegexOptions.IgnoreCase);
 1236:                 name = Regex.Replace(name, "\\bJar\u2018ā\\b", "جرعاء", RegexOptions.IgnoreCase);
 1237:                 name = Regex.Replace(name, "\\bQūr\\b", "قور", RegexOptions.IgnoreCase);
 1238:                 name = Regex.Replace(name, "\\bḨammat\\b", "حمة", RegexOptions.IgnoreCase);
 1239:                 name = Regex.Replace(name, "\\bShāţi\u2019\\b", "شاطيء", RegexOptions.IgnoreCase);
 1240:                 name = Regex.Replace(name, "\\bŢi\u2018s\\b", "طعس", RegexOptions.IgnoreCase);
 1241:                 name = Regex.Replace(name, "\\bḨamrūr\\b", "حمرور", RegexOptions.IgnoreCase);
 1242:                 name = Regex.Replace(name, "\\bLābat\\b", "لابة", RegexOptions.IgnoreCase);
 1243:                 name = Regex.Replace(name, "\\bDuwaykhilat\\b", "دويخيلات", RegexOptions.IgnoreCase);
 1244:                 name = Regex.Replace(name, "\\bMarsá\\b", "مرسى", RegexOptions.IgnoreCase);
 1245:                 name = Regex.Replace(name, "\\bDi\u2018b\\b", "دعب", RegexOptions.IgnoreCase);
 1246:                 name = Regex.Replace(name, "\\b\u2018Idd\\b", "عد", RegexOptions.IgnoreCase);
 1247:                 name = Regex.Replace(name, "\\b\u2018Ulaym\\b", "عليم", RegexOptions.IgnoreCase);
 1248:                 name = Regex.Replace(name, "\\bFasht\\b", "فشت", RegexOptions.IgnoreCase);
 1249:                 name = Regex.Replace(name, "\\bUfīḩim\\b", "عفحم", RegexOptions.IgnoreCase);
 1250:                 name = Regex.Replace(name, "\\bMazāri\u2018\\b", "مزارع", RegexOptions.IgnoreCase);
 1251:                 name = Regex.Replace(name, "\\bŢawīl\\b", "طويل", RegexOptions.IgnoreCase);
 1252:                 name = Regex.Replace(name, "\\bWark\\b", "ورك", RegexOptions.IgnoreCase);
 1253:                 name = Regex.Replace(name, "\\bMaḩfūr\\b", "محفور", RegexOptions.IgnoreCase);
 1254:  
 1255:                 name = Regex.Replace(name, "\\b\u2018Abl\\b", "عبل", RegexOptions.IgnoreCase);
 1256:                 name = Regex.Replace(name, "\\bBinī\\b", "بني", RegexOptions.IgnoreCase);
 1257:                 name = Regex.Replace(name, "\\bḨuşaydat\\b", "حصيدة", RegexOptions.IgnoreCase);
 1258:                 name = Regex.Replace(name, "\\bNuqrat\\b", "نقرة", RegexOptions.IgnoreCase);
 1259:                 name = Regex.Replace(name, "\\bRumḩayn\\b", "رمحين", RegexOptions.IgnoreCase);
 1260:                 name = Regex.Replace(name, "\\bFarshat\\b", "فرشت", RegexOptions.IgnoreCase);
 1261:                 name = Regex.Replace(name, "\\bNaqā\\b", "نقا", RegexOptions.IgnoreCase);
 1262:                 name = Regex.Replace(name, "\\bNuşaylāt\\b", "نصيلات", RegexOptions.IgnoreCase);
 1263:                 name = Regex.Replace(name, "\\bSudd\\b", "سد", RegexOptions.IgnoreCase);
 1264:                 name = Regex.Replace(name, "\\b\u2018Aqabat\\b", "عقبة", RegexOptions.IgnoreCase);
 1265:                 name = Regex.Replace(name, "\\bS\u2019irat\\b", "سعرة", RegexOptions.IgnoreCase);
 1266:                 name = Regex.Replace(name, "\\bTawal\\b", "طوال", RegexOptions.IgnoreCase);
 1267:                 name = Regex.Replace(name, "\\bBaḩr\\b", "بحر", RegexOptions.IgnoreCase);
 1268:                 name = Regex.Replace(name, "\\bNajfat\\b", "نجفة", RegexOptions.IgnoreCase);
 1269:                 name = Regex.Replace(name, "\\bRaḑmat\\b", "ردمة", RegexOptions.IgnoreCase);
 1270:                 name = Regex.Replace(name, "\\bŢiwāl\\b", "طوال", RegexOptions.IgnoreCase);
 1271:                 name = Regex.Replace(name, "\\bSahl\\b", "سهل", RegexOptions.IgnoreCase);
 1272:                 name = Regex.Replace(name, "\\bQīzān\\b", "قيزان", RegexOptions.IgnoreCase);
 1273:                 name = Regex.Replace(name, "\\bHadhlūl\\b", "هدلول", RegexOptions.IgnoreCase);
 1274:  
 1275:                 name = Regex.Replace(name, "\\bIbn\\b", "إبن", RegexOptions.IgnoreCase);
 1276:                 name = Regex.Replace(name, "\\bMīnā’\\b", "ميناء", RegexOptions.IgnoreCase);
 1277:                 name = Regex.Replace(name, "\\bMūsá\\b", "موسى", RegexOptions.IgnoreCase);
 1278:  
 1279:                 name = name.Replace("‘Iyāl", "عيال");
 1280:             }
 1281:             else
 1282:             {
 1283:  
 1284:             }
 1285:  
 1286:             return name;
 1287:         }
 1288:  
 1289:         ////////////////////////////////////////////////////////////////////////////
 1290:  
 1291:         /// <summary>
 1292:         /// From a country code this will return the main language used in this country.
 1293:         /// </summary>
 1294:         /// <param name="countryCode">Country code (iso2)</param>
 1295:         /// <returns>Language Code (ISO 639-2)</returns>
 1296:         public static string ReturnLanguageCodeUsedInCountryFromCountyCode(string countryCode)
 1297:         {
 1298:             // Incomplete
 1299:  
 1300:             string languageCode;
 1301:  
 1302:             countryCode = countryCode.ToLower();
 1303:  
 1304:             switch (countryCode)
 1305:             {
 1306:                 // Korea
 1307:                 case "kr":
 1308:                 case "kp": languageCode = "ko"; break;
 1309:  
 1310:                 // Japan
 1311:                 case "jp": languageCode = "ja"; break;
 1312:  
 1313:                 // Arab countries
 1314:                 case "dz":
 1315:                 case "bh":
 1316:                 case "eg":
 1317:                 case "iq":
 1318:                 case "jo":
 1319:                 case "kw":
 1320:                 case "lb":
 1321:                 case "ly":
 1322:                 case "mr":
 1323:                 case "ma":
 1324:                 case "om":
 1325:                 case "ps":
 1326:                 case "qa":
 1327:                 case "sa":
 1328:                 case "so":
 1329:                 case "sd":
 1330:                 case "sy":
 1331:                 case "tn":
 1332:                 case "ae":
 1333:                 case "ye": languageCode = "ar"; break;
 1334:  
 1335:                 // Iran, Afghanistan, Tajikistan
 1336:                 case "ir":
 1337:                 case "af":
 1338:                 case "tj": languageCode = "fa"; break; // Persian
 1339:  
 1340:                 // English
 1341:                 default: languageCode = "en"; break; // English
 1342:             }
 1343:  
 1344:             return languageCode;
 1345:         }
 1346:  
 1347:         /*
 1348:         ////////////////////////////////////////////////////////////////////////////
 1349: 
 1350:         /// <summary>
 1351:         /// Return the first language used by this user
 1352:         /// <param name="userLanguages">This should be HttpContext.Current.Request from webpages</param>
 1353:         /// </summary>
 1354:         public static string ReturnUserLanguage(HttpRequest userLanguages)
 1355:         {
 1356:             string language;
 1357:             string[] languages = HttpContext.Current.Request.UserLanguages;
 1358: 
 1359:             if (languages == null || languages.Length == 0)
 1360:             {
 1361:                 language = null;
 1362:             }
 1363:             else
 1364:             {
 1365:                 try
 1366:                 {
 1367:                     language = languages[0].ToLowerInvariant().Trim();
 1368:                     language = language.Substring(0, 2);
 1369:                 }
 1370:                 catch (ArgumentException)
 1371:                 {
 1372:                     language = null;
 1373:                 }
 1374:             }
 1375: 
 1376:             return language;
 1377:         }
 1378:         */
 1379:  
 1380:         ////////////////////////////////////////////////////////////////////////////
 1381:  
 1382:         /// <summary>
 1383:         /// Remove the small Koranic jawaz and sili characters because they cause the displayed text to render unstable
 1384:         /// </summary>
 1385:         /// <param name="line"></param>
 1386:         /// <returns>cleaned line</returns>
 1387:         public static string RemoveSmallKoranicJawazAndSiliCharacters(string line)
 1388:         {
 1389:             // 
 1390:             // remove ۚ and ۖ
 1391:  
 1392:             // remove the small character as it makes text unstable
 1393:             line = line.Replace(" ۚ", ""); // small ج
 1394:             line = line.Replace(" ۖ", ""); // small  صلى
 1395:  
 1396:             return line;
 1397:         }
 1398:  
 1399:         ////////////////////////////////////////////////////////////////////////////
 1400:  
 1401:         /// <summary>
 1402:         ///
 1403:         /// </summary>
 1404:         /// <param name="line"></param>
 1405:         /// <returns>cleaned line</returns>
 1406:         public static string SlightlyChangeSomeSentencesToPreventSystemFromGeneratingSingleCharacters(string line)
 1407:         {
 1408:             // slightly change sentences to prevent system single characters
 1409:             //s = s.Replace("الله", "اللـه");
 1410:             //s = s.Replace("ريال", "ريـال");
 1411:             //s = s.Replace("محمد", "مـحمد");
 1412:             //s = s.Replace("جل جلاله", "جل جلالـه");
 1413:             line = line.Replace("صلى الله عليه وسلم", "صـلى الله عليه وسلم");
 1414:  
 1415:             return line;
 1416:         }
 1417:  
 1418:         ////////////////////////////////////////////////////////////////////////////
 1419:  
 1420:         /// <summary>
 1421:         ///
 1422:         /// </summary>
 1423:         public static string Pluralize(string word)
 1424:         {
 1425:             string s;
 1426:  
 1427:             var inflector = new Inflector.Inflector(new CultureInfo("en"));
 1428:             s = inflector.Pluralize(word);
 1429:  
 1430:             return s;
 1431:         }
 1432:  
 1433:         ////////////////////////////////////////////////////////////////////////////
 1434:  
 1435:         /// <summary>
 1436:         ///
 1437:         /// </summary>
 1438:         public static string FirstCharacterToUpper(string input)
 1439:         {
 1440:             string s;
 1441:  
 1442:             if (!string.IsNullOrEmpty(input)) s = input.First().ToString().ToUpper() + String.Join("", input.Skip(1));
 1443:             else s = input;
 1444:  
 1445:             return s;
 1446:         }
 1447:  
 1448:         ////////////////////////////////////////////////////////////////////////////
 1449:  
 1450:         /// <summary>
 1451:         /// Checks if a word contains a German diacritic letter.
 1452:         /// <see href="http://en.wikipedia.org/wiki/German_alphabet"/>
 1453:         /// </summary>
 1454:         /// <remarks> 
 1455:         /// German uses letter-diacritic combinations (Ä/ä, Ö/ö, Ü/ü) using the umlaut and one ligature (ß (called eszett (sz) or scharfes S, sharp s)), but they do not constitute distinct letters in the alphabet.
 1456:         /// </remarks>
 1457:  
 1458:         public static bool ContainsGermanDiacriticLetter(string line)
 1459:         {
 1460:             bool lineContainsGermanDiacriticLetter;
 1461:  
 1462:             if (line != null)
 1463:             {
 1464:                 lineContainsGermanDiacriticLetter = Regex.IsMatch(line, "[ÄäÖöÜüß]");
 1465:             }
 1466:             else lineContainsGermanDiacriticLetter = false;
 1467:  
 1468:             return lineContainsGermanDiacriticLetter;
 1469:         }
 1470:  
 1471:         ////////////////////////////////////////////////////////////////////////////
 1472:  
 1473:         /// <summary>
 1474:         ///
 1475:         /// </summary>
 1476:         public static List<Language> List
 1477:         {
 1478:             get
 1479:             {
 1480:                 if (languageList == null || languageList.Count == 0)
 1481:                 {
 1482:                     //  <language iso_639_1="aa" iso_639_2_5="aar" iso_639_3="aar" name="" englishName="Afar" arabicName="" scope="" type=""/> 
 1483:                     languageList = (from q in XDocument.Elements("languageList").Elements("iso").Elements("language")
 1484:                                     where q.Attribute("iso_639_1").Value != "" && q.Attribute("iso_639_2_5").Value != "" && q.Attribute("iso_639_3").Value != ""
 1485:                                     select new Language
 1486:                                     {
 1487:                                         Symbol = q.Attribute("iso_639_1").Value,
 1488:                                         Iso6391 = q.Attribute("iso_639_1").Value,
 1489:                                         Iso63925 = q.Attribute("iso_639_2_5").Value,
 1490:                                         Iso6393 = q.Attribute("iso_639_3").Value,
 1491:                                         Name = q.Attribute("name").Value,
 1492:                                         EnglishName = q.Attribute("englishName").Value,
 1493:                                         ArabicName = q.Attribute("arabicName").Value
 1494:                                     }
 1495:                     ).ToList<Language>();
 1496:                 }
 1497:  
 1498:                 return languageList;
 1499:             }
 1500:         }
 1501:  
 1502:         ////////////////////////////////////////////////////////////////////////////
 1503:  
 1504:         /// <summary>
 1505:         /// 
 1506:         /// How to embed and access resources by using Visual C# http://support.microsoft.com/kb/319292/en-us
 1507:         /// 
 1508:         /// 1. Change the "Build Action" property of your XML file from "Content" to "Embedded Resource".
 1509:         /// 2. Add "using System.Reflection".
 1510:         /// 3. Manifest resource stream will start with the project namespace, the location of XML file.
 1511:         /// 
 1512:         /// </summary>
 1513:  
 1514:         private static XDocument XDocument
 1515:         {
 1516:             get
 1517:             {
 1518:                 Assembly _assembly;
 1519:                 StreamReader streamReader;
 1520:  
 1521:                 xDocument = null;
 1522:                 _assembly = Assembly.GetExecutingAssembly();
 1523:                 streamReader = new StreamReader(_assembly.GetManifestResourceStream("Ia.Cl.model.language.xml"));
 1524:  
 1525:                 try
 1526:                 {
 1527:                     if (streamReader.Peek() != -1)
 1528:                     {
 1529:                         xDocument = System.Xml.Linq.XDocument.Load(streamReader);
 1530:                     }
 1531:                 }
 1532:                 catch (Exception)
 1533:                 {
 1534:                 }
 1535:                 finally
 1536:                 {
 1537:                 }
 1538:  
 1539:                 return xDocument;
 1540:             }
 1541:         }
 1542:  
 1543:         ////////////////////////////////////////////////////////////////////////////
 1544:         ////////////////////////////////////////////////////////////////////////////
 1545:     }
 1546:  
 1547:     ////////////////////////////////////////////////////////////////////////////
 1548:     ////////////////////////////////////////////////////////////////////////////
 1549: }