1: using System;
2: using System.Collections;
3: using System.Collections.Generic;
4: using Inflector;
5: using System.Globalization;
6: using System.IO;
7: using System.Linq;
8: using System.Reflection;
9: using System.Text;
10: using System.Text.RegularExpressions;
11: using System.Web;
12: using System.Xml.Linq;
13:
14: namespace Ia.Cl.Model
15: {
16: ////////////////////////////////////////////////////////////////////////////
17:
18: /// <summary publish="true">
/// Language related support class including language list and codes.
20: /// </summary>
21: /// <value>
22: /// The Arabic part is built upon "The Unicode Standard, Version 5.2" with plain, accented, and koranic chars.
23: ///
24: /// For language codes see: ISO 639-2 Language Code List - Codes for the representation of names of languages (Library of Congress)
25: /// See: href="http://www-01.sil.org/iso639-3/codes.asp?order=639_1"
26: /// See: href="http://en.wikipedia.org/wiki/List_of_ISO_639-2_codes"
27: /// </value>
28: /// <remarks>
29: /// Copyright © 2001-2015 Jasem Y. Al-Shamlan (info@ia.com.kw), Integrated Applications - Kuwait. All Rights Reserved.
30: ///
31: /// This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by
32: /// the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
33: ///
34: /// This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
35: /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
36: ///
37: /// You should have received a copy of the GNU General Public License along with this library. If not, see http://www.gnu.org/licenses.
38: ///
39: /// Copyright notice: This notice may not be removed or altered from any source distribution.
40: /// </remarks>
41: public class Language
42: {
// Static backing fields; they are populated by members that live elsewhere in this class.
private static XDocument xDocument;
private static List<Language> languageList;

// The constants below are fragments of regular-expression character classes: the methods of this class
// wrap them in "[" ... "]" before handing them to Regex.
//
// NOTE(review): the composite constants further down join their fragments with "|". Inside a character
// class "|" is an ordinary character, not an alternation, so every class built from a composite also
// matches a literal '|'. The separators are kept here because the composites are returned verbatim by
// public methods; confirm with callers before removing them.

private const string latinPlainUpper = "\u0041-\u005a"; // A-Z
private const string latinPlainLower = "\u0061-\u007a"; // a-z
private const string latinAccent = "\u00c0-\u00fc"; // À-ü. NOTE(review): the range also contains × (U+00D7) and ÷ (U+00F7)

// http://en.wikipedia.org/wiki/Cyrillic_script_in_Unicode
// The range separator must be the ASCII hyphen-minus; an en dash would turn each constant into three literal characters.
private const string cyrillicPlain = "\u0400-\u04ff";
private const string cyrillicSupplement = "\u0500-\u052f";
private const string cyrillicExtendedA = "\u2de0-\u2dff";
private const string cyrillicExtendedB = "\ua640-\ua69f";
private const string choneticExtensions = "\u1d2b|\u1d78";

private const string arabicPlain = "\u0621-\u063a|\u0641-\u064a"; // letters U+0621-U+063A and U+0641-U+064A (tatweel U+0640 is left out)
private const string arabicAccent = "\u064b-\u0652"; // combining marks U+064B-U+0652
private const string arabicDigit = "\u0660-\u0669"; // Arabic-Indic digits
private const string arabicKoran = "\u0617-\u061a|\u06d6-\u06ed"; // Koranic annotation marks
private const string arabicPoint = "\u0670"; // superscript alef
private const string arabicKoranExtended = "\u0671"; // alef wasla
private const string arabicExtended = "\u0671-\u06d3";
//arabicJoined = "\ufe81-\ufefc";

private const string hiragana = "\u3041-\u309f";
private const string katakana = "\u30a0-\u30ff";
private const string katakanaPhonecticExtensions = "\u31f0-\u31ff";
private const string katakanaHalfwidth = "\uff65-\uff9f";

private const string cjkUnifiedIdeographs = "\u4e00-\u9fbb";
private const string cjkUnifiedIdeographsExtentionA = "\u3400-\u4dbf";
// NOTE(review): a "\u" escape consumes exactly four hex digits, so the literal below is
// U+2000, '0', '-', U+200F, 'f' (inside a class: the range '0'..U+200F), not U+20000-U+200FF.
// Supplementary-plane ranges cannot be written as a single range in a .NET character class
// (the engine works on UTF-16 code units); left unchanged until the intended handling is decided.
private const string cjkUnifiedIdeographsExtentionB = "\u20000-\u200ff";
// Was "\f900-\uf9ff": "\f" is a form feed followed by the text "900", not the code point U+F900.
private const string cjkCompatibilityIdeographs = "\uf900-\uf9ff";
// NOTE(review): same four-digit "\u" problem as cjkUnifiedIdeographsExtentionB.
private const string cjkCompatibilityIdeographsSupplement = "\u2f800-\u2f8bf";

private const string hangulSyllables = "\uac00-\ud7af";
private const string hangulJamo = "\u1100-\u11ff";
private const string hangulCompatibilityJamo = "\u3130-\u318f";
private const string hangulHalfwidth = "\uffa0-\uffdc";

private const string latin = latinPlainLower + "|" + latinPlainUpper + "|" + latinAccent;

// Currently a plain word-character pattern; the composite of the Cyrillic ranges is disabled:
private const string cyrillic = @"\w+"; //cyrillic_plain + "|" + cyrillicSupplement + "|" + cyrillicExtendedA + "|" + cyrillicExtendedB + "|" + choneticExtensions;

private const string arabic = arabicPlain + "|" + arabicAccent + "|" + arabicDigit + "|" + arabicKoran + "|" + arabicPoint + "|" + arabicKoranExtended;
private const string arabicNonWord = arabicAccent + "|" + arabicDigit + "|" + arabicKoran + "|" + arabicPoint + "|" + arabicKoranExtended;
private const string kana = hiragana + "|" + katakana + "|" + katakanaPhonecticExtensions + "|" + katakanaHalfwidth;
private const string hangul = hangulSyllables + "|" + hangulJamo + "|" + hangulCompatibilityJamo + "|" + hangulHalfwidth;

//word = latin + "|" + arabic + "|" + kana + "|" + hangul;
//ideograph = cjk_unified_ideographs + "|" + cjk_unified_ideographs_extention_a + "|" + cjk_unified_ideographs_extention_b + "|" + cjk_compatibility_ideographs + "|" + cjk_compatibility_ideographs_supplement;

// Cache behind AllArabicWordsList.
private static List<string> allArabicWordsList;

// Guards the loading of allArabicWordsList.
private static readonly object objectLock = new object();
97:
/// <summary>
/// Identifier of the language. LanguageByIso6391 fills it with the value of the element's "iso_639_1" attribute.
/// </summary>
public string Id { get; set; }

/// <summary>
/// Symbol of the language. LanguageByIso6391 fills it with the same "iso_639_1" attribute value as Id.
/// </summary>
public string Symbol { get; set; }

/// <summary>
/// Name of the language, read from the "name" attribute by LanguageByIso6391.
/// </summary>
public string Name { get; set; }

/// <summary>
/// English name of the language, read from the "englishName" attribute by LanguageByIso6391.
/// </summary>
public string EnglishName { get; set; }

/// <summary>
/// Arabic name of the language, read from the "arabicName" attribute by LanguageByIso6391.
/// </summary>
public string ArabicName { get; set; }

/// <summary>
/// Presumably the ISO 639-1 code; no member visible in this part of the file assigns it (TODO confirm).
/// </summary>
public string Iso6391 { get; set; }

/// <summary>
/// Presumably an ISO 639-2 or 639-5 code; no member visible in this part of the file assigns it (TODO confirm).
/// </summary>
public string Iso63925 { get; set; }

/// <summary>
/// Presumably the ISO 639-3 code; no member visible in this part of the file assigns it (TODO confirm).
/// </summary>
public string Iso6393 { get; set; }
121:
122: ////////////////////////////////////////////////////////////////////////////
123:
/// <summary>
/// Creates an empty instance; no property is assigned.
/// </summary>
public Language() { }
130:
131: ////////////////////////////////////////////////////////////////////////////
132:
/// <summary>
/// Creates an instance from the entry that <see cref="LanguageByIso6391"/> returns for the given code.
/// </summary>
/// <param name="iso_639_1">Code passed unchanged to <see cref="LanguageByIso6391"/>.</param>
/// <remarks>
/// Only Id, Name, Symbol, EnglishName and ArabicName are copied; the Iso* properties are left unassigned.
/// Any exception thrown by the lookup propagates to the caller.
/// </remarks>
public Language(string iso_639_1)
{
    var found = LanguageByIso6391(iso_639_1);

    Id = found.Id;
    Name = found.Name;
    Symbol = found.Symbol;
    EnglishName = found.EnglishName;
    ArabicName = found.ArabicName;
}
148:
149: ////////////////////////////////////////////////////////////////////////////
150:
/// <summary>
/// Tells whether the text contains at least one character of the "arabic" character set.
/// </summary>
/// <param name="line">Text to inspect; may be null.</param>
/// <returns>
/// True when a character of the set is found; false for null, empty or non-matching text.
/// Note that the set is wider than letters: it also holds the accent, digit and Koranic mark ranges.
/// </returns>
public static bool HasArabicLetter(string line)
{
    if (string.IsNullOrEmpty(line))
    {
        return false;
    }

    // The "arabic" constant separates its ranges with "|". Inside "[...]" a pipe is a literal character,
    // so without stripping it a text such as "a|b" would be reported as containing Arabic.
    return Regex.IsMatch(line, "[" + arabic.Replace("|", "") + "]");
}
166:
167: ////////////////////////////////////////////////////////////////////////////
168:
/// <summary>
/// Cached list of Arabic words; the list is loaded through _AllArabicWordsList on first use.
/// </summary>
/// <remarks>
/// A load is attempted whenever the cache is null or empty, so an empty result is retried on the next access.
/// </remarks>
public static List<string> AllArabicWordsList
{
    get
    {
        if (allArabicWordsList == null || allArabicWordsList.Count == 0)
        {
            lock (objectLock)
            {
                // Check again once the lock is held: a thread that was waiting here while another one
                // loaded the list must not load it a second time.
                if (allArabicWordsList == null || allArabicWordsList.Count == 0)
                {
                    allArabicWordsList = Ia.Cl.Model.Language._AllArabicWordsList;
                }
            }
        }

        return allArabicWordsList;
    }
}
187:
188: ////////////////////////////////////////////////////////////////////////////
189:
/// <summary>
/// Reads the embedded resource "Ia.Cl.model.data.language.List of all Arabic words.txt" of the executing
/// assembly and returns its non-blank lines, trimmed.
/// </summary>
/// <returns>
/// The words in file order, or an empty list when the resource is missing or cannot be read.
/// The same list is also stored in the allArabicWordsList field, as before.
/// </returns>
private static List<string> _AllArabicWordsList
{
    get
    {
        // Build into a local list and publish it only when complete, so that no other thread can
        // observe the shared field while it is still being filled.
        List<string> words;

        try
        {
            var assembly = Assembly.GetExecutingAssembly();

            using (var stream = assembly.GetManifestResourceStream("Ia.Cl.model.data.language.List of all Arabic words.txt"))
            {
                if (stream == null)
                {
                    // GetManifestResourceStream returns null when the resource does not exist.
                    words = new List<string>();
                }
                else
                {
                    words = new List<string>(100000);

                    using (var reader = new StreamReader(stream))
                    {
                        string line;

                        while ((line = reader.ReadLine()) != null)
                        {
                            // Trim before testing for emptiness so whitespace-only lines are not stored as "".
                            line = line.Trim();

                            if (line.Length > 0) words.Add(line);
                        }
                    }
                }
            }
        }
        catch (Exception)
        {
            // Deliberate best effort: a read failure yields an empty list rather than an exception.
            words = new List<string>();
        }

        allArabicWordsList = words;

        return words;
    }
}
229:
230: ////////////////////////////////////////////////////////////////////////////
231:
/// <summary>
/// Returns the first languageList/iso/language element whose "iso_639_1" attribute equals the given code,
/// converted to a <see cref="Language"/>.
/// </summary>
/// <param name="iso_639_1">Code compared with the "iso_639_1" attribute.</param>
/// <returns>
/// A new instance with Id and Symbol set to the code and Name, EnglishName and ArabicName read from the
/// "name", "englishName" and "arabicName" attributes.
/// </returns>
/// <remarks>
/// First() throws InvalidOperationException when nothing matches; reading .Value of an absent attribute
/// throws NullReferenceException.
/// </remarks>
public static Language LanguageByIso6391(string iso_639_1)
{
    return XDocument.Elements("languageList")
        .Elements("iso")
        .Elements("language")
        .Where(element => element.Attribute("iso_639_1").Value == iso_639_1)
        .Select(element => new Language
        {
            Id = element.Attribute("iso_639_1").Value,
            Symbol = element.Attribute("iso_639_1").Value,
            Name = element.Attribute("name").Value,
            EnglishName = element.Attribute("englishName").Value,
            ArabicName = element.Attribute("arabicName").Value
        })
        .First();
}
253:
254: ////////////////////////////////////////////////////////////////////////////
255:
/// <summary>
/// Returns the ideograph character-range fragment for a language code.
/// </summary>
/// <param name="language">Language code such as "ja", "ko", "zh", "zh_traditional" or "zh_simplified".</param>
/// <returns>
/// The five CJK range constants joined with "|" for the codes listed above; an empty string for every
/// other value, including null.
/// </returns>
public static string Ideograph(string language)
{
    switch (language)
    {
        case "ja":
        case "ko":
        case "zh_traditional":
        case "zh_simplified":
        case "zh":
            return cjkUnifiedIdeographs + "|" + cjkUnifiedIdeographsExtentionA + "|" + cjkUnifiedIdeographsExtentionB + "|" + cjkCompatibilityIdeographs + "|" + cjkCompatibilityIdeographsSupplement;

        default:
            // "en", "es", "fr", "de", "nl", "ar" and anything unknown have no ideographs.
            return "";
    }
}
281:
282: ////////////////////////////////////////////////////////////////////////////
283:
/// <summary>
/// Returns the word-character fragment used for a language code.
/// </summary>
/// <param name="language">Language code.</param>
/// <returns>
/// The "latin" set for "en", "es", "fr", "de" and "nl"; the "cyrillic" constant for "ru"; "kana" for "ja";
/// "hangul" for "ko"; "arabic" for "ar"; null for "zh_traditional" and "zh_simplified"; an empty string otherwise.
/// </returns>
public static string WordCharacters(string language)
{
    switch (language)
    {
        case "en":
        case "es":
        case "fr":
        case "de":
        case "nl":
            return latin;

        case "ru":
            return cyrillic;

        case "ja":
            return kana;

        case "ko":
            return hangul;

        case "zh_traditional":
        case "zh_simplified":
            return null;

        case "ar":
            return arabic;

        default:
            return "";
    }
}
307:
308: ////////////////////////////////////////////////////////////////////////////
309:
/// <summary>
/// Builds the regular expression that matches runs of word characters of a language.
/// </summary>
/// <param name="language">Language code.</param>
/// <returns>
/// For "ja", an alternation of one repeated class per kana block; for any other value,
/// "[" + WordCharacters(language) + "]+".
/// </returns>
public static string WordsRegularExpression(string language)
{
    if (language != "ja")
    {
        return "[" + WordCharacters(language) + "]+";
    }

    // Japanese: each kana block gets its own class so that a run never mixes blocks.
    return "[" + hiragana + "]+|[" + katakana + "]+|[" + katakanaPhonecticExtensions + "]+|[" + katakanaHalfwidth + "]+";
}
325:
326: ////////////////////////////////////////////////////////////////////////////
327:
/// <summary>
/// Returns the basic (unaccented) word-character fragment used for a language code.
/// </summary>
/// <param name="language">Language code.</param>
/// <returns>
/// The lower-case plain Latin range for "en", "es", "fr", "de" and "nl"; the "cyrillic" constant for "ru";
/// "kana" for "ja"; "hangul" for "ko"; the plain Arabic ranges for "ar"; null for "zh_traditional" and
/// "zh_simplified"; an empty string otherwise.
/// </returns>
public static string BasicWord(string language)
{
    switch (language)
    {
        case "en":
        case "es":
        case "fr":
        case "de":
        case "nl":
            return latinPlainLower;

        case "ru":
            return cyrillic;

        case "ja":
            return kana;

        case "ko":
            return hangul;

        case "zh_traditional":
        case "zh_simplified":
            return null;

        case "ar":
            return arabicPlain;

        default:
            return "";
    }
}
351:
352: ////////////////////////////////////////////////////////////////////////////
353:
/// <summary>
/// Builds the regular expression that matches runs of basic word characters of a language.
/// </summary>
/// <param name="language">Language code.</param>
/// <returns>
/// For "ja", an alternation of one repeated class per kana block; for any other value,
/// "[" + BasicWord(language) + "]+".
/// </returns>
public static string BasicWordsRegularExpression(string language)
{
    if (language != "ja")
    {
        return "[" + BasicWord(language) + "]+";
    }

    // Japanese: each kana block gets its own class so that a run never mixes blocks.
    return "[" + hiragana + "]+|[" + katakana + "]+|[" + katakanaPhonecticExtensions + "]+|[" + katakanaHalfwidth + "]+";
}
369:
370: ////////////////////////////////////////////////////////////////////////////
371:
/// <summary>
/// Returns the fragment of characters that are treated as not belonging to a word in a language.
/// </summary>
/// <param name="language">Language code.</param>
/// <returns>The "arabicNonWord" ranges for "ar"; an empty string for every other value, including null.</returns>
public static string NonWord(string language)
{
    // Arabic is the only language with a non-word set.
    return language == "ar" ? arabicNonWord : "";
}
394:
395: ////////////////////////////////////////////////////////////////////////////
396:
/// <summary>
/// Reduces a word to a basic form: "ß" becomes "ss", letters are lower-cased, alef wasla becomes a plain alef,
/// the Arabic non-word characters returned by NonWord("ar") are removed and diacritics are stripped.
/// </summary>
/// <param name="word">Word to reduce; null or empty input is returned unchanged.</param>
/// <returns>The reduced word.</returns>
/// <remarks>
/// Every step is applied regardless of the language of the word.
/// </remarks>
public static string BasicForm(string word)
{
    if (string.IsNullOrEmpty(word))
    {
        return word;
    }

    word = word.Replace("ß", "ss");
    word = word.ToLowerInvariant();

    word = word.Replace("ٱ", "ا");

    // NonWord("ar") separates its ranges with "|"; inside "[...]" that pipe is a literal character and
    // would make this step delete '|' from the word, so the separators are stripped first.
    word = Regex.Replace(word, "[" + Ia.Cl.Model.Language.NonWord("ar").Replace("|", "") + "]", "");

    word = RemoveDiacritics(word);

    return word;
}
416:
417: ////////////////////////////////////////////////////////////////////////////
418:
/// <summary>
/// Deletes every character of the Unicode punctuation category (\p{P}) from the text.
/// http://stackoverflow.com/questions/18830813/how-can-i-remove-punctuation-from-input-text-in-java
/// </summary>
/// <param name="text">Text to clean.</param>
/// <returns>The text without punctuation marks.</returns>
public static string RemovePunctuationMarks(string text)
{
    return Regex.Replace(text, "\\p{P}", "");
}
429:
430: ////////////////////////////////////////////////////////////////////////////
431:
/// <summary>
/// Strips diacritics: the text is decomposed (form D), every non-spacing mark is dropped and the
/// remainder is recomposed (form C).
/// </summary>
/// <param name="text">Text to process.</param>
/// <returns>The text without non-spacing marks.</returns>
public static string RemoveDiacritics(string text)
{
    // http://stackoverflow.com/questions/249087/how-do-i-remove-diacritics-accents-from-a-string-in-net

    char[] kept = text.Normalize(NormalizationForm.FormD)
        .Where(ch => CharUnicodeInfo.GetUnicodeCategory(ch) != UnicodeCategory.NonSpacingMark)
        .ToArray();

    return new string(kept).Normalize(NormalizationForm.FormC);
}
453:
454: ////////////////////////////////////////////////////////////////////////////
455:
/// <summary>
/// Produces spelling variants of an Arabic word, like أحمد and احمد.
/// </summary>
/// <param name="word">Word to vary.</param>
/// <returns>
/// The distinct non-empty variants, the word itself included. The order is the key order of the
/// underlying Hashtable.
/// </returns>
/// <remarks>
/// Each substitution is applied to the original word only; substitutions are not combined with each other.
/// </remarks>
public static List<string> ProduceSimilarArabicWords(string word)
{
    // The Hashtable serves as a set: only its keys matter.
    Hashtable variants = new Hashtable(20);

    variants[word] = 1;
    variants[word.Replace("ـ", "")] = 1;

    variants[Regex.Replace(word, "\\bو", "و ")] = 1;
    variants[Regex.Replace(word, "\\bو\\s+", "و")] = 1;

    // Pairs of spellings that are swapped in both directions.
    string[,] interchangeable =
    {
        { "أ", "ا" },
        { "إ", "ا" },
        { "ى", "ي" },
        { "ة", "ه" },
        { "و", "ؤ" },
        { "عبد ", "عبد" },
        { "ابو ", "ابو" },
        { "داود ", "داوود" },
        { "آ", "ءا" },
        { "آ", "ا" }
    };

    for (int row = 0; row < interchangeable.GetLength(0); row++)
    {
        ProduceSimilarArabicWordsAddKeepBothVariationsToArrayList(word, interchangeable[row, 0], interchangeable[row, 1], ref variants);
    }

    return variants.Keys.Cast<string>().Where(key => key.Length > 0).ToList();
}
495:
496: ////////////////////////////////////////////////////////////////////////////
497:
/// <summary>
/// Adds two variants of the word as keys of the table: one with every variant1 replaced by variant2 and
/// one with every variant2 replaced by variant1.
/// </summary>
/// <param name="word">Word to vary.</param>
/// <param name="variant1">First spelling.</param>
/// <param name="variant2">Second spelling.</param>
/// <param name="hashtable">Table that receives the variants as keys (value 1).</param>
private static void ProduceSimilarArabicWordsAddKeepBothVariationsToArrayList(string word, string variant1, string variant2, ref Hashtable hashtable)
{
    string firstToSecond = word.Replace(variant1, variant2);
    hashtable[firstToSecond] = 1;

    string secondToFirst = word.Replace(variant2, variant1);
    hashtable[secondToFirst] = 1;
}
506:
507: ////////////////////////////////////////////////////////////////////////////
508:
/// <summary>
/// Replaces the Arabic digits ١٢٣٤٥٦٧٨٩٠ in a string by the Latin digits 1234567890.
/// </summary>
/// <param name="s">Text that may contain Arabic digits</param>
/// <returns>The text with every Arabic digit replaced; all other characters are untouched</returns>
public static string ConvertArabicNumbersToLatin(string s)
{
    // The two strings are index-aligned: position i of the first is replaced by position i of the second.
    const string easternDigits = "١٢٣٤٥٦٧٨٩٠";
    const string westernDigits = "1234567890";

    for (int i = 0; i < westernDigits.Length; i++)
    {
        s = s.Replace(easternDigits[i], westernDigits[i]);
    }

    return s;
}
529:
530: ////////////////////////////////////////////////////////////////////////////
531:
/// <summary>
/// Replaces the Latin digits 1234567890 in a string by the Arabic digits ١٢٣٤٥٦٧٨٩٠.
/// </summary>
/// <param name="s">Text that may contain Latin digits</param>
/// <returns>The text with every Latin digit replaced; all other characters are untouched</returns>
public static string ConvertLatinNumbersToArabic(string s)
{
    // The two strings are index-aligned: position i of the first is replaced by position i of the second.
    const string westernDigits = "1234567890";
    const string easternDigits = "١٢٣٤٥٦٧٨٩٠";

    for (int i = 0; i < westernDigits.Length; i++)
    {
        s = s.Replace(westernDigits[i], easternDigits[i]);
    }

    return s;
}
552:
553: ////////////////////////////////////////////////////////////////////////////
554:
/// <summary>
/// Normalises the spelling of an Arabic name or noun phrase: collapses whitespace, removes tatweel and
/// doctor titles, fixes word-final 'ه' and 'ى', restores the hamza of a list of known words, and trims
/// doubled leading alefs and tripled letters.
/// </summary>
/// <param name="name">Name to be examined</param>
/// <returns>The corrected name</returns>
public static string CorrectArabicNameNounStringFormat(string name)
{
    name = Regex.Replace(name, @"\s+", @" ");
    name = name.Trim();

    // remove all 'ـ' chars
    name = name.Replace("ـ", "");

    // last 'ه' to 'ة' (on word border), skipped when the text contains the word 'شاه';
    // 'الله' is restored afterwards
    if (!Regex.IsMatch(name, "\\bشاه\\b")) name = Regex.Replace(name, "ه\\b", "ة");
    name = name.Replace("اللة", "الله");

    // remove the titles 'دكتور' 'الدكتور' 'دكتورة' 'الدكتورة' 'د'
    name = Regex.Replace(name, "\\bدكتور\\b", "");
    name = Regex.Replace(name, "\\bالدكتور\\b", "");
    name = Regex.Replace(name, "\\bدكتورة\\b", "");
    name = Regex.Replace(name, "\\bالدكتورة\\b", "");
    name = Regex.Replace(name, "\\bد\\b", "");

    name = Regex.Replace(name, @"\bعبد\s+", "عبد");
    name = Regex.Replace(name, @"\s+و\s+", " و");

    // last 'ى' to 'ي' (on word border), skipped for the whole text when it contains any of these words
    string[] alefMaqsuraWords =
    {
        "يسرى", "يحيى", "هدى", "سلمى", "منى", "منتهى", "ليلى", "عيسى", "موسى", "سلوى",
        "بشرى", "صغرى", "صدى", "كبرى", "مصطفى", "ندى", "يمنى", "مستشفى", "تقوى", "ذكرى",
        "موسيقى", "ضحى", "لبنى", "مقتدى", "مقهى", "ملهى", "منتدى", "مرتضى"
    };

    if (!alefMaqsuraWords.Any(exception => name.Contains(exception))) name = Regex.Replace(name, "ى\\b", "ي");

    // restore the hamza of known words
    name = Regex.Replace(name, "\\bاحمد", "أحمد");
    name = Regex.Replace(name, "\\bازياء", "أزياء");
    name = Regex.Replace(name, "\\bاوكسجين", "أوكسجين");
    name = Regex.Replace(name, "\\bاقبال", "إقبال");
    name = Regex.Replace(name, "\\bابيار", "أبيار");
    name = Regex.Replace(name, "اسنان", "أسنان");
    // "[أا]" rather than "[أ|ا]": a pipe inside a character class is a literal '|'
    name = Regex.Replace(name, "[أا]براهيم", "إبراهيم");
    name = Regex.Replace(name, "[أا]سماعيل", "إسماعيل");
    name = Regex.Replace(name, "اجياد", "أجياد");
    name = Regex.Replace(name, "\\bامل\\b", "أمل");
    name = Regex.Replace(name, "\\bايوب\\b", "أيوب");
    name = Regex.Replace(name, "\\bايهاب\\b", "إيهاب");
    name = Regex.Replace(name, "\\bايمن\\b", "أيمن");
    name = Regex.Replace(name, "\\bايمان\\b", "إيمان");
    name = Regex.Replace(name, "\\bاياد\\b", "أياد");
    name = Regex.Replace(name, "\\bانيسة\\b", "أنيسة");
    name = Regex.Replace(name, "\\bانيس\\b", "أنيس");
    name = Regex.Replace(name, "\\bانور\\b", "أنور");
    name = Regex.Replace(name, "\\bانوار\\b", "أنوار");
    name = Regex.Replace(name, "\\bامينة\\b", "أمينة");
    name = Regex.Replace(name, "\\bامين\\b", "أمين");
    name = Regex.Replace(name, "\\bاميمة\\b", "أميمة");
    name = Regex.Replace(name, "\\bامير\\b", "أمير");
    name = Regex.Replace(name, "\\bاميرة\\b", "أميرة");
    name = Regex.Replace(name, "\\bامنة\\b", "آمنة");
    name = Regex.Replace(name, "\\bامثال\\b", "أمثال");
    name = Regex.Replace(name, "\\bاماني\\b", "أماني");
    name = Regex.Replace(name, "\\bامان\\b", "أمان");
    name = Regex.Replace(name, "\\bامال\\b", "آمال");
    name = Regex.Replace(name, "\\bام\\b", "أم");
    name = Regex.Replace(name, "\\bالهام\\b", "إلهام");

    // 'أل to 'ال'
    name = Regex.Replace(name, "\\bأل", "ال");

    // 'اا' at beginning of a word to 'ا'
    // (this used string.Replace with a regex pattern, which searched for a literal backslash-b and never matched)
    name = Regex.Replace(name, "\\bاا", "ا");

    // reduce any run of 3 or more identical Arabic letters to only 2
    // (this used string.Replace with the digit class \d and a "\1" replacement, so it never ran)
    name = Regex.Replace(name, "([\u0621-\u064a])\\1{2,}", "$1$1");

    name = Regex.Replace(name, @"\s+", @" ");
    name = name.Trim();

    return name;
}
667:
668: ////////////////////////////////////////////////////////////////////////////
669:
/// <summary>
/// Removes every character that is neither a space nor part of the "latin" character set.
/// </summary>
/// <param name="line">Line to be cleaned</param>
/// <returns>The line reduced to spaces and characters of the Latin set</returns>
public static string RemoveNonLatinCharacters(string line)
{
    // "latin" separates its ranges with "|"; inside "[^...]" that pipe is a literal character, which
    // would let '|' survive the filter. Strip the separators before building the class.
    return Regex.Replace(line, "[^ " + latin.Replace("|", "") + "]", "");
}
681:
682: ////////////////////////////////////////////////////////////////////////////
683:
/// <summary>
/// For Arabic, removes every character that is not a space, a plain Arabic letter, an Arabic digit or a
/// character of the extended Arabic range. Other languages are returned unchanged.
/// </summary>
/// <param name="iso6393">Language code; only the value "ar" triggers any filtering</param>
/// <param name="line">Line to filter</param>
/// <returns>Filtered line</returns>
public static string RemoveNonNativeAndNonNativeExtendedLettersAndDigitsAccordingToLanguage(string iso6393, string line)
{
    if (iso6393 != "ar")
    {
        return line;
    }

    // Concatenate the ranges without "|": inside "[^...]" a pipe is a literal character and would let
    // '|' survive the filter. arabicPlain itself contains a separator, hence the Replace.
    string allowed = (arabicPlain + arabicDigit + arabicExtended).Replace("|", "");

    return Regex.Replace(line, "[^ " + allowed + "]", "");
}
703:
704: ////////////////////////////////////////////////////////////////////////////
705:
/// <summary>
/// For Arabic, spells out every digit of the text as an Arabic word; Arabic digits are first converted
/// to Latin ones through ConvertArabicNumbersToLatin. Other languages are returned unchanged.
/// </summary>
/// <param name="iso6393">Language code; only the value "ar" triggers any conversion</param>
/// <param name="line">String to process</param>
/// <returns>The text with each single digit replaced by its word</returns>
public static string ConvertSingleLatinDigitsToNativeWordEquivalentAccordingToLanguage(string iso6393, string line)
{
    if (iso6393 != "ar")
    {
        return line;
    }

    line = ConvertArabicNumbersToLatin(line);

    // digit, word
    string[,] digitWords =
    {
        { "1", "واحد" },
        { "2", "إثنين" },
        { "3", "ثلاثة" },
        { "4", "أربعة" },
        { "5", "خمسة" },
        { "6", "ستة" },
        { "7", "سبعة" },
        { "8", "ثمانية" },
        { "9", "تسعة" },
        { "0", "صفر" }
    };

    for (int row = 0; row < digitWords.GetLength(0); row++)
    {
        line = line.Replace(digitWords[row, 0], digitWords[row, 1]);
    }

    return line;
}
736:
737: ////////////////////////////////////////////////////////////////////////////
738:
/// <summary>
/// Removes the Latin transliterations of the Arabic definite article from a line. For example "الصيد" could be
/// transliterated to "al sayid", and the "al" is removed. Other languages are returned unchanged.
/// </summary>
/// <param name="iso6393">Language code; only the value "ar" triggers any removal</param>
/// <param name="line">Line to be probed and cleaned</param>
/// <returns>The line without the stand-alone article words; surrounding spaces are kept</returns>
public static string RemoveLatinTransliterationsOfSubjectReferencesOfNativeWordTransliterationAccordingToLanguagee(string iso6393, string line)
{
    if (iso6393 != "ar")
    {
        return line;
    }

    // Applied one after the other, case-insensitively. The last pattern uses a z with a combining
    // cedilla, which differs from the plain z of the pattern before it.
    string[] articlePatterns =
    {
        "\\bas\\b",
        "\\bal\\b",
        "\\bash\\b",
        "\\bat\\b",
        "\\bad\\b",
        "\\bar\\b",
        "\\ban\\b",
        "\\bath\\b",
        "\\baz\\b",
        "\\baz̧( |\\b)"
    };

    foreach (string pattern in articlePatterns)
    {
        line = Regex.Replace(line, pattern, "", RegexOptions.IgnoreCase);
    }

    return line;
}
770:
771: ////////////////////////////////////////////////////////////////////////////
772:
/// <summary>
/// Replaces each Latin transliteration of the Arabic definite article, together with the whitespace
/// that follows it, by the Arabic article "ال".
/// </summary>
/// <param name="name">Latin transliteration of an Arabic word</param>
/// <returns>The text with the transliterated articles replaced by "ال"</returns>
public static string ProduceArabicTextOfLatinTransliterationsOfArabicWordDefinitArticle(string name)
{
    // Applied one after the other, case-insensitively, in this order.
    string[] articlePatterns =
    {
        "\\bas\\s*\\b",
        "\\bal\\s*\\b",
        "\\bash\\s*\\b",
        "\\bat\\s*\\b",
        "\\bad\\s*\\b",
        "\\bar\\s*\\b",
        "\\ban\\s*\\b",
        "\\bath\\s*\\b",
        "\\baz\\s*\\b",

        "\\baş\\s*\\b",
        "\\baţ\\s*\\b",
        "\\baḑ\\s*\\b",
        "\\badh\\s*\\b",

        "\\baz̧\\s*\\b",
        "\\baş\\s*\\b"
    };

    foreach (string pattern in articlePatterns)
    {
        name = Regex.Replace(name, pattern, "ال", RegexOptions.IgnoreCase);
    }

    return name;
}
800:
801: ////////////////////////////////////////////////////////////////////////////
802:
/// <summary>
/// Joins a transliterated Arabic definite article with the word that follows it by removing the
/// whitespace between them.
/// </summary>
/// <param name="name">Latin transliteration of an Arabic word with a space after the article</param>
/// <returns>Latin transliteration without the space after the article</returns>
public static string RemoveSpaceAfterLatinTransliterationsOfArabicWordsDefinitArticle(string name)
{
    // note the two different z: plain "az" and "az̧" with a combining cedilla
    const string articleThenSpace = "\\b(as|al|ash|at|ad|ar|an|ath|az|aş|aţ|aḑ|adh|az̧)\\s*\\b";

    return Regex.Replace(name, articleThenSpace, "$1", RegexOptions.IgnoreCase);
}
815:
816: ////////////////////////////////////////////////////////////////////////////
817:
/// <summary>
/// Removes the wrong space between a native definite article and its word. For Arabic, an assimilated
/// article written as alef + letter + space + same letter is rewritten as "ال" + letter, and "ال " loses
/// its trailing space. Other languages are returned unchanged.
/// </summary>
/// <param name="iso6393">Language code; only the value "ar" triggers any change</param>
/// <param name="name">Native text</param>
/// <returns>Native text with the article attached to its word</returns>
public static string RemoveWrongSpaceBetweenNativeDefinitArticleAndItsWord(string iso6393, string name)
{
    if (iso6393 != "ar")
    {
        return name;
    }

    // Letters handled by the assimilated-article rule, in the order the rules are applied.
    char[] assimilatingLetters = { 'ت', 'ث', 'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ل', 'ن' };

    foreach (char letter in assimilatingLetters)
    {
        // "[أا]" rather than "[أ|ا]": a pipe inside a character class is a literal '|', which made
        // text such as "x|ش ش..." match the rule.
        name = Regex.Replace(name, "\\b[أا]" + letter + " " + letter, "ال" + letter, RegexOptions.IgnoreCase);
    }

    name = Regex.Replace(name, "\\b[أا]ل ", "ال", RegexOptions.IgnoreCase);

    return name;
}
854:
855: ////////////////////////////////////////////////////////////////////////////
856:
/// <summary>
/// Produce approximate native text of Latin transliterations of native words.
/// </summary>
/// <remarks>
/// Only the code "ar" (Arabic) is processed; for any other code, and for null or empty text, the
/// input is returned unchanged. The conversion is a fixed sequence of textual substitutions applied
/// to the lower-cased input. The ORDER of the substitutions is significant: longer sequences
/// (doubled letters, digraphs, word endings) are consumed before the single-letter fallbacks.
/// </remarks>
/// <param name="iso6393">Language code; only "ar" is processed</param>
/// <param name="name">Latin transliteration of native word</param>
/// <returns>Approximate native text</returns>
public static string ProduceApproximateNativeTextOfLatinTransliterationsOfNativeWordsAccrodingToLanguage(string iso6393, string name)
{
    /*
     * To match:
     * '’' use '\u2019'
     * '‘' use '\u2018'
     */

    if (iso6393 != "ar" || string.IsNullOrEmpty(name)) return name;

    // Culture-invariant lower-casing: every rule below is written with plain lower-case Latin
    // letters, and a culture-sensitive ToLower() (e.g. Turkish, where 'I' becomes dotless 'ı')
    // would produce letters that none of the rules recognise.
    name = name.ToLowerInvariant();

    // Why a pattern such as "i\u2018\\b" does not behave as "quote at end of word": the quote
    // characters are not word characters, so a \b placed right after one only matches when a
    // word character FOLLOWS it. Hence the "(\\b|$)" form used in some rules below.

    // multi-letter sequences and doubled digraphs
    name = Regex.Replace(name, "ayyā", "يا");
    name = Regex.Replace(name, "iyah\\b", "ية");
    name = name.Replace("dhdh", "ذ");
    name = name.Replace("thth", "ث");
    name = name.Replace("shsh", "ش");
    name = Regex.Replace(name, "deid", "ديد");

    name = Regex.Replace(name, "lay", "لي");
    name = Regex.Replace(name, "way", "وي");
    name = Regex.Replace(name, "ain", "ين");
    name = Regex.Replace(name, "llá\\b", "لا");
    name = Regex.Replace(name, "iyā", "يا");
    name = name.Replace("yya", "ي");
    // NOTE(review): the text has been lower-cased above, so this case-sensitive pattern with a
    // capital 'A' can never match. Left exactly as it was so that existing output does not change.
    name = Regex.Replace(name, "\\bAya", "أيا");

    // doubled consonants collapse to a single native letter
    name = name.Replace("mm", "م");
    name = name.Replace("bb", "ب");
    name = name.Replace("dd", "د");
    name = name.Replace("ff", "ف");
    name = name.Replace("ss", "س");
    name = name.Replace("ll", "ل");
    name = name.Replace("rr", "ر");
    name = name.Replace("zz", "ز");
    name = name.Replace("nn", "ن");
    name = name.Replace("jj", "ج");
    name = name.Replace("ww", "و");
    name = name.Replace("qq", "ق");
    name = name.Replace("tt", "ت");
    name = name.Replace("ḩḩ", "ح");
    name = name.Replace("kk", "ك");
    name = name.Replace("ţţ", "ط");
    name = name.Replace("şş", "ص");
    name = name.Replace("ḑḑ", "ض"); // not same
    name = name.Replace("ḍḍ", "ض");

    // diphthongs and digraphs
    name = name.Replace("ay", "ي");
    name = name.Replace("au", "و");
    name = name.Replace("āy", "اي");
    name = name.Replace("kh", "خ");
    name = name.Replace("sh", "ش");
    name = name.Replace("th", "ث");
    // NOTE(review): "dhdh" above maps to a different letter than the single "dh" here - confirm which is intended.
    name = name.Replace("dh", "ض");
    name = name.Replace("gh", "غ");

    // word endings and sequences around the quote characters
    name = Regex.Replace(name, "ah\\b", "ة");
    name = Regex.Replace(name, "āt\\b", "ات");
    name = Regex.Replace(name, "at\\b", "ات");
    name = Regex.Replace(name, "ā\u2019i", "ائ");
    name = Regex.Replace(name, "ā\u2019(\\b|$)", "اء");
    name = Regex.Replace(name, "\u2018ā(\\b|$)", "عا");
    name = Regex.Replace(name, "\u2018a", "ع");
    name = Regex.Replace(name, "\u2018u", "ع");
    name = Regex.Replace(name, "\u2018ū", "عو");
    name = Regex.Replace(name, "\u2018ī", "عي");
    name = Regex.Replace(name, "i\u2018(\\b|$)", "ع");
    name = Regex.Replace(name, "i\u2018", "ئ");
    name = Regex.Replace(name, "i\u2019", "ئ");
    name = Regex.Replace(name, "ay\u2018\\b", "يع");
    name = Regex.Replace(name, "ay\\b", "ي");
    name = Regex.Replace(name, "ei", "ي");
    name = Regex.Replace(name, "yā", "يا");
    name = Regex.Replace(name, "ya", "ي");
    name = Regex.Replace(name, "īt\\b", "يت");
    name = Regex.Replace(name, "ḩá\\b", "حا");

    // a remaining final 't', then letters carrying diacritics
    name = Regex.Replace(name, "t\\b", "ة");
    name = name.Replace("ş", "ص");
    name = name.Replace("ḑ", "ض"); // not same
    name = name.Replace("ḍ", "ض");
    name = name.Replace("ţ", "ط");
    name = name.Replace("ā", "ا");
    name = name.Replace("a’", "ا");
    name = name.Replace("ī", "ي");
    name = name.Replace("ū", "و");
    name = name.Replace("ḩ", "ح");

    // short vowels at word edges
    name = Regex.Replace(name, "\\ba", "أ");
    name = Regex.Replace(name, "\\bu", "أ");
    name = Regex.Replace(name, "\\bi", "أ");
    name = Regex.Replace(name, "i\\b", "ي");
    name = Regex.Replace(name, "y\\b", "ي");

    // Single-letter fallbacks; remaining short vowels are dropped. The original repeated the
    // r, t, d, j, h and w replacements a second time; those repeats could never find anything
    // left to replace and have been removed.
    name = name.Replace("a", "");
    name = name.Replace("b", "ب");
    name = name.Replace("t", "ت");
    //name = name.Replace("c", "");
    name = name.Replace("j", "ج");
    name = name.Replace("g", "ج");
    name = name.Replace("d", "د");
    name = name.Replace("r", "ر");
    name = name.Replace("s", "س");
    name = name.Replace("e", "");
    name = name.Replace("h", "ه");
    name = name.Replace("i", "");
    name = name.Replace("p", "ب");
    name = name.Replace("f", "ف");
    name = name.Replace("q", "ق");
    name = name.Replace("k", "ك");
    name = name.Replace("l", "ل");
    name = name.Replace("m", "م");
    name = name.Replace("n", "ن");
    name = name.Replace("u", "");
    name = name.Replace("v", "ف");
    name = name.Replace("o", "");
    name = name.Replace("w", "و");
    name = name.Replace("y", "");
    // 'z' followed by the combining cedilla U+0327; it is not a plain 'z'. KEEP THIS ORDER:
    // it has to be consumed before the plain 'z' rule on the next line.
    name = name.Replace("z\u0327", "ظ");
    name = name.Replace("z", "ز");
    name = name.Replace("á", "اء");

    name = Regex.Replace(name, "\\b\u2018", "ع");

    //name = name.Replace("7", "ح");
    //name = name.Replace("3", "ع");
    //name = name.Replace("6", "ط");

    return name;
}
1010:
1011: ////////////////////////////////////////////////////////////////////////////
1012:
// Ordered { pattern, replacement } rules used by
// ProduceExactNativeTextOfSingleLatinTransliterationsOfNativeWordsAccordingToLanguage for "ar".
// Every pattern is a regular expression matched case-insensitively.
//
// The quote characters U+2018 and U+2019 are NOT word characters, so a \b written next to one of
// them only matches when a word character sits on the other side of the quote. Rules whose word
// starts or ends with a quote therefore use (?<!\w) / (?!\w) at that edge instead of \b.
private static readonly string[,] arabicExactWordRuleList =
{
    // kw
    { "\\bKhalij\\b", "خليج" },

    { "\\bSha\u2018īb\\b", "شعيب" },
    { "\\bSha\u2018ib\\b", "شعيب" },
    { "\\bShi\u2018b\\b", "شعب" },

    { "\\bAbū\\b", "أبو" },
    { "\\bBū\\b", "أبو" },
    { "\\bḨisū\\b", "حسو" },

    { "\\bSayḩ\\b", "سيح" },
    { "\\bKharmat\\b", "خرمة" },

    { "\\bŢawī\\b", "طوي" },
    { "\\bKhaţmat\\b", "خطمة" },
    { "\\bYadd\\b", "يد" },
    { "\\bQāriḩat\\b", "قارحات" },
    { "\\bGhāfat\\b", "غافة" },
    { "\\bWara\\b", "وارة" },
    { "\\bYabbah\\b", "يبة" },
    { "\\bBidud\\b", "بدود" },
    { "\\bMayy\\b", "مي" },
    { "\\bYarīrah\\b", "يريرة" },
    { "\\bNā\u2019if\\b", "نايف" },
    { "\\bBarqat\\b", "برقة" },
    { "\\bHaz\u2019", "هزع" },
    { "\\bDhi\u2019b", "ذئب" },
    { "\\bSuyūl\\b", "سيول" },
    { "\\bFā\u2019iyah\\b", "فائية" },
    { "Khawr Fakkān", "خورفكان" },

    { "\\bMusá\\b", "موسى" },

    { "\\bMuhammad\\b", "محمد" },
    { "\\bMohammad\\b", "محمد" },
    { "\\bMuhammed\\b", "محمد" },
    { "\\bMohammed\\b", "محمد" },
    { "\\bAhmad\\b", "أحمد" },
    { "\\bAhmed\\b", "أحمد" },

    { "\\bWādī\\b", "وادي" },
    { "\\bWadi\\b", "وادي" },
    { "\\bWâdi\\b", "وادي" },
    { "\\bWād\\b", "وادي" },
    { "\\bWudayy\\b", "ودي" },

    { "\\bJaww\\b", "جو" },
    { "\\bDarb\\b", "درب" },
    { "\\bManākh\\b", "مناخ" },
    { "\\bJibāl\\b", "جبال" },
    { "\\bJibal\\b", "جبال" },
    { "\\bRawḑat\\b", "روضة" },

    { "\\bNufūd\\b", "نفود" },
    { "\\bNafūd\\b", "نفود" },

    { "\\bMazra\u2018at\\b", "مزرعة" },
    { "\\bFayḑat\\b", "فيضة" },
    { "\\bKhashm\\b", "خشم" },

    { "\\bĀbār\\b", "آبار" },
    { "\\bBi\u2019r\\b", "بئر" },
    { "\\bBi\u2018r\\b", "بئر" },

    { "\\bQalīb\\b", "قليب" },
    { "\\bQulayyib\\b", "قليب" },
    { "\\bQulbān\\b", "قلبان" },

    { "\\bḐulay\u2018(?!\\w)", "ضليع" },
    { "\\bḐulū\u2018(?!\\w)", "ضلوع" },

    { "\\bḨaşāt\\b", "حصاة" },

    { "\\bJabal\\b", "جبل" },
    { "\\bJubayl\\b", "جبيل" },

    { "\\bHijrat\\b", "هجرة" },
    { "\\bḤazm\\b", "حزم" },
    { "\\bḨazm\\b", "حزم" },
    { "\\bḨuzūm\\b", "حزوم" },

    { "\\bKhabrā\u2019(?!\\w)", "خبراء" },
    { "\\bKhubayrā\u2019(?!\\w)", "خبيراء" },
    { "\\bKhabrat\\b", "خبرة" },

    { "\\bFāj\\b", "فج" },
    { "\\bSabkhat\\b", "صبخة" },
    { "\\bQā\u2018(?!\\w)", "قاع" },
    { "\\bḨalāt\\b", "حالة" },

    // deliberately without a leading boundary (kept as in the original)
    { "\u2018Irq\\b", "عرق" },
    { "\u2018Ays\\b", "عيس" },
    { "\u2018Ayb\\b", "عيب" },

    { "\\bFiyāḑ\\b", "فياض" },
    { "\\bHaḑb\\b", "هدب" },
    { "\\bḨarrat\\b", "حرة" },

    { "\\bKhawr\\b", "خور" },
    { "\\bAin\\b", "عين" },
    { "(?<!\\w)\u2018Ayn\\b", "عين" },
    { "\\bḨijl\\b", "حجل" },

    { "\\bḨattá\\b", "حتا" },

    { "\\bShiqqat\\b", "شقة" },
    { "(?<!\\w)\u2018Urūq\\b", "عروق" },

    { "\\bQalamat\\b", "قلمة" },
    { "\\bQulumat\\b", "قلمة" },

    { "\\bHadh\\b", "حد" },
    { "\\bUmm\\b", "أم" },
    { "\\bUmmahāt\\b", "أمهات" },

    { "\\bRimāl\\b", "رمال" },
    { "\\bRamlat\\b", "رملة" },

    { "\\bKhayţ\\b", "خيط" },

    { "(?<!\\w)\u2018Irq\\b", "عرق" },
    { "(?<!\\w)\u2018Urayq\\b", "عريق" },

    { "(?<!\\w)\u2018Alam\\b", "علم" },
    { "\\bJazīrat\\b", "جزيرة" },
    { "\\bJezîret\\b", "جزيرة" },

    { "\\bJāl\\b", "جال" },

    { "\\bRās\\b", "رأس" },
    // deliberately without a trailing boundary (kept as in the original)
    { "\\bRa\u2019s", "رأس" },

    { "\\bQal\u2018at\\b", "قلعة" },
    { "\\bSharm\\b", "شرم" },
    { "\\bQit\u2018at\\b", "قطعة" },
    { "\\bTell\\b", "تل" },
    { "\\bTall\\b", "تل" },
    { "\\bTulūl\\b", "تلول" },

    { "\\bZibārat\\b", "زبارة" },

    { "\\bMiqnāt\\b", "مقناة" },
    { "\\bQārat\\b", "قارة" },

    { "\\bMadīnat\\b", "مدينة" },
    { "\\bMaqbarat\\b", "مقبرة" },
    { "\\bRuḩayyat\\b", "رحية" },
    { "\\bDawḩat\\b", "دوحة" },
    { "\\bQaşr\\b", "قصر" },
    { "\\bNuhaydayn\\b", "نهيدين" },
    { "\\bBaţn\\b", "بطن" },
    { "\\bKhullat\\b", "خلات" },
    { "\\bGhars\\b", "غرس" },

    { "\\bMuqayrāt\\b", "مقيرات" },
    { "\\bGhadīr\\b", "غدير" },
    { "(?<!\\w)\u2018Uwaynat\\b", "عوينات" },
    { "(?<!\\w)\u2018Uqlat\\b", "عقلة" },
    { "\\bḨālat\\b", "حالة" },
    { "\\bKhalīj\\b", "خليج" },

    { "\\bDaḩl\\b", "دحل" },
    { "\\bDaḩal\\b", "دحل" },
    { "\\bDaḩlat\\b", "دحلة" },

    { "\\bDil\u2018ān\\b", "دلعان" },
    { "\\bBanī\\b", "بني" },
    { "\\bQurūn\\b", "قرون" },
    { "\\bMīnā\u2019(?!\\w)", "ميناء" },
    { "\\bKhuyūţ\\b", "خيوط" },
    { "\\bḐil\u2018ān\\b", "دلعان" },

    { "\\bRijm\\b", "رجم" },
    { "\\bRujm\\b", "رجم" },
    { "\\bRujūm\\b", "رجوم" },

    { "\\bHaḑabat\\b", "حدبة" },
    { "\\bAbraq\\b", "أبرق" },
    { "\\bSayḩ\\b", "سيح" },
    // was "\\Sāqiyat\\b": \S is "any non-whitespace character", not a word boundary before 'S'
    { "\\bSāqiyat\\b", "ساقية" },
    { "\\bQurayy\\b", "قري" },
    { "\\bSuḩūl\\b", "سهول" },
    { "\\bQuwīd\\b", "قويد" },

    { "\\bMishāsh\\b", "مشاش" },
    { "\\bMushāsh\\b", "مشاش" },

    { "\\bKhubb\\b", "خب" },
    { "\\bKhabb\\b", "خب" },

    { "\\bŞayhad\\b", "صيهد" },

    { "\\bSumr\\b", "سمر" },
    { "\\bSamrat\\b", "سمرة" },
    { "\\bSamrā\u2019(?!\\w)", "سمراء" },

    { "\\bQuşmat\\b", "قصمة" },
    { "\\bKhafs\\b", "خفس" },
    { "\\bJawb\\b", "جوب" },
    { "(?<!\\w)\u2018Alāt\\b", "علة" },
    { "\\bQawz\\b", "قوز" },
    { "\\bRijlat\\b", "رجلة" },
    { "\\bŞafrā\u2019(?!\\w)", "صفراء" },
    { "\\bQarn\\b", "قرن" },
    { "\\bDibdibat\\b", "دبدبة" },
    { "\\bThulmat\\b", "ثلمة" },
    { "\\bBarqā\u2019(?!\\w)", "برقاء" },
    { "\\bGhār\\b", "غار" },
    { "\\bJisrat\\b", "جسرة" },
    { "\\bMadārat\\b", "مدارة" },
    { "\\bSāddat\\b", "صعدة" },
    { "\\bŢu\u2018ūs\\b", "طعوس" },
    { "\\bNāziyat\\b", "نازية" },
    { "\\bQi\u2018r\\b", "قعر" },
    { "\\bḨabl\\b", "حبل" },
    { "\\bBinīyat\\b", "بنية" },
    { "\\bSahlat\\b", "سحلة" },
    { "\\bŢa\u2018aysāt\\b", "طعيسة" },
    { "\\bQaţ\u2018at\\b", "قطعة" },
    { "\\bJadhmā\u2019(?!\\w)", "جدماء" },

    { "\\bHashm\\b", "هشم" },
    { "\\bJar\u2018ā\\b", "جرعاء" },
    { "\\bQūr\\b", "قور" },
    { "\\bḨammat\\b", "حمة" },
    { "\\bShāţi\u2019(?!\\w)", "شاطيء" },
    { "\\bŢi\u2018s\\b", "طعس" },
    { "\\bḨamrūr\\b", "حمرور" },
    { "\\bLābat\\b", "لابة" },
    { "\\bDuwaykhilat\\b", "دويخيلات" },
    { "\\bMarsá\\b", "مرسى" },
    { "\\bDi\u2018b\\b", "دعب" },
    { "(?<!\\w)\u2018Idd\\b", "عد" },
    { "(?<!\\w)\u2018Ulaym\\b", "عليم" },
    { "\\bFasht\\b", "فشت" },
    { "\\bUfīḩim\\b", "عفحم" },
    { "\\bMazāri\u2018(?!\\w)", "مزارع" },
    { "\\bŢawīl\\b", "طويل" },
    { "\\bWark\\b", "ورك" },
    { "\\bMaḩfūr\\b", "محفور" },

    { "(?<!\\w)\u2018Abl\\b", "عبل" },
    { "\\bBinī\\b", "بني" },
    { "\\bḨuşaydat\\b", "حصيدة" },
    { "\\bNuqrat\\b", "نقرة" },
    { "\\bRumḩayn\\b", "رمحين" },
    { "\\bFarshat\\b", "فرشت" },
    { "\\bNaqā\\b", "نقا" },
    { "\\bNuşaylāt\\b", "نصيلات" },
    { "\\bSudd\\b", "سد" },
    { "(?<!\\w)\u2018Aqabat\\b", "عقبة" },
    { "\\bS\u2019irat\\b", "سعرة" },
    { "\\bTawal\\b", "طوال" },
    { "\\bBaḩr\\b", "بحر" },
    { "\\bNajfat\\b", "نجفة" },
    { "\\bRaḑmat\\b", "ردمة" },
    { "\\bŢiwāl\\b", "طوال" },
    { "\\bSahl\\b", "سهل" },
    { "\\bQīzān\\b", "قيزان" },
    { "\\bHadhlūl\\b", "هدلول" },

    { "\\bIbn\\b", "إبن" },
    { "\\bMīnā’(?!\\w)", "ميناء" },
    { "\\bMūsá\\b", "موسى" }
};

/// <summary>
/// Replace known Latin transliterations of single native words with their exact native spelling.
/// </summary>
/// <remarks>
/// Only the code "ar" (Arabic) is processed; for any other code, and for null or empty text, the
/// input is returned unchanged. Words that are not in the rule list are left as they are.
/// </remarks>
/// <param name="iso6393">Language code; only "ar" is processed</param>
/// <param name="name">Text containing Latin transliterations of native words</param>
/// <returns>Text in which every recognised word has been replaced by its native spelling</returns>
public static string ProduceExactNativeTextOfSingleLatinTransliterationsOfNativeWordsAccordingToLanguage(string iso6393, string name)
{
    /*
     * TODO (original author's note): remove English words like Island, Airport, Ridge, North, Customs,
     * Long, South, East, Inner, Channel, Reef, Islet...etc.
     *
     * To match:
     * '’' use '\u2019'
     * '‘' use '\u2018'
     */

    if (iso6393 != "ar" || string.IsNullOrEmpty(name)) return name;

    // CultureInvariant keeps the case-insensitive matching independent of the current culture.
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;

    int ruleCount = arabicExactWordRuleList.GetLength(0);

    // Rules are applied strictly in table order.
    for (int i = 0; i < ruleCount; i++)
    {
        name = Regex.Replace(name, arabicExactWordRuleList[i, 0], arabicExactWordRuleList[i, 1], options);
    }

    // Plain, case-sensitive substring replacement (as in the original).
    name = name.Replace("‘Iyāl", "عيال");

    return name;
}
1307:
1308: ////////////////////////////////////////////////////////////////////////////
1309:
/// <summary>
/// From a country code this will return the main language used in this country.
/// </summary>
/// <remarks>
/// The mapping is incomplete: only the countries listed in the method are recognised. Every other
/// value, including null, yields "en".
/// </remarks>
/// <param name="countryCode">Two-letter country code (iso2); letter case is ignored</param>
/// <returns>Two-letter language code: "ko", "ja", "ar", "fa" or "en"</returns>
public static string ReturnLanguageCodeUsedInCountryFromCountyCode(string countryCode)
{
    // Incomplete

    // Culture-invariant lower-casing: the case labels are plain ASCII, and a culture-sensitive
    // ToLower() (e.g. Turkish, where 'I' becomes dotless 'ı') would make codes such as "IQ" or
    // "IR" miss their label and fall through to the default.
    string normalizedCountryCode = countryCode == null ? string.Empty : countryCode.ToLowerInvariant();

    switch (normalizedCountryCode)
    {
        // Korea
        case "kr":
        case "kp":
            return "ko";

        // Japan
        case "jp":
            return "ja";

        // Arab countries
        case "dz":
        case "bh":
        case "eg":
        case "iq":
        case "jo":
        case "kw":
        case "lb":
        case "ly":
        case "mr":
        case "ma":
        case "om":
        case "ps":
        case "qa":
        case "sa":
        case "so":
        case "sd":
        case "sy":
        case "tn":
        case "ae":
        case "ye":
            return "ar";

        // Iran, Afghanistan, Tajikistan
        case "ir":
        case "af":
        case "tj":
            return "fa"; // Persian

        // English
        default:
            return "en";
    }
}
1365:
1366: /*
1367: ////////////////////////////////////////////////////////////////////////////
1368:
1369: /// <summary>
1370: /// Return the first language used by this user
1371: /// <param name="userLanguages">This should be HttpContext.Current.Request from webpages</param>
1372: /// </summary>
1373: public static string ReturnUserLanguage(HttpRequest userLanguages)
1374: {
1375: string language;
1376: string[] languages = HttpContext.Current.Request.UserLanguages;
1377:
1378: if (languages == null || languages.Length == 0)
1379: {
1380: language = null;
1381: }
1382: else
1383: {
1384: try
1385: {
1386: language = languages[0].ToLowerInvariant().Trim();
1387: language = language.Substring(0, 2);
1388: }
1389: catch (ArgumentException)
1390: {
1391: language = null;
1392: }
1393: }
1394:
1395: return language;
1396: }
1397: */
1398:
1399: ////////////////////////////////////////////////////////////////////////////
1400:
/// <summary>
/// Strip the two small Koranic marks (the small "ج" and the small "صلى"), each together with the
/// space in front of it, because they make the displayed text render unstable.
/// </summary>
/// <param name="line">Text to clean</param>
/// <returns>The text with every " ۚ" and " ۖ" sequence removed</returns>
public static string RemoveSmallKoranicJawazAndSiliCharacters(string line)
{
    // first the small ج, then the small صلى; only occurrences preceded by a space are removed
    return line
        .Replace(" ۚ", "")
        .Replace(" ۖ", "");
}
1417:
1418: ////////////////////////////////////////////////////////////////////////////
1419:
/// <summary>
/// Insert an elongation stroke into one well-known phrase so that the system does not render it
/// as a single character.
/// </summary>
/// <param name="line">Text to adjust</param>
/// <returns>The text with every occurrence of the phrase replaced by its elongated spelling</returns>
public static string SlightlyChangeSomeSentencesToPreventSystemFromGeneratingSingleCharacters(string line)
{
    // Further candidate rewrites, currently switched off:
    //s = s.Replace("الله", "اللـه");
    //s = s.Replace("ريال", "ريـال");
    //s = s.Replace("محمد", "مـحمد");
    //s = s.Replace("جل جلاله", "جل جلالـه");

    return line.Replace("صلى الله عليه وسلم", "صـلى الله عليه وسلم");
}
1436:
1437: ////////////////////////////////////////////////////////////////////////////
1438:
/// <summary>
/// Return the plural form of a word as produced by the Inflector library.
/// </summary>
/// <param name="word">Word to pluralize</param>
/// <returns>Result of Inflector.Inflector.Pluralize for the word</returns>
public static string Pluralize(string word)
{
    return Inflector.Inflector.Pluralize(word);
}
1450:
1451: ////////////////////////////////////////////////////////////////////////////
1452:
/// <summary>
/// Upper-case the first character of a text and leave the rest as it is.
/// </summary>
/// <param name="input">Text to change; may be null or empty</param>
/// <returns>The text with its first character upper-cased; null or empty input is returned unchanged</returns>
public static string FirstCharacterToUpper(string input)
{
    if (string.IsNullOrEmpty(input)) return input;

    string head = input.Substring(0, 1).ToUpper();
    string tail = input.Substring(1);

    return head + tail;
}
1465:
1466: ////////////////////////////////////////////////////////////////////////////
1467:
/// <summary>
/// Tell whether a text contains at least one of the German letters Ä, ä, Ö, ö, Ü, ü or ß.
/// <see href="http://en.wikipedia.org/wiki/German_alphabet"/>
/// </summary>
/// <remarks>
/// German uses the umlaut combinations Ä/ä, Ö/ö, Ü/ü and the ligature ß (eszett, or sharp s); they do
/// not constitute distinct letters in the alphabet.
/// </remarks>
/// <param name="line">Text to inspect; may be null</param>
/// <returns>True when one of the listed letters occurs in the text; false otherwise, including for null</returns>
public static bool ContainsGermanDiacriticLetter(string line)
{
    return line != null && Regex.IsMatch(line, "[ÄäÖöÜüß]");
}
1488:
1489: ////////////////////////////////////////////////////////////////////////////
1490:
/// <summary>
/// List of languages read from the language XML document. The list is built the first time it is
/// requested (and again whenever the stored list is null or empty) and then reused.
/// </summary>
/// <remarks>
/// Only language elements whose iso_639_1, iso_639_2_5 and iso_639_3 attributes are all non-empty
/// are included.
/// </remarks>
public static List<Language> List
{
    get
    {
        bool alreadyLoaded = languageList != null && languageList.Count != 0;

        if (!alreadyLoaded)
        {
            // <language iso_639_1="aa" iso_639_2_5="aar" iso_639_3="aar" name="" englishName="Afar" arabicName="" scope="" type=""/>
            languageList = XDocument.Elements("languageList").Elements("iso").Elements("language")
                .Where(e => e.Attribute("iso_639_1").Value != "" && e.Attribute("iso_639_2_5").Value != "" && e.Attribute("iso_639_3").Value != "")
                .Select(e => new Language
                {
                    // the symbol is the same two-letter code as Iso6391
                    Symbol = e.Attribute("iso_639_1").Value,
                    Iso6391 = e.Attribute("iso_639_1").Value,
                    Iso63925 = e.Attribute("iso_639_2_5").Value,
                    Iso6393 = e.Attribute("iso_639_3").Value,
                    Name = e.Attribute("name").Value,
                    EnglishName = e.Attribute("englishName").Value,
                    ArabicName = e.Attribute("arabicName").Value
                })
                .ToList();
        }

        return languageList;
    }
}
1519:
1520: ////////////////////////////////////////////////////////////////////////////
1521:
/// <summary>
/// Language XML document, loaded from the assembly's embedded resource "Ia.Cl.model.language.xml"
/// on every access.
///
/// How to embed and access resources by using Visual C# http://support.microsoft.com/kb/319292/en-us
///
/// 1. Change the "Build Action" property of your XML file from "Content" to "Embedded Resource".
/// 2. Add "using System.Reflection".
/// 3. Manifest resource stream will start with the project namespace, the location of XML file.
///
/// </summary>
/// <value>
/// The parsed document, or null when the resource is missing, empty, or cannot be parsed.
/// </value>
private static XDocument XDocument
{
    get
    {
        xDocument = null;

        Assembly assembly = Assembly.GetExecutingAssembly();

        // NOTE(review): manifest resource names are case-sensitive; confirm that the lower-case
        // "model" segment matches the name the resource is actually embedded under.
        // Both the stream and the reader are disposed here; the original left them open.
        using (Stream stream = assembly.GetManifestResourceStream("Ia.Cl.model.language.xml"))
        {
            // GetManifestResourceStream returns null for an unknown resource name. That case is
            // reported like the other failure cases (null document) instead of letting the
            // StreamReader constructor throw.
            if (stream != null)
            {
                using (StreamReader streamReader = new StreamReader(stream))
                {
                    try
                    {
                        if (streamReader.Peek() != -1)
                        {
                            xDocument = System.Xml.Linq.XDocument.Load(streamReader);
                        }
                    }
                    catch (Exception)
                    {
                        // Deliberate best effort: a resource that cannot be read or parsed yields a null document.
                        xDocument = null;
                    }
                }
            }
        }

        return xDocument;
    }
}
1560:
1561: ////////////////////////////////////////////////////////////////////////////
1562: ////////////////////////////////////////////////////////////////////////////
1563: }
1564:
1565: ////////////////////////////////////////////////////////////////////////////
1566: ////////////////////////////////////////////////////////////////////////////
1567: }