Reputation: 95
I am trying to remove the diacritics from Arabic text. For example, I need to convert this َب
to this ب.
Here is my code:
// Filters the input and shows the filtered result in the text view.
// Binding `text` once replaces the repeated force-unwraps; a nil value now takes
// the same branch as an empty string instead of crashing on `text!`.
if let text = text, !text.isEmpty {
    // Walk Unicode scalars rather than Characters: Swift groups a base letter and
    // its combining marks into a single Character, so a per-Character lookup never
    // sees the bare letter and cannot drop the mark on its own.
    for scalar in text.unicodeScalars {
        print(scalar)
        print(scalar.value)
        // NOTE(review): assumes `allowed` lists the base letters to keep — confirm.
        if allowed.contains("\(scalar)") {
            newText.append(Character(scalar))
        }
    }
    // Display the filtered text; previously the unfiltered input was assigned here,
    // which discarded the work done in the loop.
    self.textView.text = newText
} else {
    // TODO :
    // show an alert
    print("uhhh no way")
}
I have tried these solutions, but with no luck:
How to remove diacritics from a String in Swift?
NSString : easy way to remove UTF-8 accents from a string?
Upvotes: 4
Views: 2063
Reputation: 776
Based on @Hashem-Aboonajmi's answer:
extension String {
    /// Returns the string with combining marks (accents or diacritics) removed.
    ///
    /// - Parameter active: When `false`, the receiver is returned untouched.
    /// - Returns: The stripped string, or `self` when `active` is `false`.
    func stripDiacritics(active: Bool) -> String {
        guard active else { return self }
        // CFStringTransform edits its argument in place, so operate on a mutable copy.
        let buffer = NSMutableString(string: self)
        CFStringTransform(buffer as CFMutableString, nil, kCFStringTransformStripCombiningMarks, false)
        return buffer as String
    }
}
Pass `active: false` in case you want to deactivate the stripping.
Upvotes: 1
Reputation: 19
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Arabic preprocessing demo: removing diacritics with pyarabic.
import pyarabic.araby as araby
import pyarabic.number as number
from pyarabic.araby import strip_harakat, strip_tashkeel

text = u'الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ'

# strip_harakat removes the harakat from the text but keeps the Shadda.
without_harakat = strip_harakat(text)
print(without_harakat)
# الحمد للّه ربّ العالمين

# strip_tashkeel removes the vowel marks, Shadda included.
without_tashkeel = strip_tashkeel(text)
print(without_tashkeel)
#الحمد لله رب العالمين
Upvotes: 1
Reputation: 13900
Use this extension:
extension String {
    /// The receiver with combining marks (accents or diacritics) removed.
    var stripDiacritics: String {
        // Foundation's StringTransform API applies the same strip-combining-marks
        // transform without a manual mutable copy; fall back to the receiver if
        // the transform cannot be applied.
        return applyingTransform(.stripCombiningMarks, reverse: false) ?? self
    }
}
Upvotes: 5
Reputation: 2469
You can use a regular expression. Try this code:
// Removes the Arabic harakat from `myString` with a regular expression and prints the result.
let myString = "الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ"
// The character class spans U+064B through U+0652.
// `try!` is tolerable here only because the pattern is a fixed literal, so a failure
// would be a programming error rather than a runtime condition.
let regex = try! NSRegularExpression(pattern: "[\\u064b-\\u064f\\u0650-\\u0652]", options: NSRegularExpression.Options.caseInsensitive)
// NSRange is measured in UTF-16 code units. Deriving it from the String's own indices
// keeps the range correct even when the text contains characters outside the BMP
// (e.g. emoji), where a unicode-scalar count would be too short and leave the tail unprocessed.
let range = NSRange(myString.startIndex..<myString.endIndex, in: myString)
let modString = regex.stringByReplacingMatches(in: myString, options: [], range: range, withTemplate: "")
print(modString)
Output : الحمد لله رب العالمين
Upvotes: 6
Reputation: 6067
You can use CFStringTransform
with kCFStringTransformStripCombiningMarks
to remove combining marks (accents or diacritics):
// Strips combining marks (accents or diacritics) from `original` and prints the result.
let original = "ََب"
// CFStringTransform works in place, so it needs a mutable copy of the input.
let mutableString = NSMutableString(string: original) as CFMutableString
// The last argument is the `reverse` flag; plain `false` replaces `Bool(truncating: 0)`.
CFStringTransform(mutableString, nil, kCFStringTransformStripCombiningMarks, false)
// Bridge straight to a Swift String instead of copying and force-casting to NSString.
let normalized = mutableString as String
print(normalized)
CFStringTransform
A function that transforms a mutable string in place according to the given transform identifier.
kCFStringTransformStripCombiningMarks
The identifier of a transform to strip combining marks (accents or diacritics).
Upvotes: 1