How to extract text and emojis from a string?

Question

I am trying to separate the plain text from the emojis in a string, but I am having some issues achieving this. What I tried is below.

import emoji

# Sample tweet; the quotation marks are the literal HTML entity &quot;.
text = "#samplesenti @emojitweets i ❤❤❤ sentiment &quot; analysis &quot; http://senti.com/pic_01.jpg "

def extract_text_and_emoji(text=text):
    """Split ``text`` into its plain words and its emoji-bearing tokens.

    As a side effect, rebinds the module-level names ``allchars`` (every
    character of ``text``) and ``emoji_list`` (the characters that pass the
    emoji membership test).

    Returns a ``(clean_text, clean_emoji)`` tuple.
    """
    global allchars, emoji_list

    # Tokens starting with '@', 'http://', '&' or '#' are not needed for
    # sentiment, so they are discarded up front.
    ignored_prefixes = ('@', 'http://', '&', '#')
    kept_tokens = [tok for tok in text.split() if not tok.startswith(ignored_prefixes)]

    # Character view of the raw input and the emoji found in it.
    allchars = list(text)
    emoji_list = [ch for ch in allchars if ch in emoji.UNICODE_EMOJI]

    # Text part: surviving tokens that contain none of the detected emoji.
    clean_text = ' '.join(
        tok for tok in kept_tokens
        if not any(symbol in tok for symbol in emoji_list)
    )

    # Emoji part: every raw token holding at least one detected emoji.
    clean_emoji = ''.join(
        tok for tok in text.split()
        if any(symbol in tok for symbol in emoji_list)
    )
    return (clean_text, clean_emoji)


# Placeholders so both globals exist before the function rebinds them.
allchars, emoji_list = 0, 0
(clean_text, clean_emoji) = extract_text_and_emoji()
# Each report starts with "\n" so it is preceded by a blank line.
print('\nAll Char:', allchars)
print('\nAll Emoji:', emoji_list)
print('\n', clean_text)
print('\n', clean_emoji)

I am expecting to get this to my console:

All Char: ['#', 's', 'a', 'm', 'p', 'l', 'e', 's', 'e', 'n', 't', 'i', ' ', '@', 'e', 'm', 'o', 'j', 'i', 't', 'w', 'e', 'e', 't', 's', ' ', 'i', ' ', '❤', '❤', '❤', ' ', 's', 'e', 'n', 't', 'i', 'm', 'e', 'n', 't', ' ', '&', 'q', 'u', 'o', 't', ';', ' ', 'a', 'n', 'a', 'l', 'y', 's', 'i', 's', ' ', '&', 'q', 'u', 'o', 't', ';', ' ', 'h', 't', 't', 'p', ':', '/', '/', 's', 'e', 'n', 't', 'i', '.', 'c', 'o', 'm', '/', 'p', 'i', 'c', '_', '0', '1', '.', 'j', 'p', 'g', ' ']

All Emoji: ['❤', '❤', '❤']

 i sentiment analysis

 ❤❤❤

but I get this instead:

All Char: ['#', 's', 'a', 'm', 'p', 'l', 'e', 's', 'e', 'n', 't', 'i', ' ', '@', 'e', 'm', 'o', 'j', 'i', 't', 'w', 'e', 'e', 't', 's', ' ', 'i', ' ', '❤', '❤', '❤', ' ', 's', 'e', 'n', 't', 'i', 'm', 'e', 'n', 't', ' ', '&', 'q', 'u', 'o', 't', ';', ' ', 'a', 'n', 'a', 'l', 'y', 's', 'i', 's', ' ', '&', 'q', 'u', 'o', 't', ';', ' ', 'h', 't', 't', 'p', ':', '/', '/', 's', 'e', 'n', 't', 'i', '.', 'c', 'o', 'm', '/', 'p', 'i', 'c', '_', '0', '1', '.', 'j', 'p', 'g', ' ']

All Emoji: []

 i ❤❤❤ sentiment analysis

Andrej Kesely · Accepted Answer

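Add ['en'] to emoji.UNICODE_EMOJI. The dictionary is keyed by language code first, so testing a character against the top level never matches an emoji: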

import emoji

# Sample tweet; the quotation marks are the literal HTML entity &quot;.
text = "#samplesenti @emojitweets i ❤❤❤ sentiment &quot; analysis &quot; http://senti.com/pic_01.jpg "


def extract_text_and_emoji(text=text):
    """Separate the plain words of ``text`` from its emoji.

    Side effects: rebinds the module-level names ``allchars`` (list of every
    character in ``text``) and ``emoji_list`` (the characters found in
    ``emoji.UNICODE_EMOJI["en"]``, duplicates included, in input order).

    Returns:
        A ``(clean_text, clean_emoji)`` tuple. ``clean_text`` is the
        space-joined tokens that neither start with a removed prefix nor
        contain an emoji. ``clean_emoji`` is the concatenation of every
        whitespace-delimited token of ``text`` that contains an emoji.
    """
    global allchars, emoji_list
    # remove all tagging and links, not need for sentiments
    remove_keys = ("@", "http://", "&", "#")
    tokens = text.split()
    kept_tokens = [tok for tok in tokens if not tok.startswith(remove_keys)]

    # setup the input, get the characters and the emoji lists
    allchars = list(text)
    # NOTE(review): UNICODE_EMOJI appears to have been removed in emoji 2.0;
    # on newer releases emoji.is_emoji(c) looks like the replacement -- confirm
    # against the installed version.
    emoji_list = [c for c in allchars if c in emoji.UNICODE_EMOJI["en"]]  # <-- HERE!

    # Every entry of emoji_list is a single character, so "token contains an
    # emoji" is a character-membership test; a set avoids re-scanning the
    # duplicates for each token.
    emoji_chars = set(emoji_list)

    # extract text
    clean_text = " ".join(
        tok for tok in kept_tokens if emoji_chars.isdisjoint(tok)
    )

    # extract emoji
    clean_emoji = "".join(
        tok for tok in tokens if not emoji_chars.isdisjoint(tok)
    )
    return (clean_text, clean_emoji)


# Placeholders so both globals exist before the function rebinds them.
allchars, emoji_list = 0, 0
(clean_text, clean_emoji) = extract_text_and_emoji()
# Each report starts with "\n" so it is preceded by a blank line.
print("\nAll Char:", allchars)
print("\nAll Emoji:", emoji_list)
print("\n", clean_text)
print("\n", clean_emoji)

Prints:


All Char: ['#', 's', 'a', 'm', 'p', 'l', 'e', 's', 'e', 'n', 't', 'i', ' ', '@', 'e', 'm', 'o', 'j', 'i', 't', 'w', 'e', 'e', 't', 's', ' ', 'i', ' ', '❤', '❤', '❤', ' ', 's', 'e', 'n', 't', 'i', 'm', 'e', 'n', 't', ' ', '&', 'q', 'u', 'o', 't', ';', ' ', 'a', 'n', 'a', 'l', 'y', 's', 'i', 's', ' ', '&', 'q', 'u', 'o', 't', ';', ' ', 'h', 't', 't', 'p', ':', '/', '/', 's', 'e', 'n', 't', 'i', '.', 'c', 'o', 'm', '/', 'p', 'i', 'c', '_', '0', '1', '.', 'j', 'p', 'g', ' ']

All Emoji: ['❤', '❤', '❤']

 i sentiment analysis

 ❤❤❤

How to extract text and emojis from a string?

Answers (2)

Related Questions