Sapir
Sapir

Reputation: 263

Encoding sys.argv in python 2.7

I'm writing a script that takes input from another program (Maltego) and uses sys.argv[1] as a variable. When the information from Maltego is in Hebrew, I get question marks rather than the actual text. I tried encoding the text in different ways, but everything failed.

I'm using Python 2.7.

Any idea for a solution will be greatly appreciated.

The script is run from within Maltego:

# -*- coding: utf-8 -*-

import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
from MaltegoTransform import *
import codecs
import sys
# Re-import the sys module and then set the interpreter-wide default string
# encoding to UTF-8.
# NOTE(review): presumably reload() is needed because setdefaultencoding is
# not reachable on the initially imported module in Python 2 -- confirm.
reload(sys)
sys.setdefaultencoding('utf-8')


# Transform object from the MaltegoTransform helper module imported above.
me = MaltegoTransform()
# First command-line argument (supplied by Maltego), encoded as UTF-8.
# NOTE(review): in Python 2 sys.argv entries are presumably already byte
# strings, so .encode() would imply an implicit decode first; the question
# reports that Hebrew input already arrives as '?' at this point -- confirm.
search_value = sys.argv[1].encode("utf-8")

Upvotes: 0

Views: 601

Answers (1)

funkr
funkr

Reputation: 101

Here's my fifty pence:

import sys


def win32_utf8_argv():
    """Return the process command-line arguments as UTF-8 encoded strings.

    Python 2 does not support Unicode in sys.argv on Windows: characters
    that cannot be represented are replaced with '?' before the script
    sees them.  This function reads the wide-character command line with
    kernel32.GetCommandLineW, splits it with shell32.CommandLineToArgvW and
    encodes every argument as UTF-8.

    Leading entries that are not exposed through sys.argv (the Python
    executable and any interpreter options) are dropped so that the result
    lines up index-for-index with sys.argv.

    Returns:
        A list of UTF-8 encoded arguments, or None on failure (for example
        when the Win32 API cannot be loaded because the platform is not
        Windows).
    """
    try:
        from ctypes import POINTER, byref, c_int, c_void_p, windll
        from ctypes.wintypes import LPCWSTR, LPWSTR

        # kernel32/shell32 exports use the stdcall convention, so both are
        # bound through windll rather than cdll.
        GetCommandLineW = windll.kernel32.GetCommandLineW
        GetCommandLineW.argtypes = []
        GetCommandLineW.restype = LPCWSTR

        CommandLineToArgvW = windll.shell32.CommandLineToArgvW
        CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
        CommandLineToArgvW.restype = POINTER(LPWSTR)

        LocalFree = windll.kernel32.LocalFree
        LocalFree.argtypes = [c_void_p]
        LocalFree.restype = c_void_p

        argc = c_int(0)
        argv = CommandLineToArgvW(GetCommandLineW(), byref(argc))
        if not argv:
            # NULL pointer: the command line could not be parsed.
            return None
        try:
            if argc.value <= 0:
                return None
            # Skip however many leading entries sys.argv does not contain
            # (executable, interpreter switches); never go negative.
            start = max(argc.value - len(sys.argv), 0)
            return [argv[i].encode('utf-8')
                    for i in range(start, argc.value)]
        finally:
            # The argument array is allocated by CommandLineToArgvW and
            # must be released by the caller with LocalFree.
            LocalFree(argv)
    except Exception:
        # Best-effort helper: any failure is reported as None.
        return None

if __name__ == '__main__':
    # win32_utf8_argv() returns None on failure (e.g. off Windows); fall
    # back to the interpreter's own argument list in that case.
    args = win32_utf8_argv() or sys.argv
    if len(args) > 1:
        print(args[1])
    else:
        # No argument was supplied; exit with a message instead of an
        # IndexError traceback.
        sys.exit("usage: %s <value>" % sys.argv[0])

Upvotes: 1

Related Questions