Reputation: 71
I want to use AWK to convert a list of decimal numbers in a file to binary but there seems to be no built-in method. Sample file is as below:
134218506
134218250
134217984
1610612736
16384
33554432
Upvotes: 2
Views: 3463
Reputation: 57
This is a slight variation on James Brown's answer, using a bit shift instead of division (requires gawk v3.1+):
# dec2bin(n): return the binary digits of integer n as a string with a "0b" prefix.
# The second parameter b is only a local accumulator; callers pass n alone.
function dec2bin(n, b) {
    # Zero never enters the loop below, so seed its single digit here.
    if (n == 0)
        b = 0
    else
        b = ""
    # Peel off the lowest bit on each pass; rshift() halves n (instead of int(n / 2)).
    for (; n > 0; n = rshift(n, 1))
        b = (n % 2) "" b
    return "0b" b
}
Upvotes: 0
Reputation: 204310
Here's an approach that'll run 2-3 times faster than the accepted answer in case execution time matters. It works by first converting the decimal to hex and then converting each hex character to its binary equivalent, so it loops fewer times courtesy of a hash lookup of the hex:
$ cat dec2bin.awk
# Build h2b: maps every hex digit (upper- and lower-case) to its 4-bit binary string.
BEGIN {
    split("0 1 2 3 4 5 6 7 8 9 A B C D E F", hex, " ")
    split("0000 0001 0010 0011 0100 0101 0110 0111 1000 1001 1010 1011 1100 1101 1110 1111", bin, " ")
    # hex[] and bin[] are parallel arrays indexed from 1.
    for (i = 1; (i in hex); i++) {
        h2b[hex[i]] = h2b[tolower(hex[i])] = bin[i]
    }
}
# hex2bin(hex): convert a string of hex digits to a string of binary digits.
#   hex        - hex digits in either case; each is looked up in the global h2b
#                map, and characters missing from h2b contribute nothing
#   n, i, bin  - locals (callers pass only hex)
# Returns the binary digits without leading zeros. A value of zero returns "0"
# rather than the empty string that stripping every zero would leave behind.
function hex2bin(hex, n,i,bin) {
    n = length(hex)
    for (i=1; i<=n; i++) {
        bin = bin h2b[substr(hex,i,1)]
    }
    # All digits are zero: keep exactly one so the caller never prints a blank line.
    if (bin ~ /^0+$/) {
        return "0"
    }
    sub(/^0+/,"",bin)
    return bin
}
# dec2bin(dec): convert a decimal number to its binary digit string.
#   dec       - the decimal value
#   hex, bin  - locals (callers pass only dec)
# The value is first rendered in hex so that hex2bin() needs one table lookup
# per 4 bits instead of one loop pass per bit.
function dec2bin(dec, hex, bin) {
    # No trailing newline in the format: it would be handed to hex2bin() as an
    # extra "digit" and create a stray empty entry in its lookup table.
    hex = sprintf("%x", dec)
    bin = hex2bin(hex)
    return bin
}
{ print dec2bin($0) }
$ awk -f dec2bin.awk file
1000000000000000001100001010
1000000000000000001000001010
1000000000000000000100000000
1100000000000000000000000000000
100000000000000
10000000000000000000000000
e.g. with an input file that contains 1 million 6-digit numbers created by this script:
$ awk -v min=100000 -v max=999999 -v num=1000000 'BEGIN{srand(); for (i=1;i<=num;i++) print int(min+rand()*(max-min+1))}' > file
the timing for the above script is:
$ time awk -f tst.awk file > o1
real 0m2.378s
user 0m2.296s
sys 0m0.000s
while the time for the currently accepted answer is:
$ time awk -f tst2.awk file > o2
real 0m6.047s
user 0m5.875s
sys 0m0.015s
and they both produce the same output:
$ diff o1 o2
$
Upvotes: 2
Reputation: 2885
a recursive way of converting binary string to decimal :
echo 10000101010001000010100000101010001101000110001110101 |
# gawk profile, created Wed Jun 26 19:22:10 2024
BEGIN {
1 CONVFMT = OFMT = "%.250g"
}
1 $++NF = ____($1)
# ____(__, ___, _): recursively convert a string of binary digits to a decimal number.
#   __   - the binary digit string
#   ___  - scratch; overwritten with the length of __ once leading zeros are dropped
#   _    - scratch counter; no call site passes it, so it starts unset
# The leading numbers on some lines of this listing are gawk profiler
# execution counts, not program text.
# Outline, read from the single return expression below:
#   1. index((__) (++_), _++) locates the first "1" (a "1" is appended so the
#      search always succeeds) and substr() drops everything before it.
#   2. Strings of at most _ + _ digits are evaluated directly with arithmetic.
#   3. If no "0" is left (! index(__, !_)), the value is 2 ^ length - 1.
#   4. Otherwise the string is split into two parts that are converted
#      recursively and recombined using a power of _ as the weight.
# NOTE(review): steps 2 and 4 depend on the evaluation order of the embedded
# ++/-- side effects; confirm against the awk implementation in use.
31 function ____(__, ___, _) {
31 return \
(___ = length(__ = substr(__, index((__) (++_), _++)))) <= _ + _ \
? (_ < ___ ? substr(__, ___) + _ * (_ * (_ * (___ == _ + _ &&
__ = substr(__, _)) + (--_<__)) + substr(__, _ + _,_)) \
: ___ < _ ? +__ : _ + substr(__, _)) \
: ! index(__, !_) \
? _--^___ - _ \
: ____(substr(___ % _ && ___++ ? __ = (!_)__ : __, (___/= _--) + _),
(__ = substr(__, _, ___))^!_) + ____(__, __ = _++) * _^___
}
10000101010001000010100000101010001101000110001110101 4688888899996789
This boolean test
! index(__, !_)
? _--^___ - _
is for checking whether there are any zeros in the binary string. If there are none, then directly return
2 ^ length(str) - 1
instead of wasting time converting them one by one. So for an input like this
11111111111111111111111111111111111111111111111111111
The function would directly return
9007199254740991
without recursing any levels.
Upvotes: 0
Reputation: 26531
Although you asked for awk
, you or others coming here with a need to print binary from decimal numbers may find that bc
is more to your liking. The code is more succinct than awk's, and bc has been delivered by default with UNIX/Linux since practically The Epoch. Here it is:
$ bc <<EOF
ibase=10
obase=2
$(cat file)
EOF
or
# Quote the substitution so that every bash version keeps one number per line
# (older versions collapse the newlines of an unquoted here-string word).
bc <<< "$(awk 'BEGIN{ print "ibase=10; obase=2"}1' file)"
or even:
echo "ibase=10; obase=2; $(cat file)" | bc
Upvotes: 0
Reputation: 37464
Here is an awk way, functionized for your pleasure:
awk '
# d2b(d): return the binary representation of the integer part of d.
#   d        - decimal number; any fractional part is discarded
#   b, sign  - locals (callers pass only d)
# Negative values are rendered as a minus sign followed by the binary
# magnitude, e.g. -3 gives -11. Zero gives 0.
function d2b(d, b, sign) {
    d = int(d)
    # Convert the magnitude only: the % operator keeps the sign of the
    # dividend, so looping on a negative d would prepend "-1" for each set bit.
    if (d < 0) {
        sign = "-"
        d = -d
    }
    while (d) {
        b = (d % 2) b
        d = int(d / 2)
    }
    return sign "" (b == "" ? 0 : b)
}
{
print d2b($0)
}' file
Output of the first three records:
1000000000000000001100001010
1000000000000000001000001010
1000000000000000000100000000
Output for 0, -1 and -2:
0
-1
-10
Upvotes: 12
Reputation: 2885
on top of what others have already mentioned, this function has a rapid shortcut for non-negative integer powers of 2
—- (since they always have a binary pattern of /^[1][0]*$/
)
version 1 : processing in 3-bit chunks instead of bit-by-bit :
{m,g}awk '
BEGIN {
1 CONVFMT="%.250g"
1 _^=OFMT="%.25g"
}
($++NF=________v1($_))^!_
# ________v1(__, ___, _, ____, _____): decimal to binary, three bits per loop pass.
#   __                   - the decimal value to convert
#   ___, _, ____, _____  - scratch variables (the caller passes only __)
# Leading numbers on some lines are gawk profiler execution counts.
function ________v1(__,___,_,____,_____)
{
# Shortcut: _ is set to 2 and ___ to log(__)/log(_); when _ ^ ___ reproduces
# the input, the answer is a 1 followed by ___ zeros, built either as a power
# of ten or, for larger exponents, with sprintf().
# NOTE(review): the equality test is floating point - confirm it cannot also
# succeed for an input that is not a power of two.
6 if (+__==(_+=_^=____="")^(___=log(__)/log(_))) { # 2
2 return \
___<=_^_^_ \
? (_+_*_*_)^___ \
: sprintf("%.f%0*.f",--_,___,--_)
}
# Build the lookup string in ___; after the gsub() it presumably holds the
# eight 3-character codes 000 .. 111 back to back - TODO confirm.
4 ___=(!_!_!_!!_) (_^((_____=_*_*_)+_)-_^_^_+(++_))
4 gsub("..", "&0&1", ___)
# Each pass prepends the characters selected by __ % _____ to ____ and
# replaces __ with int(__ / _____).
41 while(__) {
41 ____ = substr(___,
__%_____*_+(__=int(__/_____))^!_,_)____
}
# Return the accumulated string starting at its first "1".
4 return substr(__=____, index(__, _^(! _)))
}'
version 2 : first use
sprintf()
to convert to octals, before mapping to binary
# ________v2(__, ___, _, ____, _____): decimal to binary by way of octal text.
#   __                   - the decimal value to convert
#   ___, _, ____, _____  - scratch variables (the caller passes only __)
# Leading numbers on some lines are gawk profiler execution counts.
function ________v2(__,___,_,____,_____)
{
# Shortcut: _ is set to 2 and ___ to log(__)/log(_); when _ ^ ___ reproduces
# the input, the answer is a 1 followed by ___ zeros.
# NOTE(review): the equality test is floating point - confirm it cannot also
# succeed for an input that is not a power of two.
6 if (+__==(_+=_^=____="")^(___=log(__)/log(_))) { # 2
2 return \
___<=_^_^_ \
? (_+_*_*_)^___ \
: sprintf("%.f%0*.f",--_,___,--_)
}
# Build the lookup string in ___; after the gsub() it presumably holds the
# eight 3-character codes 000 .. 111 back to back - TODO confirm.
4 ___=(!_!_!_!!_) (_^((_____=_*_*_)+_)-_^_^_+(++_))
4 gsub("..", "&0&1", ___)
# Keep the lookup string in _____ because ___ is reused below.
4 _____=___
# Render the value as octal text: the quotient and the remainder by ___ are
# formatted separately, the remainder zero-padded to a fixed number of digits.
4 __=sprintf("%o%.*o", int(__/(___=++_^(_*--_+_))),
_*_+!!_, __%___)
4 sub("^[0]+", "", __)
# Walk the octal digits from last to first, prepending the characters looked
# up in _____ for each digit.
41 for (___=length(__); ___; ___--) {
41 ____ = substr(_____, substr(__,
___,!!_)*_ + !!_,_)____
}
# Return the accumulated string starting at its first "1".
4 return substr(____, index(____,!!_))
}
|
134218506 1000000000000000001100001010
134218250 1000000000000000001000001010
134217984 1000000000000000000100000000
1610612736 1100000000000000000000000000000
16384 100000000000000
33554432 10000000000000000000000000
version 3 : reasonably zippy (
29.5 MB/s
throughput on mawk2
) version by using a caching array and processing 8-bits each round
- outputs are zero-padded to minimum 8 binary digits wide
.
{m,g,n}awk '
# ________(_______, _, __, ____, ______): fill the array _______ with the 16
# four-bit binary strings, indexed 0 .. 15, and return 1.
#   _______             - array to build; it is emptied first by split()
#   _, __, ____, ______ - loop variables, one per bit; each takes the values 1 then 0
# Leading numbers on some lines are gawk profiler execution counts.
1 function ________(_______,_, __,____,______)
{
# Splitting an empty string clears the array; the nested assignment also
# blanks the four loop variables.
1 split(_=__=____=______="", _______, _)
# Four nested loops, most significant bit outermost.
2 for (_^=_<_; -_<=+_; _--) {
4 for (__^=_<_; -__<=+__; __--) {
8 for (____^=_<_; -____<=+____; ____--) {
16 for (______^=_<_; -______<=+______; ______--) {
# Index = 8*_ + 4*__ + 2*____ + ______ ; value = the four bits concatenated.
16 _______[_+_+_+_+_+_+_+_+__+__+\
__+__+____+____+______]=\
(_)__ (____)______
}
}
}
}
# _<_ is 0, so this is _ ^ 0, i.e. 1.
1 return _^(_<_)
}
BEGIN {
1 CONVFMT = "%." ((_+=(_^=_<_)+(_+=_))*_)(!_)"g"
1 OFMT = "%." (_*_) "g"
1 _ = ________(_____)
}
($++NF=___($_))^!_
# ___(__, ____, _, ______): decimal to binary, eight bits per loop pass, using
# the global lookup array _____ (four-bit strings indexed 0 .. 15).
#   __               - value to convert; truncated with int()
#   ____, _, ______  - scratch variables (the caller passes only __)
# Values below ______ are returned as the two table entries for the high and
# low half of the byte; for larger values the leading zeros of the top byte
# are removed by int().
# Leading numbers on some lines are gawk profiler execution counts.
function ___(__,____,_,______)
{
# The assignment chain appears to leave _ = 16 and ______ = _ * _ - TODO confirm.
6 if ((__=int(__))<(______=\
(_*=_+=_+=_^=____="")*_)) {
return _____[int(__/_)]_____[__%_]
}
# Prepend the two table entries for the lowest byte, then drop that byte from __.
16 do { ____=_____[int(__/_)%_]_____[__%_]____
} while (______<=(__=int(__/______)))
# Remaining top byte: int() turns its digit string into a number, which
# discards leading zeros, before the lower bytes are appended.
6 return int(_____[int(__/_)%_]\
_____[ (__) %_])____
}
Upvotes: -1
Reputation: 353
# gawk binary number functions
# RPC 09OCT2022
# bin_to_n(i): convert a string of binary digits to its integer value.
#   i               - string of "0"/"1" characters, most significant bit first
#   n, k, b, nbits  - locals (callers pass only i), so the function no longer
#                     overwrites global variables of the same names
# Every character of i is consumed, so the input need not be exactly 8 digits
# long. Any character other than "1" counts as a 0 bit.
function bin_to_n(i,    n, k, b, nbits)
{
    n = 0;
    nbits = length(i);
    for (k = 1; k <= nbits; k++) {
        # Shift the accumulated value left by one bit, then add the new bit.
        n = n * 2;
        b = substr(i, k, 1);
        if (b == "1") {
            n = n + 1;
        }
    }
    return (n);
}
# dectobin(n): return n as a binary string prefixed with "0b".
#   n                        - non-negative integer to convert
#   binstring, bn, k, width  - locals (callers pass only n), so the function
#                              no longer overwrites globals of the same names
# At least 8 digits are produced; values of 256 and above get as many extra
# digits as they need instead of being silently truncated to 8 bits.
# Side effect: writes trace text ("dectobin: n in ..." and " bn ...") to
# standard output.
function dectobin(n,    binstring, bn, k, width)
{
    printf("dectobin: n in %d ",n);
    binstring = "0b";   # some C compilers allow 0bXXXXXXXX format numbers
    bn = 128;
    width = 8;
    # Widen beyond 8 bits when n does not fit.
    while (n >= bn * 2) {
        bn = bn * 2;
        width++;
    }
    # Emit one digit per bit weight, highest weight first.
    for (k = 0; k < width; k++) {
        if (n >= bn) {
            binstring = binstring "1";
            n = n - bn;
        } else {
            binstring = binstring "0";
        }
        printf(" bn %d",bn);
        bn = bn / 2;
    }
    return binstring;
}
BEGIN {
    FS = " ";
    # There is no built-in here for turning a character into its numeric code,
    # so build a string holding every byte value 0..255 in order;
    # index(table, c) - 1 then yields the code of character c.
    # The bound is 256 so that code 255 is included as well.
    for (i = 0; i < 256; i++) {
        table = sprintf("%s%c", table, i);
    }
}
{
    # Each input field is expected to be a binary number,
    # e.g. "01000001 01000010" is AB.
    for (i = 1; i <= NF; i++)
        printf("bin-num#%d: %x --> %c\n", i, bin_to_n($i), bin_to_n($i));

    # Self-test: print every character of a fixed string with its code in hex
    # and in binary.
    s = "ABC123string to test";
    for (i = 0; i < length(s); i++) {
        # ss is the current character; it was printed but never assigned before.
        ss = substr(s, i+1, 1);
        nn = index(table, ss) - 1;
        printf("substr :%s:%x:",ss,nn);
        printf(" :%d: %s\n", i, dectobin(nn));
    }
}
Upvotes: 0
Reputation: 2491
You can try with dc :
# -f infile : read the numbers from infile onto the dc stack
# -e '...'  : the dc commands to run on them
dc -f infile -e '
z # push the number of values on the stack
sa # store that count in register a
2
o # set the output radix to 2 : binary
[
Sb # move the top value of the main stack onto the stack of register b
# (register b is used here as a stack, which reverses the order)
z
0 <M # run M again while the main stack still holds values
] sM # define macro M (the bracketed text above)
lMx # execute macro M to populate stack b
[
Lb # fetch the next value from stack b
p # print this value in binary
la # load the number of values left
1
- # decrement it
d # duplicate the result
sa # store one copy back in register a
0<N # use the other copy: run N again while values remain
]sN # define macro N
lNx' # execute macro N to print each value in binary
Upvotes: 0
Reputation: 8751
You can try Perl one-liner
$ cat hamdani.txt
134218506
134218250
134217984
134217984
1610612736
16384
33554432
$ perl -nle ' printf("%b\n",$_) ' hamdani.txt
1000000000000000001100001010
1000000000000000001000001010
1000000000000000000100000000
1000000000000000000100000000
1100000000000000000000000000000
100000000000000
10000000000000000000000000
$
Upvotes: 0