mirror of
https://github.com/cortesi/scurve.git
synced 2025-06-18 16:55:33 -04:00
Make binvis play better with restricted symbol sets.
This means that we can now measure entropy in files that use only a subset of the 256 possible byte values, such as JavaScript source files.
This commit is contained in:
parent
8cf7eb9664
commit
52672e723c
13
binvis
13
binvis
@ -8,6 +8,9 @@ import Image, ImageDraw
|
||||
class _Color:
|
||||
def __init__(self, data, block):
|
||||
self.data, self.block = data, block
|
||||
s = list(set(data))
|
||||
s.sort()
|
||||
self.symbol_map = {v : i for (i, v) in enumerate(s)}
|
||||
|
||||
def __len__(self):
|
||||
return len(self.data)
|
||||
@ -23,10 +26,10 @@ class ColorHilbert(_Color):
|
||||
def __init__(self, data, block):
|
||||
_Color.__init__(self, data, block)
|
||||
self.csource = scurve.fromSize("hilbert", 3, 256**3)
|
||||
self.step = len(self.csource)/float(256)
|
||||
self.step = len(self.csource)/float(len(self.symbol_map))
|
||||
|
||||
def getPoint(self, x):
|
||||
c = ord(self.data[x])
|
||||
c = self.symbol_map[self.data[x]]
|
||||
return self.csource.point(int(c*self.step))
|
||||
|
||||
|
||||
@ -44,7 +47,8 @@ class ColorClass(_Color):
|
||||
|
||||
class ColorEntropy(_Color):
|
||||
def getPoint(self, x):
|
||||
e = utils.entropy(self.data, 256, x)
|
||||
e = utils.entropy(self.data, 256, x, len(self.symbol_map))
|
||||
# http://www.wolframalpha.com/input/?i=plot+%284%28x-0.5%29-4%28x-0.5%29**2%29**4+from+0.5+to+1
|
||||
def curve(v):
|
||||
f = (4*v - 4*v**2)**4
|
||||
f = max(f, 0)
|
||||
@ -52,7 +56,6 @@ class ColorEntropy(_Color):
|
||||
r = curve(e-0.5) if e > 0.5 else 0
|
||||
b = e**2
|
||||
return [
|
||||
# http://www.wolframalpha.com/input/?i=plot+%284%28x-0.5%29-4%28x-0.5%29**2%29**4+from+0.5+to+1
|
||||
int(255*r),
|
||||
0,
|
||||
int(255*b)
|
||||
@ -159,7 +162,7 @@ def main():
|
||||
base, _ = base.rsplit(".", 1)
|
||||
dst = base + options.suffix + ".png"
|
||||
|
||||
if os.path.exists(dst):
|
||||
if os.path.exists(dst) and len(args) < 2:
|
||||
print >> sys.stderr, "Refusing to over-write '%s'. Specify explicitly if you really want to do this."%dst
|
||||
sys.exit(1)
|
||||
|
||||
|
@ -100,7 +100,7 @@ def bitrange(x, width, start, end):
|
||||
return x >> (width-end) & ((2**(end-start))-1)
|
||||
|
||||
|
||||
def entropy(data, blocksize, offset):
|
||||
def entropy(data, blocksize, offset, symbols=256):
|
||||
"""
|
||||
Returns local byte entropy for a location in a file.
|
||||
"""
|
||||
@ -115,7 +115,7 @@ def entropy(data, blocksize, offset):
|
||||
hist = {}
|
||||
for i in data[start:start+blocksize]:
|
||||
hist[i] = hist.get(i, 0) + 1
|
||||
base = min(blocksize, 256)
|
||||
base = min(blocksize, symbols)
|
||||
entropy = 0
|
||||
for i in hist.values():
|
||||
p = i/float(blocksize)
|
||||
@ -124,5 +124,3 @@ def entropy(data, blocksize, offset):
|
||||
# between 0 and 1.
|
||||
entropy += (p * math.log(p, base))
|
||||
return -entropy
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user