From d09b31b4385d8de3352cc30e6eb32fd6d353c1b2 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Tue, 2 Feb 2021 21:57:21 -0500 Subject: [PATCH 1/5] Update example_image.py --- example_image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example_image.py b/example_image.py index de5976d..80cc9f9 100644 --- a/example_image.py +++ b/example_image.py @@ -1,4 +1,4 @@ -import scipy.io +import scipy.misc import pyae import numpy import matplotlib.pyplot From bb65962a872bc7c49a4c95292dc1f99f5abe0b79 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sat, 20 Feb 2021 14:10:47 -0500 Subject: [PATCH 2/5] Support of binary encoding --- README.md | 64 ++++++++++--- example.py | 6 +- example_binary.py | 33 +++++++ example_image.py | 18 ++-- example_image_binary.py | 56 ++++++++++++ pyae.py | 196 ++++++++++++++++++++++++++++++++++++++-- 6 files changed, 342 insertions(+), 31 deletions(-) create mode 100644 example_binary.py create mode 100644 example_image_binary.py diff --git a/README.md b/README.md index 8b0e27b..ca96cfc 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ This project implements the lossless data compression technique called **arithmetic encoding (AE)**. The project is simple and has just some basic features. +The project supports encoding the input as both a floating-point value and a binary code. + The project has a main module called `pyae.py` which contains a class called `ArithmeticEncoding` to encode and decode messages. # Usage Steps @@ -12,7 +14,8 @@ To use the project, follow these steps: 2. Instantiate the `ArithmeticEncoding` Class 3. Prepare a Message 4. Encode the Message -5. Decode the Message +5. Get the binary code of the encoded message. +6. Decode the Message ## Import `pyae` @@ -53,8 +56,17 @@ original_msg = "abc" Encode the message using the `encode()` method. It accepts the message to be encoded and the probability table. It returns the encoded message (single double value) and the encoder stages. 
```python -encoded_msg, encoder = AE.encode(msg=original_msg, - probability_table=AE.probability_table) +encoded_msg, encoder , interval_min_value, interval_max_value = AE.encode(msg=original_msg, + probability_table=AE.probability_table) +``` + +## Get the Binary Code of the Encoded Message + +Convert the floating-point value returned from the `AE.encode()` function into a binary code using the `AE.encode_binary()` function. + +```python +binary_code, encoder_binary = AE.encode_binary(float_interval_min=interval_min_value, + float_interval_max=interval_max_value) ``` ## Decode the Message @@ -95,6 +107,7 @@ The [`example.py`](/example.py) script has an example that compresses the messag import pyae # Example for encoding a simple text message using the PyAE module. +# This example returns the floating-point value in addition to its binary code that encodes the message. frequency_table = {"a": 2, "b": 7, @@ -106,16 +119,22 @@ AE = pyae.ArithmeticEncoding(frequency_table=frequency_table, original_msg = "abc" print("Original Message: {msg}".format(msg=original_msg)) -encoded_msg, encoder = AE.encode(msg=original_msg, - probability_table=AE.probability_table) +# Encode the message +encoded_msg, encoder , interval_min_value, interval_max_value = AE.encode(msg=original_msg, + probability_table=AE.probability_table) print("Encoded Message: {msg}".format(msg=encoded_msg)) +# Get the binary code out of the floating-point value +binary_code, encoder_binary = AE.encode_binary(float_interval_min=interval_min_value, + float_interval_max=interval_max_value) +print("The binary code is: {binary_code}".format(binary_code=binary_code)) + +# Decode the message decoded_msg, decoder = AE.decode(encoded_msg=encoded_msg, msg_length=len(original_msg), probability_table=AE.probability_table) -print("Decoded Message: {msg}".format(msg=decoded_msg)) - decoded_msg = "".join(decoded_msg) +print("Decoded Message: {msg}".format(msg=decoded_msg)) print("Message Decoded Successfully? 
{result}".format(result=original_msg == decoded_msg)) ``` @@ -124,6 +143,7 @@ The printed messages out of the code are: ``` Original Message: abc Encoded Message: 0.1729999999999999989175325511 +The binary code is: 0.0010110 Decoded Message: abc Message Decoded Successfully? True ``` @@ -161,6 +181,22 @@ print(encoder) Decimal('0.5599999999999999349409307570')]}] ``` +Here is the binary encoder: + +```python +print(encoder_binary) +``` + +```python +[{0: ['0.0', '0.1'], 1: ['0.1', '1.0']}, + {0: ['0.00', '0.01'], 1: ['0.01', '0.1']}, + {0: ['0.000', '0.001'], 1: ['0.001', '0.01']}, + {0: ['0.0010', '0.0011'], 1: ['0.0011', '0.01']}, + {0: ['0.00100', '0.00101'], 1: ['0.00101', '0.0011']}, + {0: ['0.001010', '0.001011'], 1: ['0.001011', '0.0011']}, + {0: ['0.0010110', '0.0010111'], 1: ['0.0010111', '0.0011']}] +``` + ## Low Precision Assume the message to be encoded is `"abc"*20` (i.e. `abc` repeated 20 times) while using the default precision 28. The length of the message is 60. @@ -184,16 +220,15 @@ AE = pyae.ArithmeticEncoding(frequency_table=frequency_table, original_msg = "abc"*20 print("Original Message: {msg}".format(msg=original_msg)) -encoded_msg, encoder = AE.encode(msg=original_msg, - probability_table=AE.probability_table) +encoded_msg, encoder , interval_min_value, interval_max_value = AE.encode(msg=original_msg, + probability_table=AE.probability_table) print("Encoded Message: {msg}".format(msg=encoded_msg)) decoded_msg, decoder = AE.decode(encoded_msg=encoded_msg, msg_length=len(original_msg), probability_table=AE.probability_table) -print("Decoded Message: {msg}".format(msg=decoded_msg)) - decoded_msg = "".join(decoded_msg) +print("Decoded Message: {msg}".format(msg=decoded_msg)) print("Message Decoded Successfully? 
{result}".format(result=original_msg == decoded_msg)) ``` @@ -232,16 +267,15 @@ AE = pyae.ArithmeticEncoding(frequency_table=frequency_table, original_msg = "abc"*20 print("Original Message: {msg}".format(msg=original_msg)) -encoded_msg, encoder = AE.encode(msg=original_msg, - probability_table=AE.probability_table) +encoded_msg, encoder , interval_min_value, interval_max_value = AE.encode(msg=original_msg, + probability_table=AE.probability_table) print("Encoded Message: {msg}".format(msg=encoded_msg)) decoded_msg, decoder = AE.decode(encoded_msg=encoded_msg, msg_length=len(original_msg), probability_table=AE.probability_table) -print("Decoded Message: {msg}".format(msg=decoded_msg)) - decoded_msg = "".join(decoded_msg) +print("Decoded Message: {msg}".format(msg=decoded_msg)) print("Message Decoded Successfully? {result}".format(result=original_msg == decoded_msg)) ``` diff --git a/example.py b/example.py index ffbc1d7..49799a3 100644 --- a/example.py +++ b/example.py @@ -1,6 +1,8 @@ import pyae # Example for encoding a simple text message using the PyAE module. +# This example only returns the floating-point value that encodes the message. +# Check the example_binary.py to return the binary code of the floating-point value. frequency_table = {"a": 2, "b": 7, @@ -12,8 +14,8 @@ original_msg = "abc" print("Original Message: {msg}".format(msg=original_msg)) -encoded_msg, encoder = AE.encode(msg=original_msg, - probability_table=AE.probability_table) +encoded_msg, encoder , interval_min_value, interval_max_value = AE.encode(msg=original_msg, + probability_table=AE.probability_table) print("Encoded Message: {msg}".format(msg=encoded_msg)) decoded_msg, decoder = AE.decode(encoded_msg=encoded_msg, diff --git a/example_binary.py b/example_binary.py new file mode 100644 index 0000000..7626a49 --- /dev/null +++ b/example_binary.py @@ -0,0 +1,33 @@ +import pyae + +# Example for encoding a simple text message using the PyAE module. 
+# This example returns the floating-point value in addition to its binary code that encodes the message. + +frequency_table = {"a": 2, + "b": 7, + "c": 1} + +AE = pyae.ArithmeticEncoding(frequency_table=frequency_table, + save_stages=True) + +original_msg = "abc" +print("Original Message: {msg}".format(msg=original_msg)) + +# Encode the message +encoded_msg, encoder , interval_min_value, interval_max_value = AE.encode(msg=original_msg, + probability_table=AE.probability_table) +print("Encoded Message: {msg}".format(msg=encoded_msg)) + +# Get the binary code out of the floating-point value +binary_code, encoder_binary = AE.encode_binary(float_interval_min=interval_min_value, + float_interval_max=interval_max_value) +print("The binary code is: {binary_code}".format(binary_code=binary_code)) + +# Decode the message +decoded_msg, decoder = AE.decode(encoded_msg=encoded_msg, + msg_length=len(original_msg), + probability_table=AE.probability_table) +decoded_msg = "".join(decoded_msg) +print("Decoded Message: {msg}".format(msg=decoded_msg)) + +print("Message Decoded Successfully? {result}".format(result=original_msg == decoded_msg)) diff --git a/example_image.py b/example_image.py index 80cc9f9..55e8f14 100644 --- a/example_image.py +++ b/example_image.py @@ -3,15 +3,19 @@ import numpy import matplotlib.pyplot +# Example for encoding an image using the PyAE module. +# This example only returns the floating-point value that encodes the image. +# Check the example_image_binary.py to return the binary code of the floating-point value. + # Change the precision to a bigger value from decimal import getcontext -getcontext().prec = 10000 +getcontext().prec = 444 # Read an image. im = scipy.misc.face(gray=True) # Just work on a small part to save time. The larger the image, the more time consumed. -im = im[:50, :50] +im = im[:15, :15] # Convert the image into a 1D vector. 
msg = im.flatten() @@ -25,13 +29,13 @@ AE = pyae.ArithmeticEncoding(frequency_table=frequency_table) # Encode the message -encoded_msg, _ = AE.encode(msg=msg, - probability_table=AE.probability_table) +encoded_msg, encoder, interval_min_value, interval_max_value = AE.encode(msg=msg, + probability_table=AE.probability_table) # Decode the message -decoded_msg, _ = AE.decode(encoded_msg=encoded_msg, - msg_length=len(msg), - probability_table=AE.probability_table) +decoded_msg, decoder = AE.decode(encoded_msg=encoded_msg, + msg_length=len(msg), + probability_table=AE.probability_table) # Reshape the image to its original shape. decoded_msg = numpy.reshape(decoded_msg, im.shape) diff --git a/example_image_binary.py b/example_image_binary.py new file mode 100644 index 0000000..829e83f --- /dev/null +++ b/example_image_binary.py @@ -0,0 +1,56 @@ +import scipy.misc +import pyae +import numpy +import matplotlib.pyplot + +# Example for encoding an image using the PyAE module. +# This example returns the floating-point value in addition to its binary code that encodes the image. + +# Change the precision to a bigger value +from decimal import getcontext +getcontext().prec = 444 + +# Read an image. +im = scipy.misc.face(gray=True) + +# Just work on a small part to save time. The larger the image, the more time consumed. +im = im[:15, :15] + +# Convert the image into a 1D vector. +msg = im.flatten() + +# Create the frequency table based on its histogram. +hist, bin_edges = numpy.histogram(a=im, + bins=range(0, 257)) +frequency_table = {key: value for key, value in zip(bin_edges[0:256], hist)} + +# Create an instance of the ArithmeticEncoding class. 
+AE = pyae.ArithmeticEncoding(frequency_table=frequency_table, save_stages=True) + +# Encode the message +encoded_msg, encoder, interval_min_value, interval_max_value = AE.encode(msg=msg, + probability_table=AE.probability_table) + +# Get the binary code that encodes the image +binary_code, encoder_binary = AE.encode_binary(float_interval_min=interval_min_value, + float_interval_max=interval_max_value) +print("The binary code is: {binary_code}".format(binary_code=binary_code)) + +# Decode the message +decoded_msg, decoder = AE.decode(encoded_msg=encoded_msg, + msg_length=len(msg), + probability_table=AE.probability_table) + +# Reshape the image to its original shape. +decoded_msg = numpy.reshape(decoded_msg, im.shape) + +# Show the original and decoded images. +fig, ax = matplotlib.pyplot.subplots(1, 2) +ax[0].imshow(im, cmap="gray") +ax[0].set_title("Original Image") +ax[0].set_xticks([]) +ax[0].set_yticks([]) +ax[1].imshow(decoded_msg, cmap="gray") +ax[1].set_title("Reconstructed Image") +ax[1].set_xticks([]) +ax[1].set_yticks([]) diff --git a/pyae.py b/pyae.py index d333497..aa0eb60 100644 --- a/pyae.py +++ b/pyae.py @@ -1,4 +1,4 @@ -from decimal import Decimal # Used to offer any user-defined precision. +from decimal import Decimal class ArithmeticEncoding: """ @@ -20,6 +20,10 @@ def __init__(self, frequency_table, save_stages=False): def get_probability_table(self, frequency_table): """ Calculates the probability table out of the frequency table. + + frequency_table: A table of the term frequencies. + + Returns the probability table. """ total_frequency = sum(list(frequency_table.values())) @@ -32,6 +36,10 @@ def get_probability_table(self, frequency_table): def get_encoded_value(self, last_stage_probs): """ After encoding the entire message, this method returns the single value that represents the entire message. + + last_stage_probs: A list of the probabilities in the last stage. 
+ + Returns the minimum and maximum probabilities in the last stage in addition to the value encoding the message. """ last_stage_probs = list(last_stage_probs.values()) last_stage_values = [] @@ -41,13 +49,21 @@ def get_encoded_value(self, last_stage_probs): last_stage_min = min(last_stage_values) last_stage_max = max(last_stage_values) + encoded_value = (last_stage_min + last_stage_max)/2 - return (last_stage_min + last_stage_max)/2 + return last_stage_min, last_stage_max, encoded_value def process_stage(self, probability_table, stage_min, stage_max): """ Processing a stage in the encoding/decoding process. + + probability_table: The probability table. + stage_min: The minimum probability of the current stage. + stage_max: The maximum probability of the current stage. + + Returns the probabilities in the stage. """ + stage_probs = {} stage_domain = stage_max - stage_min for term_idx in range(len(probability_table.items())): @@ -60,10 +76,14 @@ def process_stage(self, probability_table, stage_min, stage_max): def encode(self, msg, probability_table): """ - Encodes a message. + Encodes a message using arithmetic encoding. + + msg: The message to be encoded. + probability_table: The probability table. + + Returns the floating-point value representing the encoded message, the encoder, and the minimum and maximum values of the interval in which the floating-point value falls. """ - # Make sure msg = list(msg) encoder = [] @@ -86,13 +106,98 @@ def encode(self, msg, probability_table): if self.save_stages: encoder.append(last_stage_probs) - encoded_msg = self.get_encoded_value(last_stage_probs) + interval_min_value, interval_max_value, encoded_msg = self.get_encoded_value(last_stage_probs) + + return encoded_msg, encoder, interval_min_value, interval_max_value + + def process_stage_binary(self, float_interval_min, float_interval_max, stage_min_bin, stage_max_bin): + """ + Processing a stage in the encoding/decoding process. 
+ + float_interval_min: The minimum floating-point value in the interval in which the floating-point value that encodes the message is located. + float_interval_max: The maximum floating-point value in the interval in which the floating-point value that encodes the message is located. + stage_min_bin: The minimum binary number in the current stage. + stage_max_bin: The maximum binary number in the current stage. + + Returns the probabilities of the terms in this stage. There are only 2 terms. + """ + + stage_mid_bin = stage_min_bin + "1" + stage_min_bin = stage_min_bin + "0" + + stage_probs = {} + stage_probs[0] = [stage_min_bin, stage_mid_bin] + stage_probs[1] = [stage_mid_bin, stage_max_bin] + + return stage_probs + + def encode_binary(self, float_interval_min, float_interval_max): + """ + Calculates the binary code that represents the floating-point value that encodes the message. + + float_interval_min: The minimum floating-point value in the interval in which the floating-point value that encodes the message is located. + float_interval_max: The maximum floating-point value in the interval in which the floating-point value that encodes the message is located. + + Returns the binary code representing the encoded message. 
+ """ + + binary_encoder = [] + binary_code = None + + stage_min_bin = "0.0" + stage_max_bin = "1.0" - return encoded_msg, encoder + stage_probs = {} + stage_probs[0] = [stage_min_bin, "0.1"] + stage_probs[1] = ["0.1", stage_max_bin] + + while True: + if float_interval_max < bin2float(stage_probs[0][1]): + stage_min_bin = stage_probs[0][0] + stage_max_bin = stage_probs[0][1] + else: + stage_min_bin = stage_probs[1][0] + stage_max_bin = stage_probs[1][1] + + if self.save_stages: + binary_encoder.append(stage_probs) + + stage_probs = self.process_stage_binary(float_interval_min, + float_interval_max, + stage_min_bin, + stage_max_bin) + + # print(stage_probs[0][0], bin2float(stage_probs[0][0])) + # print(stage_probs[0][1], bin2float(stage_probs[0][1])) + if (bin2float(stage_probs[0][0]) >= float_interval_min) and (bin2float(stage_probs[0][1]) < float_interval_max): + # The binary code is found. + # print(stage_probs[0][0], bin2float(stage_probs[0][0])) + # print(stage_probs[0][1], bin2float(stage_probs[0][1])) + # print("The binary code is : ", stage_probs[0][0]) + binary_code = stage_probs[0][0] + break + elif (bin2float(stage_probs[1][0]) >= float_interval_min) and (bin2float(stage_probs[1][1]) < float_interval_max): + # The binary code is found. + # print(stage_probs[1][0], bin2float(stage_probs[1][0])) + # print(stage_probs[1][1], bin2float(stage_probs[1][1])) + # print("The binary code is : ", stage_probs[1][0]) + binary_code = stage_probs[1][0] + break + + if self.save_stages: + binary_encoder.append(stage_probs) + + return binary_code, binary_encoder def decode(self, encoded_msg, msg_length, probability_table): """ - Decodes a message. + Decodes a message from a floating-point number. + + encoded_msg: The floating-point value that encodes the message. + msg_length: Length of the message. + probability_table: The probability table. + + Returns the decoded message. 
""" decoder = [] @@ -122,3 +227,80 @@ def decode(self, encoded_msg, msg_length, probability_table): decoder.append(last_stage_probs) return decoded_msg, decoder + +def float2bin(float_num, num_bits=None): + """ + Converts a floating-point number into binary. + + float_num: The floating-point number. + num_bits: The number of bits expected in the result. If None, then the number of bits depends on the number. + + Returns the binary representation of the number. + """ + + float_num = str(float_num) + if float_num.find(".") == -1: + # No decimals in the floating-point number. + integers = float_num + decimals = "" + else: + integers, decimals = float_num.split(".") + decimals = "0." + decimals + decimals = Decimal(decimals) + integers = int(integers) + + result = "" + num_used_bits = 0 + while True: + mul = decimals * 2 + int_part = int(mul) + result = result + str(int_part) + num_used_bits = num_used_bits + 1 + + decimals = mul - int(mul) + if type(num_bits) is type(None): + if decimals == 0: + break + elif num_used_bits >= num_bits: + break + if type(num_bits) is type(None): + pass + elif len(result) < num_bits: + num_remaining_bits = num_bits - len(result) + result = result + "0"*num_remaining_bits + + integers_bin = bin(integers)[2:] + result = str(integers_bin) + "." + str(result) + return result + +def bin2float(bin_num): + """ + Converts a binary number to a floating-point number. + + bin_num: The binary number as a string. + + Returns the floating-point representation. + """ + + if bin_num.find(".") == -1: + # No decimals in the binary number. + integers = bin_num + decimals = "" + else: + integers, decimals = bin_num.split(".") + result = Decimal(0.0) + + # Working with integers. + for idx, bit in enumerate(integers): + if bit == "0": + continue + mul = 2**idx + result = result + Decimal(mul) + + # Working with decimals. 
+ for idx, bit in enumerate(decimals): + if bit == "0": + continue + mul = Decimal(1.0)/Decimal((2**(idx+1))) + result = result + mul + return result From 7b2f3f0a5dff35a35554dd4fc85cc3c62ae9ba8b Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sat, 20 Feb 2021 14:11:38 -0500 Subject: [PATCH 3/5] Update example2.py --- example2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/example2.py b/example2.py index ba35dda..a253f30 100644 --- a/example2.py +++ b/example2.py @@ -20,8 +20,8 @@ print("Original Message: {msg}".format(msg=original_msg)) # Encode the message -encoded_msg, encoder = AE.encode(msg=original_msg, - probability_table=AE.probability_table) +encoded_msg, encoder , interval_min_value, interval_max_value = AE.encode(msg=original_msg, + probability_table=AE.probability_table) print("Encoded Message: {msg}".format(msg=encoded_msg)) # Decode the message From 86d20abbe419671cafaec0327d1cf3f0d66857b1 Mon Sep 17 00:00:00 2001 From: Ahmed Gad Date: Sat, 8 Apr 2023 15:40:58 -0400 Subject: [PATCH 4/5] Create FUNDING.yml --- .github/FUNDING.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 .github/FUNDING.yml diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..5892ed6 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,12 @@ +# These are supported funding model platforms + +github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] +# paypal: http://paypal.me/ahmedfgad # Replace with a single Patreon username +open_collective: pygad +ko_fi: # Replace with a single Ko-fi username +tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel +community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry +liberapay: # Replace with a single Liberapay username +issuehunt: # Replace with a single IssueHunt username +otechie: # Replace with a single Otechie username +custom: 
['https://donate.stripe.com/eVa5kO866elKgM0144', 'http://paypal.me/ahmedfgad'] # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] From 692baeb46ec9c42081bf937dec402e2088723d01 Mon Sep 17 00:00:00 2001 From: Yuxi Liu <33951560+yuxi-liu-wired@users.noreply.github.com> Date: Wed, 31 Jan 2024 16:01:52 -0800 Subject: [PATCH 5/5] added setup.py --- setup.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 setup.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..6b34fdb --- /dev/null +++ b/setup.py @@ -0,0 +1,21 @@ +from setuptools import setup, find_packages + +setup( + name='ArithmeticEncodingPython', + version='1.0.0', + packages=find_packages(), + description='Data Compression using Arithmetic Encoding in Python', + long_description=open('README.md').read(), + long_description_content_type='text/markdown', + author='Ahmed Fawzy Gad', + author_email='ahmed.f.gad@gmail.com', + url='https://github.com/ahmedfgad/ArithmeticEncodingPython', + py_modules=['pyae'], + install_requires=[ + ], + classifiers=[ + 'Programming Language :: Python :: 3', + 'License :: OSI Approved :: MIT License', + 'Operating System :: OS Independent', + ], +)