diff --git a/.gitignore b/.gitignore
index 722d5e7..db3e642 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,32 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
 .vscode
+
+datasets
diff --git a/bulk_insert.py b/bulk_insert.py
deleted file mode 100644
index 10b4786..0000000
--- a/bulk_insert.py
+++ /dev/null
@@ -1,428 +0,0 @@
-import csv
-import os
-import io
-import sys
-import math
-import struct
-import json
-from timeit import default_timer as timer
-import redis
-import click
-
-# Global variables
-CONFIGS = None # thresholds for batching Redis queries
-NODE_DICT = {} # global node dictionary
-TOP_NODE_ID = 0 # next ID to assign to a node
-QUERY_BUF = None # Buffer for query being constructed
-QUOTING = None
-
-FIELD_TYPES = None
-
-# Custom error class for invalid inputs
-class CSVError(Exception):
-    pass
-
-# Official enum support varies widely between 2.7 and 3.x, so we'll use a custom class
-class Type:
-    NULL = 0
-    BOOL = 1
-    NUMERIC = 2
-    STRING = 3
-
-# User-configurable thresholds for when to send queries to Redis
-class Configs(object):
-    def __init__(self, max_token_count, max_buffer_size, max_token_size, skip_invalid_nodes, skip_invalid_edges):
-        # Maximum number of tokens per query
-        # 1024 * 1024 is the hard-coded Redis maximum. We'll set a slightly lower limit so
-        # that we can safely ignore tokens that aren't binary strings
-        # ("GRAPH.BULK", "BEGIN", graph name, counts)
-        self.max_token_count = min(max_token_count, 1024 * 1023)
-        # Maximum size in bytes per query
-        self.max_buffer_size = max_buffer_size * 1000000
-        # Maximum size in bytes per token
-        # 512 megabytes is a hard-coded Redis maximum
-        self.max_token_size = min(max_token_size * 1000000, 512 * 1000000)
-
-        self.skip_invalid_nodes = skip_invalid_nodes
-        self.skip_invalid_edges = skip_invalid_edges
-
-# QueryBuffer is the class that processes input CSVs and emits their binary formats to the Redis client.
-class QueryBuffer(object):
-    def __init__(self, graphname, client):
-        # Redis client and data for each query
-        self.client = client
-
-        # Sizes for buffer currently being constructed
-        self.redis_token_count = 0
-        self.buffer_size = 0
-
-        # The first query should include a "BEGIN" token
-        self.graphname = graphname
-        self.initial_query = True
-
-        self.node_count = 0
-        self.relation_count = 0
-
-        self.labels = [] # List containing all pending Label objects
-        self.reltypes = [] # List containing all pending RelationType objects
-
-        self.nodes_created = 0 # Total number of nodes created
-        self.relations_created = 0 # Total number of relations created
-
-    # Send all pending inserts to Redis
-    def send_buffer(self):
-        # Do nothing if we have no entities
-        if self.node_count == 0 and self.relation_count == 0:
-            return
-
-        args = [self.node_count, self.relation_count, len(self.labels), len(self.reltypes)] + self.labels + self.reltypes
-        # Prepend a "BEGIN" token if this is the first query
-        if self.initial_query:
-            args.insert(0, "BEGIN")
-            self.initial_query = False
-
-        result = self.client.execute_command("GRAPH.BULK", self.graphname, *args)
-        stats = result.split(', '.encode())
-        self.nodes_created += int(stats[0].split(' '.encode())[0])
-        self.relations_created += int(stats[1].split(' '.encode())[0])
-
-        self.clear_buffer()
-
-    # Delete all entities that have been inserted
-    def clear_buffer(self):
-        self.redis_token_count = 0
-        self.buffer_size = 0
-
-        # All constructed entities have been inserted, so clear buffers
-        self.node_count = 0
-        self.relation_count = 0
-        del self.labels[:]
-        del self.reltypes[:]
-
-    def report_completion(self, runtime):
-        print("Construction of graph '%s' complete: %d nodes created, %d relations created in %f seconds"
-              % (self.graphname, self.nodes_created, self.relations_created, runtime))
-
-# Superclass for label and relation CSV files
-class EntityFile(object):
-    def __init__(self, filename, separator):
-        # The label or relation type string is the basename of the file
-        self.entity_str = os.path.splitext(os.path.basename(filename))[0]
-        # Input file handling
-        self.infile = io.open(filename, 'rt')
-        # Initialize CSV reader that ignores leading whitespace in each field
-        # and does not modify input quote characters
-        self.reader = csv.reader(self.infile, delimiter=separator, skipinitialspace=True, quoting=QUOTING)
-
-        self.prop_offset = 0 # Starting index of properties in row
-        self.prop_count = 0 # Number of properties per entity
-
-        self.packed_header = b''
-        self.binary_entities = []
-        self.binary_size = 0 # size of binary token
-        self.count_entities() # number of entities/row in file.
-
-    # Count number of rows in file.
-    def count_entities(self):
-        self.entities_count = 0
-        self.entities_count = sum(1 for line in self.infile)
-        # discard header row
-        self.entities_count -= 1
-        # seek back
-        self.infile.seek(0)
-        return self.entities_count
-
-    # Simple input validations for each row of a CSV file
-    def validate_row(self, expected_col_count, row):
-        # Each row should have the same number of fields
-        if len(row) != expected_col_count:
-            raise CSVError("%s:%d Expected %d columns, encountered %d ('%s')"
-                           % (self.infile.name, self.reader.line_num, expected_col_count, len(row), ','.join(row)))
-
-    # If part of a CSV file was sent to Redis, delete the processed entities and update the binary size
-    def reset_partial_binary(self):
-        self.binary_entities = []
-        self.binary_size = len(self.packed_header)
-
-    # Convert property keys from a CSV file header into a binary string
-    def pack_header(self, header):
-        prop_count = len(header) - self.prop_offset
-        # String format
-        entity_bytes = self.entity_str.encode()
-        fmt = "=%dsI" % (len(entity_bytes) + 1) # Unaligned native, entity name, count of properties
-        args = [entity_bytes, prop_count]
-        for p in header[self.prop_offset:]:
-            prop = p.encode()
-            fmt += "%ds" % (len(prop) + 1) # encode string with a null terminator
-            args.append(prop)
-        return struct.pack(fmt, *args)
-
-    # Convert a list of properties into a binary string
-    def pack_props(self, line):
-        props = []
-        for num, field in enumerate(line[self.prop_offset:]):
-            field_type_idx = self.prop_offset+num
-            try:
-                FIELD_TYPES[self.entity_str][field_type_idx]
-            except:
-                props.append(prop_to_binary(field, None))
-            else:
-                props.append(prop_to_binary(field, FIELD_TYPES[self.entity_str][field_type_idx]))
-        return b''.join(p for p in props)
-
-    def to_binary(self):
-        return self.packed_header + b''.join(self.binary_entities)
-
-# Handler class for processing label csv files.
-class Label(EntityFile):
-    def __init__(self, infile, separator):
-        super(Label, self).__init__(infile, separator)
-        expected_col_count = self.process_header()
-        self.process_entities(expected_col_count)
-        self.infile.close()
-
-    def process_header(self):
-        # Header format:
-        # node identifier (which may be a property key), then all other property keys
-        header = next(self.reader)
-        expected_col_count = len(header)
-        # If identifier field begins with an underscore, don't add it as a property.
-        if header[0][0] == '_':
-            self.prop_offset = 1
-        self.packed_header = self.pack_header(header)
-        self.binary_size += len(self.packed_header)
-        return expected_col_count
-
-    def process_entities(self, expected_col_count):
-        global NODE_DICT
-        global TOP_NODE_ID
-        global QUERY_BUF
-
-        entities_created = 0
-        with click.progressbar(self.reader, length=self.entities_count, label=self.entity_str) as reader:
-            for row in reader:
-                self.validate_row(expected_col_count, row)
-                # Add identifier->ID pair to dictionary if we are building relations
-                if NODE_DICT is not None:
-                    if row[0] in NODE_DICT:
-                        sys.stderr.write("Node identifier '%s' was used multiple times - second occurrence at %s:%d\n"
-                                         % (row[0], self.infile.name, self.reader.line_num))
-                        if CONFIGS.skip_invalid_nodes is False:
-                            exit(1)
-                    NODE_DICT[row[0]] = TOP_NODE_ID
-                    TOP_NODE_ID += 1
-                row_binary = self.pack_props(row)
-                row_binary_len = len(row_binary)
-                # If the addition of this entity will make the binary token grow too large,
-                # send the buffer now.
-                if self.binary_size + row_binary_len > CONFIGS.max_token_size:
-                    QUERY_BUF.labels.append(self.to_binary())
-                    QUERY_BUF.send_buffer()
-                    self.reset_partial_binary()
-                    # Push the label onto the query buffer again, as there are more entities to process.
-                    QUERY_BUF.labels.append(self.to_binary())
-
-                QUERY_BUF.node_count += 1
-                entities_created += 1
-                self.binary_size += row_binary_len
-                self.binary_entities.append(row_binary)
-            QUERY_BUF.labels.append(self.to_binary())
-        print("%d nodes created with label '%s'" % (entities_created, self.entity_str))
-
-# Handler class for processing relation csv files.
-class RelationType(EntityFile):
-    def __init__(self, infile, separator):
-        super(RelationType, self).__init__(infile, separator)
-        expected_col_count = self.process_header()
-        self.process_entities(expected_col_count)
-        self.infile.close()
-
-    def process_header(self):
-        # Header format:
-        # source identifier, dest identifier, properties[0..n]
-        header = next(self.reader)
-        # Assume rectangular CSVs
-        expected_col_count = len(header)
-        self.prop_count = expected_col_count - 2
-        if self.prop_count < 0:
-            raise CSVError("Relation file '%s' should have at least 2 elements in header line."
-                           % (self.infile.name))
-
-        self.prop_offset = 2
-        self.packed_header = self.pack_header(header) # skip src and dest identifiers
-        self.binary_size += len(self.packed_header)
-        return expected_col_count
-
-    def process_entities(self, expected_col_count):
-        entities_created = 0
-        with click.progressbar(self.reader, length=self.entities_count, label=self.entity_str) as reader:
-            for row in reader:
-                self.validate_row(expected_col_count, row)
-                try:
-                    src = NODE_DICT[row[0]]
-                    dest = NODE_DICT[row[1]]
-                except KeyError as e:
-                    print("Relationship specified a non-existent identifier. src: %s; dest: %s" % (row[0], row[1]))
-                    if CONFIGS.skip_invalid_edges is False:
-                        raise e
-                    continue
-                fmt = "=QQ" # 8-byte unsigned ints for src and dest
-                row_binary = struct.pack(fmt, src, dest) + self.pack_props(row)
-                row_binary_len = len(row_binary)
-                # If the addition of this entity will make the binary token grow too large,
-                # send the buffer now.
-                if self.binary_size + row_binary_len > CONFIGS.max_token_size:
-                    QUERY_BUF.reltypes.append(self.to_binary())
-                    QUERY_BUF.send_buffer()
-                    self.reset_partial_binary()
-                    # Push the reltype onto the query buffer again, as there are more entities to process.
-                    QUERY_BUF.reltypes.append(self.to_binary())
-
-                QUERY_BUF.relation_count += 1
-                entities_created += 1
-                self.binary_size += row_binary_len
-                self.binary_entities.append(row_binary)
-            QUERY_BUF.reltypes.append(self.to_binary())
-        print("%d relations created for type '%s'" % (entities_created, self.entity_str))
-
-# Convert a single CSV property field into a binary stream.
-# Supported property types are string, numeric, boolean, and NULL.
-# type is either Type.NUMERIC, Type.BOOL or Type.STRING, and explicitly sets the value to this type if possible
-def prop_to_binary(prop_val, type):
-    # All format strings start with an unsigned char to represent our Type enum
-    format_str = "=B"
-    if prop_val is None:
-        # An empty field indicates a NULL property
-        return struct.pack(format_str, Type.NULL)
-
-    # If field can be cast to a float, allow it
-    if type == None or type == Type.NUMERIC:
-        try:
-            numeric_prop = float(prop_val)
-            if not math.isnan(numeric_prop) and not math.isinf(numeric_prop): # Don't accept non-finite values.
-                return struct.pack(format_str + "d", Type.NUMERIC, numeric_prop)
-        except:
-            pass
-
-    if type == None or type == Type.BOOL:
-        # If field is 'false' or 'true', it is a boolean
-        if prop_val.lower() == 'false':
-            return struct.pack(format_str + '?', Type.BOOL, False)
-        elif prop_val.lower() == 'true':
-            return struct.pack(format_str + '?', Type.BOOL, True)
-
-    if type == None or type == Type.STRING:
-        # If we've reached this point, the property is a string
-        encoded_str = str.encode(prop_val) # struct.pack requires bytes objects as arguments
-        # Encoding len+1 adds a null terminator to the string
-        format_str += "%ds" % (len(encoded_str) + 1)
-        return struct.pack(format_str, Type.STRING, encoded_str)
-
-    ## if it hasn't returned by this point, it is trying to set it to a type that it can't adopt
-    raise Exception("unable to parse [" + prop_val + "] with type ["+repr(type)+"]")
-
-# For each node input file, validate contents and convert to binary format.
-# If any buffer limits have been reached, flush all enqueued inserts to Redis.
-def process_entity_csvs(cls, csvs, separator):
-    global QUERY_BUF
-    for in_csv in csvs:
-        # Build entity descriptor from input CSV
-        entity = cls(in_csv, separator)
-        added_size = entity.binary_size
-        # Check to see if the addition of this data will exceed the buffer's capacity
-        if (QUERY_BUF.buffer_size + added_size >= CONFIGS.max_buffer_size
-                or QUERY_BUF.redis_token_count + len(entity.binary_entities) >= CONFIGS.max_token_count):
-            # Send and flush the buffer if appropriate
-            QUERY_BUF.send_buffer()
-        # Add binary data to list and update all counts
-        QUERY_BUF.redis_token_count += len(entity.binary_entities)
-        QUERY_BUF.buffer_size += added_size
-
-# Command-line arguments
-@click.command()
-@click.argument('graph')
-# Redis server connection settings
-@click.option('--host', '-h', default='127.0.0.1', help='Redis server host')
-@click.option('--port', '-p', default=6379, help='Redis server port')
-@click.option('--password', '-a', default=None, help='Redis server password')
-# CSV file paths
-@click.option('--nodes', '-n', required=True, multiple=True, help='Path to node csv file')
-@click.option('--relations', '-r', multiple=True, help='Path to relation csv file')
-@click.option('--separator', '-o', default=',', help='Field token separator in csv file')
-# Buffer size restrictions
-@click.option('--max-token-count', '-c', default=1024, help='max number of processed CSVs to send per query (default 1024)')
-@click.option('--max-buffer-size', '-b', default=2048, help='max buffer size in megabytes (default 2048)')
-@click.option('--max-token-size', '-t', default=500, help='max size of each token in megabytes (default 500, max 512)')
-@click.option('--quote', '-q', default=3, help='the quoting format used in the CSV file. QUOTE_MINIMAL=0,QUOTE_ALL=1,QUOTE_NONNUMERIC=2,QUOTE_NONE=3')
-@click.option('--field-types', '-f', default=None, help='json to set explicit types for each field, format {