# See https://github.com/RDFLib/rdflib/issues/1261
#
# py -3.6 import.py english-wordnet-2020.ttl turtle
# Windows 7.
#
# Interpreted.
#   2021-02-25 18:05:27
#   2021-02-25 18:10:09
#
# Then run:
#   py -3.6 setup.py build_ext --inplace
#   2021-02-25 18:54:05
#   2021-02-25 18:58:08
#
# Speedup: 14%
#
# Other ways to run this script:
#   python3 import.py english-wordnet-2020.ttl turtle
#   python3 -m cProfile -o results.prof import.py english-wordnet-2020.ttl turtle
#   python3 -m pstats results.prof
#     sort tottime
#     reverse
#     stats
#
# Expected number of triples: 2956279

import os
import sys
import datetime
import linecache
import tracemalloc

# Input used when the corresponding command-line argument is missing.
DEFAULT_INPUT_FILE = "english-wordnet-2020.ttl"
DEFAULT_INPUT_FORMAT = "turtle"

# Format of the timestamps printed before and after the parse.
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

# Pseudo-filenames excluded from the memory report, as in the "pretty top"
# example of the tracemalloc documentation: allocations made by the import
# machinery and allocations whose origin is unknown.
_IGNORED_FILENAMES = ("<frozen importlib._bootstrap>", "<unknown>")


def _short_filename(filename):
    """Return the last two components of *filename* joined by ``os.sep``.

    Turns "/path/to/module/file.py" into "module/file.py".
    """
    return os.sep.join(filename.split(os.sep)[-2:])


def _timestamp():
    """Return the current local time formatted with TIMESTAMP_FORMAT."""
    return datetime.datetime.now().strftime(TIMESTAMP_FORMAT)


# https://stackoverflow.com/questions/552744/how-do-i-profile-memory-usage-in-python
def display_top(snapshot, key_type='lineno', limit=3):
    """Print the biggest memory allocations recorded in *snapshot*.

    Traces whose filename matches one of ``_IGNORED_FILENAMES`` are removed
    first.  The remaining statistics are grouped by *key_type* and the first
    *limit* entries are printed, each followed by its source line when
    ``linecache`` can find one.  A summary of the entries beyond *limit* and
    the total allocated size close the report.

    Args:
        snapshot: a ``tracemalloc.Snapshot``.
        key_type: grouping key passed to ``Snapshot.statistics``.
        limit: number of entries printed individually.

    Returns:
        None; the report is written to standard output.
    """
    snapshot = snapshot.filter_traces(
        [tracemalloc.Filter(False, pattern) for pattern in _IGNORED_FILENAMES]
    )
    top_stats = snapshot.statistics(key_type)

    print("Top %s lines" % limit)
    for index, stat in enumerate(top_stats[:limit], 1):
        frame = stat.traceback[0]
        print("#%s: %s:%s: %.1f KiB"
              % (index, _short_filename(frame.filename), frame.lineno,
                 stat.size / 1024))
        line = linecache.getline(frame.filename, frame.lineno).strip()
        if line:
            print(' %s' % line)

    other = top_stats[limit:]
    if other:
        size = sum(stat.size for stat in other)
        print("%s other: %.1f KiB" % (len(other), size / 1024))
    total = sum(stat.size for stat in top_stats)
    print("Total allocated size: %.1f KiB" % (total / 1024))


def main(argv=None):
    """Parse an RDF file with rdflib and report timing and memory usage.

    Args:
        argv: argument list shaped like ``sys.argv`` (program name first);
            defaults to ``sys.argv``.  ``argv[1]`` is the input file and
            ``argv[2]`` the rdflib format name; each falls back to its
            DEFAULT_* constant when absent.
    """
    if argv is None:
        argv = sys.argv

    # Put the "rdflib" directory first on the import path so that a package
    # found there takes precedence; the printed __file__ shows which copy
    # was actually loaded.
    sys.path.insert(0, "rdflib")
    import rdflib
    print(rdflib.__file__)

    g = rdflib.Graph()
    print(_timestamp())

    input_file = argv[1] if len(argv) > 1 else DEFAULT_INPUT_FILE
    input_format = argv[2] if len(argv) > 2 else DEFAULT_INPUT_FORMAT

    # Tracing starts right before the parse, so the snapshot only contains
    # allocations made from this point on.
    tracemalloc.start()
    g.parse(input_file, format=input_format)
    snapshot = tracemalloc.take_snapshot()
    display_top(snapshot)

    print(_timestamp())
    print("Len=", len(g))


if __name__ == "__main__":
    main()