Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RFC/WIP] Tools for measuring cycles and cpu_times and tricking out LLVM #92

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/BenchmarkTools.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ export BenchmarkGroup,
addgroup!,
leaves

##########################
# Low-level benchmarking #
##########################

# Load the low-level benchmarking helpers and export the two compiler
# barriers; no other name from that file is exported here.
include("lowlevel.jl")
export clobber,
escape

######################
# Execution Strategy #
######################
Expand Down
155 changes: 155 additions & 0 deletions src/lowlevel.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
##########################
# Low-level benchmarking #
##########################
import Base: llvmcall

"""
clobber()

Force the compiler to flush pending writes to global memory.
Acts as an effective read/write barrier.
"""
@inline function clobber()
    # An empty inline-asm statement whose only declared effect is a `memory`
    # clobber: it emits no instructions but stops the compiler from keeping
    # memory contents cached or reordered across this point.
    return Base.llvmcall("""
        call void asm sideeffect "", "~{memory}"()
        ret void
        """, Void, Tuple{})
end

"""
_llvmname(typ::Type)

Return `(isboxed, name)`, where `name` is the string name of the LLVM equivalent of the Julia type `typ` and `isboxed` tells whether that type is boxed.
Oh my. The preferable way would be to use LLVM.jl to do this for us.
"""
function _llvmname(typ::Type)
    # Ask the Julia runtime which LLVM type it lowers `typ` to; the runtime also
    # reports through `isboxed_ref` whether the type is boxed.
    isboxed_ref = Ref{Bool}()
    llvmtyp = ccall(:julia_type_to_llvm, Ptr{Void},
                    (Any, Ptr{Bool}), typ, isboxed_ref)
    # `LLVMPrintTypeToString` returns a heap-allocated C string owned by the
    # caller. Copy it into a Julia `String`, then always hand the buffer back
    # through `LLVMDisposeMessage` so repeated calls do not leak memory.
    cstr = ccall(:LLVMPrintTypeToString, Ptr{UInt8}, (Ptr{Void},), llvmtyp)
    name = try
        unsafe_string(cstr)
    finally
        ccall(:LLVMDisposeMessage, Void, (Ptr{UInt8},), cstr)
    end
    # Returns a tuple: (is the type boxed?, textual LLVM type name).
    return (isboxed_ref[], name)
end

"""
escape(val)

The `escape` function can be used to prevent a value or
expression from being optimized away by the compiler. This function is
intended to add little to no overhead.
See: https://youtu.be/nXaxk27zwlk?t=2441
"""
@generated function escape(val::T) where T
    # `nothing` carries no data, so a plain memory clobber should have the
    # same effect as escaping it.
    T == Void && return :(clobber())

    # The fake asm call below has to spell out the LLVM type of its operand,
    # so look up the textual LLVM name for `T`.
    isboxed, name = _llvmname(T)
    if isboxed
        # `name` will be `jl_value_t*`, which string-based llvmcall cannot use
        # because it can't handle named structs. Ideally we would issue a
        # `bitcast jl_value_t* %0 to i8*`; for now only warn and generate nothing.
        return Base.warn_once("Trying to escape a boxed value. Don't know how to handle that.")
    end

    # Empty asm that takes the value as an "X" (any kind) operand and clobbers
    # memory, so the compiler must materialise the value.
    ir = """
        call void asm sideeffect "", "X,~{memory}"($name %0)
        ret void
        """
    return :(llvmcall($ir, Void, Tuple{T}, val))
end

################
# Count cycles #
################

# Only implemented on x86_64 and needs cpuflags:
# rdtscp, tsc, nonstop_tsc, tsc_known_freq, constant_tsc
# See https://github.com/dterei/gotsc for a good discussion.

"""
bench_start()

Issues the instructions `cpuid,rdtsc` to get a precise cycle counter at the beginning of a code segment.
"""
@inline function bench_start()
    # Asm sequence: CPUID, then RDTSC, then copy EDX into output 0 and EAX into
    # output 1. RAX/RBX/RCX/RDX are declared clobbered, so the two outputs are
    # placed in other registers. The `{i32, i32}` asm result is repacked into a
    # `[2 x i32]`, which is what the `Tuple{UInt32, UInt32}` return type expects.
    counter = Base.llvmcall("""
        %a = call {i32, i32} asm sideeffect "CPUID\nRDTSC\nMOV %edx, \$0\nMOV %eax, \$1", "=r,=r,~{rax},~{rbx},~{rcx},~{rdx}"()
        %a.0 = extractvalue { i32, i32 } %a, 0
        %a.1 = extractvalue { i32, i32 } %a, 1
        %b0 = insertvalue [2 x i32] undef, i32 %a.0, 0
        %b = insertvalue [2 x i32] %b0 , i32 %a.1, 1
        ret [2 x i32] %b
        """, Tuple{UInt32, UInt32}, Tuple{})
    return counter
end

"""
bench_end()

Issues the instructions `rdtscp,cpuid` to get a precise cycle counter at the end of a code segment.
"""
@inline function bench_end()
    # Asm sequence: RDTSCP, copy EDX into output 0 and EAX into output 1, and
    # only then CPUID. RAX/RBX/RCX/RDX are declared clobbered, so the outputs
    # live in other registers and survive the trailing CPUID. The `{i32, i32}`
    # result is repacked into the `[2 x i32]` that the tuple return type expects.
    counter = Base.llvmcall("""
        %a = call {i32, i32} asm sideeffect "RDTSCP\nMOV %edx, \$0\nMOV %eax, \$1\nCPUID", "=r,=r,~{rax},~{rbx},~{rcx},~{rdx}"()
        %a.0 = extractvalue { i32, i32 } %a, 0
        %a.1 = extractvalue { i32, i32 } %a, 1
        %b0 = insertvalue [2 x i32] undef, i32 %a.0, 0
        %b = insertvalue [2 x i32] %b0 , i32 %a.1, 1
        ret [2 x i32] %b
        """, Tuple{UInt32, UInt32}, Tuple{})
    return counter
end

# Combine a `(high, low)` pair of 32-bit halves into one 64-bit value:
# the first element fills the upper 32 bits, the second the lower 32 bits.
function cyc_convert(c::Tuple{UInt32, UInt32})
    upper = UInt64(c[1])
    lower = UInt64(c[2])
    return (upper << 32) | lower
end

# Evaluate `ex` between a `bench_start()` and a `bench_end()` reading and yield
# the difference of the two converted counter values. The result of `ex` is
# passed through `escape` so the compiler cannot drop the measured work.
macro elapsed_cyc(ex)
    measured = esc(ex)
    return quote
        local ticks_before = bench_start()
        escape($measured)
        local ticks_after = bench_end()
        cyc_convert(ticks_after) - cyc_convert(ticks_before)
    end
end

##########
# Timers #
##########
# Julia-side mirror of the C `struct timespec` that `clock_gettime` fills in.
# The C struct is `{ time_t tv_sec; long tv_nsec; }`, so the fields use `Clong`
# rather than a hard-coded 64-bit integer: with fixed 64-bit fields the layout
# does not match on platforms where `long` is 32 bits and the values read back
# would be garbage.
# NOTE(review): assumes `time_t` has the width of C `long` — confirm for any
# platform that uses a 64-bit `time_t` together with a 32-bit `long`.
struct TimeSpec
    tv_sec::Clong   # whole seconds (time_t)
    tv_nsec::Clong  # nanoseconds within the current second (long)
end
# Collapse a seconds/nanoseconds pair into a single floating-point number of
# seconds: the nanosecond field is scaled by 1e-9 and added to the seconds.
function maketime(ts)
    fractional = ts.tv_nsec * 1e-9
    return ts.tv_sec + fractional
end

# Clock ids passed to `clock_gettime`. The numeric values are platform-specific:
# Linux uses 2 and 3, while Darwin (macOS) uses 12 and 16. Every other kernel
# falls back to the Linux numbering.
# NOTE(review): the fallback is an assumption — confirm against the platform's
# <time.h> before relying on these clocks there.
const CLOCK_PROCESS_CPUTIME_ID = Cint(Sys.KERNEL === :Darwin ? 12 : 2)
const CLOCK_THREAD_CPUTIME_ID = Cint(Sys.KERNEL === :Darwin ? 16 : 3)

# Call the C `clock_gettime` for clock id `cid` and return the filled-in
# `TimeSpec`. Throws a `SystemError` when the C call reports failure.
@inline function clock_gettime(cid)
    ts = Ref{TimeSpec}()
    status = ccall(:clock_gettime, Cint, (Cint, Ref{TimeSpec}), cid, ts)
    # On failure the C function leaves `ts` unwritten, so returning it would
    # hand uninitialised memory to the caller as if it were a time. Raise the
    # errno-based error instead of silently producing a bogus measurement.
    systemerror("clock_gettime", status != 0)
    return ts[]
end

"""
getProcessTime()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't a very Julian name, both in that it starts with "get" and that it's in camel case. I'd just call it processtime(). Likewise for getThreadTime, I'd call that threadtime().


Per-process CPU-time clock (measures CPU time consumed by all
threads in the process).
"""
@inline function getProcessTime()
    # Read the process CPU-time clock, then convert the seconds/nanoseconds
    # pair into a single number via `maketime`.
    spec = clock_gettime(CLOCK_PROCESS_CPUTIME_ID)
    return maketime(spec)
end

"""
getThreadTime()

Thread-specific CPU-time clock.
"""
@inline function getThreadTime()
    # Read the thread CPU-time clock, then convert the seconds/nanoseconds
    # pair into a single number via `maketime`.
    spec = clock_gettime(CLOCK_THREAD_CPUTIME_ID)
    return maketime(spec)
end