Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

abstractions for input and model (preparing for distributed impl.) #21

Merged
merged 8 commits into from
Jan 28, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 12 additions & 15 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
language: cpp
compiler:
- clang
# Documentation: http://docs.travis-ci.com/user/languages/julia/
language: julia
os:
- linux
- osx
julia:
- release
- nightly
notifications:
email: false
env:
matrix:
- JULIAVERSION="juliareleases"
- JULIAVERSION="julianightlies"
before_install:
- sudo add-apt-repository ppa:staticfloat/julia-deps -y
- sudo add-apt-repository ppa:staticfloat/${JULIAVERSION} -y
- sudo apt-get update -qq -y
- sudo apt-get install libpcre3-dev julia -y
script:
- julia -e 'Pkg.init(); run(`ln -s $(pwd()) $(Pkg.dir("JuliaRecSys"))`); Pkg.pin("JuliaRecSys"); Pkg.resolve()'
- julia -e 'using JuliaRecSys; @assert isdefined(:JuliaRecSys); @assert typeof(JuliaRecSys) === Module'
# uncomment the following lines to override the default test script
#script:
# - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
# - julia -e 'Pkg.clone(pwd()); Pkg.build("RecSys"); Pkg.test("RecSys"; coverage=true)'
24 changes: 14 additions & 10 deletions examples/lastfm/lastfm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ type MusicRec
trainingset::FileSpec
artist_names::FileSpec
artist_map::FileSpec
rec::ALSWR
als::ALSWR
artist_mat::Nullable{Dict{Int64,AbstractString}}

function MusicRec(trainingset::FileSpec, artist_names::FileSpec, artist_map::FileSpec)
Expand Down Expand Up @@ -92,9 +92,9 @@ function artist_names(rec::MusicRec)
get(rec.artist_mat)
end

train(musicrec::MusicRec, args...) = train(musicrec.rec, args...)
rmse(musicrec::MusicRec) = rmse(musicrec.rec)
recommend(musicrec::MusicRec, args...; kwargs...) = recommend(musicrec.rec, args...; kwargs...)
# Delegate the recommender API to the ALSWR model held in the `als` field.
# Extra positional and keyword arguments are passed through untouched; `rmse`
# now forwards them as well, matching the MovieRec wrapper, while the plain
# `rmse(musicrec)` call keeps working as before.
train(musicrec::MusicRec, args...) = train(musicrec.als, args...)
rmse(musicrec::MusicRec, args...; kwargs...) = rmse(musicrec.als, args...; kwargs...)
recommend(musicrec::MusicRec, args...; kwargs...) = recommend(musicrec.als, args...; kwargs...)

function print_list(mat::Dict, idxs::Vector{Int}, header::AbstractString)
if !isempty(idxs)
Expand Down Expand Up @@ -129,15 +129,19 @@ function test(dataset_path)
print_recommendations(rec, recommend(rec, 9875)...)

println("recommending anonymous user:")
R, item_idmap, user_idmap = RecSys.ratings(rec.rec)
# take user 100
actual_user = findfirst(user_idmap, 9875)
ratings_anon = R[actual_user, :]
actual_movie_ids = item_idmap[find(full(ratings_anon))]
sp_ratings_anon = SparseVector(maximum(item_idmap), actual_movie_ids, nonzeros(ratings_anon))
u_idmap = RecSys.user_idmap(rec.als.inp)
i_idmap = RecSys.item_idmap(rec.als.inp)
# take user 9875
actual_user = isempty(u_idmap) ? 9875 : findfirst(u_idmap, 9875)
rated_anon, ratings_anon = RecSys.items_and_ratings(rec.als.inp, actual_user)
actual_music_ids = isempty(i_idmap) ? rated_anon : i_idmap[rated_anon]
nmusic = isempty(i_idmap) ? RecSys.nitems(rec.als.inp) : maximum(i_idmap)
sp_ratings_anon = SparseVector(nmusic, actual_music_ids, ratings_anon)
print_recommendations(rec, recommend(rec, sp_ratings_anon)...)

println("saving model to model.sav")
clear(rec.als)
localize!(rec.als)
save(rec, "model.sav")
nothing
end
22 changes: 13 additions & 9 deletions examples/movielens/movielens.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ end

type MovieRec
movie_names::FileSpec
rec::ALSWR
als::ALSWR
movie_mat::Nullable{SparseVector{AbstractString,Int64}}

function MovieRec(trainingset::FileSpec, movie_names::FileSpec)
Expand All @@ -30,9 +30,9 @@ function movie_names(rec::MovieRec)
get(rec.movie_mat)
end

train(movierec::MovieRec, args...) = train(movierec.rec, args...)
rmse(movierec::MovieRec, args...; kwargs...) = rmse(movierec.rec, args...; kwargs...)
recommend(movierec::MovieRec, args...; kwargs...) = recommend(movierec.rec, args...; kwargs...)
# Thin wrappers: each call on a MovieRec is answered by its `als` field, with
# all positional and keyword arguments forwarded unchanged.
function train(movierec::MovieRec, args...)
    train(movierec.als, args...)
end

function rmse(movierec::MovieRec, args...; kwargs...)
    rmse(movierec.als, args...; kwargs...)
end

function recommend(movierec::MovieRec, args...; kwargs...)
    recommend(movierec.als, args...; kwargs...)
end

function print_list(mat::SparseVector, idxs::Vector{Int}, header::AbstractString)
if isless(Base.VERSION, v"0.5.0-")
Expand Down Expand Up @@ -69,15 +69,19 @@ function test(dataset_path)
print_recommendations(rec, recommend(rec, 100)...)

println("recommending anonymous user:")
R, item_idmap, user_idmap = RecSys.ratings(rec.rec)
u_idmap = RecSys.user_idmap(rec.als.inp)
i_idmap = RecSys.item_idmap(rec.als.inp)
# take user 100
actual_user = findfirst(user_idmap, 100)
ratings_anon = R[actual_user, :]
actual_movie_ids = item_idmap[find(full(ratings_anon))]
sp_ratings_anon = SparseVector(maximum(item_idmap), actual_movie_ids, nonzeros(ratings_anon))
actual_user = isempty(u_idmap) ? 100 : findfirst(u_idmap, 100)
rated_anon, ratings_anon = RecSys.items_and_ratings(rec.als.inp, actual_user)
actual_movie_ids = isempty(i_idmap) ? rated_anon : i_idmap[rated_anon]
nmovies = isempty(i_idmap) ? RecSys.nitems(rec.als.inp) : maximum(i_idmap)
sp_ratings_anon = SparseVector(nmovies, actual_movie_ids, ratings_anon)
print_recommendations(rec, recommend(rec, sp_ratings_anon)...)

println("saving model to model.sav")
clear(rec.als)
localize!(rec.als)
save(rec, "model.sav")
nothing
end
23 changes: 17 additions & 6 deletions src/RecSys.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,17 @@ import Base: zero
export FileSpec, DlmFile, MatFile, SparseMat, read_input
export ALSWR, train, recommend, rmse, zero
export ParShmem
export save, load
export save, load, clear, localize!

# Ratings as a sparse CSC matrix with Float64 values and Int64 indices.
typealias RatingMatrix SparseMatrixCSC{Float64,Int64}
# Same value/index types, using the shared sparse matrix type from ParallelSparseMatMul.
typealias SharedRatingMatrix ParallelSparseMatMul.SharedSparseMatrixCSC{Float64,Int64}
# Either of the two ratings representations above.
typealias InputRatings Union{RatingMatrix,SharedRatingMatrix}
# Id map held as a plain or shared vector of Int64.
# NOTE(review): presumably maps internal indices to original user/item ids -- confirm in input.jl.
typealias InputIdMap Union{Vector{Int64}, SharedVector{Int64}}
# Model factor held as a plain or shared two-dimensional Float64 array.
typealias ModelFactor Union{Matrix{Float64}, SharedArray{Float64,2}}

# Abstract supertype exported as FileSpec.
# NOTE(review): presumably the root of the input file specification types
# (DlmFile, MatFile, ... are exported alongside it) -- confirm.
abstract FileSpec
# Abstract supertypes for the input and model abstractions.
abstract Inputs
abstract Model

# Abstract supertype for parallelism strategies; ParShmem is a concrete subtype with no fields.
abstract Parallelism
type ParShmem <: Parallelism end
Expand All @@ -25,16 +31,21 @@ if (Base.VERSION >= v"0.5.0-")
using Base.Threads
type ParThread <: Parallelism end
export ParThread
else
# Fallback used when Base.Threads is unavailable: `@threads expr` expands to
# `expr` itself, so an annotated loop still runs (serially). An empty macro body
# would expand to `nothing` and silently discard the loop.
# `esc` keeps the expression in the caller's scope so its variables resolve there.
macro threads(x)
    esc(x)
end
end

include("input.jl")
include("als_model.jl")
include("als-wr.jl")
include("utils.jl")

# enable logging only during debugging
using Logging
#const logger = Logging.configure(filename="recsys.log", level=DEBUG)
const logger = Logging.configure(level=DEBUG)
logmsg(s) = debug(s)
#logmsg(s) = nothing
#using Logging
##const logger = Logging.configure(filename="recsys.log", level=DEBUG)
#const logger = Logging.configure(level=DEBUG)
#logmsg(s) = debug(s)
# Logging is switched off: accept any message and discard it.
function logmsg(s)
    return nothing
end

end
Loading