diff --git a/examples/example-probes/Cargo.toml b/examples/example-probes/Cargo.toml
index bea65eb4..9bfb7df0 100644
--- a/examples/example-probes/Cargo.toml
+++ b/examples/example-probes/Cargo.toml
@@ -73,3 +73,8 @@ required-features = ["probes"]
 name = "tasks"
 path = "src/tasks/main.rs"
 required-features = ["probes", "kernel5_8"]
+
+[[bin]]
+name = "hashmaps"
+path = "src/hashmaps/main.rs"
+required-features = ["probes"]
diff --git a/examples/example-probes/src/hashmaps/main.rs b/examples/example-probes/src/hashmaps/main.rs
new file mode 100644
index 00000000..97c47615
--- /dev/null
+++ b/examples/example-probes/src/hashmaps/main.rs
@@ -0,0 +1,68 @@
+//! This example shows the difference between `PerCpuHashMap` and `HashMap`.
+//! The former is a per-cpu data structure, so users don't need to worry
+//! about race conditions. The latter is a global data structure, so it is
+//! subject to race conditions.
+//!
+//! `PerCpuArray` can be used in place of the BPF stack to hold temporary
+//! values that exceed the maximum size of the BPF stack (512 bytes).
+#![no_std]
+#![no_main]
+use example_probes::hashmaps::*;
+use redbpf_probes::kprobe::prelude::*;
+
+program!(0xFFFFFFFE, "GPL");
+
+#[map]
+static mut ALT_STACK: PerCpuArray<BigStructure> = PerCpuArray::with_max_entries(1);
+
+#[map]
+static mut BIG_STRUCT: LruHashMap<i8, BigStructure> = LruHashMap::with_max_entries(16);
+
+#[map]
+static mut PCPU_MEM_ALLOC: PerCpuHashMap<usize, usize> = PerCpuHashMap::with_max_entries(16);
+
+#[map]
+static mut MEM_ALLOC: HashMap<usize, usize> = HashMap::with_max_entries(16);
+
+#[kprobe]
+unsafe fn sched_fork(_regs: Registers) {
+    let rnd_key = (bpf_get_prandom_u32() & 0xff) as i8;
+    if let Some(bigstruct) = BIG_STRUCT.get_mut(&rnd_key) {
+        bigstruct.f2[99] = 99;
+        BIG_STRUCT.set(&rnd_key, bigstruct);
+    } else {
+        // The maximum size of the BPF stack is 512 bytes, but the
+        // BigStructure struct is 808 bytes, so it cannot live on the stack.
+        // Use a per-cpu array to hold the temporary BigStructure value
+        // instead. Note that a per-cpu array used for this purpose must have
+        // exactly one entry. This is checked by the BPF verifier.
+        let bigstruct = ALT_STACK.get_mut(0).unwrap();
+        for x in 0..=99 {
+            bigstruct.f2[x] = x;
+        }
+
+        BIG_STRUCT.set(&rnd_key, bigstruct);
+    }
+}
+
+#[kprobe]
+unsafe fn __kmalloc(regs: Registers) {
+    let mut size = regs.parm1() as usize;
+    let mut max: usize = 9999;
+    for x in 1..=12 {
+        size >>= 1;
+        if size == 0 {
+            max = usize::pow(2, x) - 1;
+            break;
+        }
+    }
+    if let Some(count) = PCPU_MEM_ALLOC.get_mut(&max) {
+        *count += 1;
+        let count = MEM_ALLOC.get_mut(&max).unwrap();
+        *count += 1;
+    } else {
+        let count = 1;
+        PCPU_MEM_ALLOC.set(&max, &count);
+        MEM_ALLOC.set(&max, &count);
+    }
+}
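The bucketing loop in `__kmalloc` maps each allocation size to the smallest upper bound of the form 2^x - 1 that contains it, with 9999 as a catch-all bucket for sizes of 4096 bytes and up. A plain-Rust restatement of that loop (the `size_bucket` name is made up for illustration):

// `size_bucket` restates the loop in the `__kmalloc` probe: sizes are
// grouped under upper bounds of the form 2^x - 1, and anything of 4096
// bytes or more lands in the catch-all bucket 9999.
fn size_bucket(mut size: usize) -> usize {
    let mut max: usize = 9999;
    for x in 1..=12 {
        size >>= 1;
        if size == 0 {
            max = usize::pow(2, x) - 1;
            break;
        }
    }
    max
}

fn main() {
    assert_eq!(size_bucket(1), 1);
    assert_eq!(size_bucket(7), 7);       // 4..=7       -> 7
    assert_eq!(size_bucket(8), 15);      // 8..=15      -> 15
    assert_eq!(size_bucket(4095), 4095); // 2048..=4095 -> 4095
    assert_eq!(size_bucket(4096), 9999); // >= 4096     -> catch-all
}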
diff --git a/examples/example-probes/src/hashmaps/mod.rs b/examples/example-probes/src/hashmaps/mod.rs
new file mode 100644
index 00000000..bae5abb4
--- /dev/null
+++ b/examples/example-probes/src/hashmaps/mod.rs
@@ -0,0 +1,15 @@
+#[repr(C)]
+#[derive(Clone, Debug)]
+pub struct BigStructure {
+    pub f1: usize,
+    pub f2: [usize; 100],
+}
+
+impl Default for BigStructure {
+    fn default() -> Self {
+        BigStructure {
+            f1: 0,
+            f2: [0; 100],
+        }
+    }
+}
diff --git a/examples/example-probes/src/lib.rs b/examples/example-probes/src/lib.rs
index ea9df2e3..83f72982 100644
--- a/examples/example-probes/src/lib.rs
+++ b/examples/example-probes/src/lib.rs
@@ -12,7 +12,8 @@ pub mod bindings;
 pub mod echo;
+pub mod hashmaps;
 pub mod mallocstacks;
+pub mod tasks;
 pub mod tcp_lifetime;
 pub mod vfsreadlat;
-pub mod tasks;
diff --git a/examples/example-userspace/examples/hashmaps.rs b/examples/example-userspace/examples/hashmaps.rs
new file mode 100644
index 00000000..8bc0f796
--- /dev/null
+++ b/examples/example-userspace/examples/hashmaps.rs
@@ -0,0 +1,77 @@
+//! This example shows the usage of `HashMap`, `PerCpuHashMap` and
+//! `LruHashMap`. It also demonstrates that `HashMap` is subject to race
+//! conditions. Consider `PerCpuHashMap` if your program needs to store
+//! accurate map data.
+
+use libc;
+use std::process;
+use std::time::Duration;
+use tokio::{signal::ctrl_c, time::sleep};
+use tracing::{error, subscriber, Level};
+use tracing_subscriber::FmtSubscriber;
+
+use probes::hashmaps::BigStructure;
+use redbpf::{load::Loader, HashMap, LruHashMap, PerCpuHashMap};
+
+#[tokio::main(flavor = "current_thread")]
+async fn main() {
+    let subscriber = FmtSubscriber::builder()
+        .with_max_level(Level::TRACE)
+        .finish();
+    subscriber::set_global_default(subscriber).unwrap();
+    if unsafe { libc::geteuid() != 0 } {
+        error!("You must be root to use eBPF!");
+        process::exit(1);
+    }
+
+    let mut loaded = Loader::load(probe_code()).expect("error loading probe");
+    for kp in loaded.kprobes_mut() {
+        kp.attach_kprobe(kp.name().as_str(), 0)
+            .expect(format!("error on attach_kprobe to {}", kp.name()).as_str());
+    }
+
+    let big_struct =
+        LruHashMap::<i8, BigStructure>::new(loaded.map("BIG_STRUCT").expect("map not found"))
+            .expect("error on LruHashMap::new");
+    let pcpu_mem_alloc =
+        PerCpuHashMap::<usize, usize>::new(loaded.map("PCPU_MEM_ALLOC").expect("map not found"))
+            .expect("error on PerCpuHashMap::new");
+    let mem_alloc = HashMap::<usize, usize>::new(loaded.map("MEM_ALLOC").expect("map not found"))
+        .expect("error on HashMap::new");
+    println!("Hit Ctrl-C to quit");
+    loop {
+        tokio::select! {
+            _ = sleep(Duration::from_secs(1)) => {}
+            _ = ctrl_c() => break
+        }
+
+        let mut alloc_stats = mem_alloc.iter().collect::<Vec<_>>();
+        alloc_stats.sort();
+        println!("[allocation size up to XXX bytes] => [number of __kmalloc calls]");
+
+        for (size, total_cnt) in alloc_stats {
+            let pcpu_vals = pcpu_mem_alloc.get(size).unwrap();
+            let exact_cnt: usize = pcpu_vals.iter().sum();
+            if total_cnt != exact_cnt {
+                println!(
+                    "{} => {} != {} (hashmap != pcpu hashmap)",
+                    size, total_cnt, exact_cnt
+                );
+            } else {
+                println!("{} => {}", size, total_cnt);
+            }
+        }
+    }
+
+    println!("");
+    println!("iterate over big structures!");
+    for (_, bigstruct) in big_struct.iter() {
+        println!("{:?}", bigstruct);
+    }
+}
+
+fn probe_code() -> &'static [u8] {
+    include_bytes!(concat!(
+        env!("OUT_DIR"),
+        "/target/bpf/programs/hashmaps/hashmaps.elf"
+    ))
+}
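The mismatch this example prints comes from the probe's `*count += 1` on the shared `MEM_ALLOC` map: it is a non-atomic read-modify-write, so two CPUs can read the same count and both store count + 1, losing one increment. A userspace sketch of the same lost-update pattern, with a split relaxed load/store standing in for the BPF map update (plain threads, no BPF involved):

use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;

// COUNTER stands in for one value slot of the shared (non-per-cpu) BPF map.
static COUNTER: AtomicUsize = AtomicUsize::new(0);

fn main() {
    let handles: Vec<_> = (0..4)
        .map(|_| {
            thread::spawn(|| {
                for _ in 0..100_000 {
                    // Split load/store, like `*count += 1` in the probe:
                    // another thread can bump COUNTER between the two steps
                    // and its increment is then silently overwritten.
                    let v = COUNTER.load(Ordering::Relaxed);
                    COUNTER.store(v + 1, Ordering::Relaxed);
                }
            })
        })
        .collect();
    for h in handles {
        h.join().unwrap();
    }
    // Typically prints far less than the expected 400000.
    println!("{} (expected 400000)", COUNTER.load(Ordering::Relaxed));
}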
diff --git a/redbpf-probes/src/maps.rs b/redbpf-probes/src/maps.rs
index 1b35e7dd..b1e02484 100644
--- a/redbpf-probes/src/maps.rs
+++ b/redbpf-probes/src/maps.rs
@@ -15,7 +15,7 @@ kernel and user-space code.
 use core::convert::TryInto;
 use core::default::Default;
 use core::marker::PhantomData;
-use core::mem;
+use core::{mem, ptr};
 use cty::*;
 
 use crate::bindings::*;
@@ -47,6 +47,15 @@ macro_rules! define_hashmap {
                 }
             }
             /// Returns a reference to the value corresponding to the key.
+            ///
+            /// **CAUTION** The value that the returned reference refers to
+            /// is stored in 8-byte aligned memory, so the reference is not
+            /// guaranteed to be aligned properly if the alignment of the
+            /// value type exceeds 8 bytes. Do not call this method if the
+            /// alignment is greater than 8 bytes.
+            ///
+            /// Use the `get_val` method instead if the alignment of the
+            /// value is greater than 8 bytes.
            #[inline]
            pub fn get(&mut self, key: &K) -> Option<&V> {
                unsafe {
@@ -62,6 +71,17 @@
                }
            }
 
+            /// Returns a mutable reference to the value corresponding to
+            /// the key.
+            ///
+            /// **CAUTION** The value that the returned mutable reference
+            /// refers to is stored in 8-byte aligned memory, so the mutable
+            /// reference is not guaranteed to be aligned properly if the
+            /// alignment of the value type exceeds 8 bytes. Do not call
+            /// this method if the alignment is greater than 8 bytes.
+            ///
+            /// Use the `get_val` method instead if the alignment of the
+            /// value is greater than 8 bytes, but then call the `set`
+            /// method to write the modified value back to the BPF map.
            #[inline]
            pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
                unsafe {
@@ -77,6 +97,30 @@
                }
            }
 
+            /// Returns a copy of the value corresponding to the key
+            ///
+            /// **NOTE** Prefer the more efficient `get_mut` method if the
+            /// alignment of the value is equal to or less than 8 bytes,
+            /// i.e., the alignment is 8, 4, 2 bytes or 1 byte. The Rust
+            /// compiler assumes that the value a reference refers to is
+            /// properly aligned, but the Linux kernel stores map values in
+            /// 8-byte aligned memory regardless of the alignment the
+            /// compiler expects, so this method copies the value out with
+            /// an unaligned read instead of handing out a reference.
+            #[inline]
+            pub fn get_val(&mut self, key: &K) -> Option<V> {
+                unsafe {
+                    let value = bpf_map_lookup_elem(
+                        &mut self.def as *mut _ as *mut c_void,
+                        key as *const _ as *const c_void,
+                    );
+                    if value.is_null() {
+                        None
+                    } else {
+                        Some(ptr::read_unaligned(value as *const V))
+                    }
+                }
+            }
+
            /// Set the `value` in the map for `key`
            #[inline]
            pub fn set(&mut self, key: &K, value: &V) {
@@ -176,17 +220,48 @@ macro_rules! define_array {
     };
 }
 define_hashmap!(
-    /// Hash table map.
+    /// Hash table map
     ///
-    /// High level API for BPF_MAP_TYPE_HASH maps.
+    /// High level API of BPF_MAP_TYPE_HASH maps for BPF programs.
     ///
-    /// For userspace API, see [`redbpf::HashMap`](../../redbpf/struct.HashMap.html)
+    /// If you are looking for the userspace API, see
+    /// [`redbpf::HashMap`](../../redbpf/struct.HashMap.html) instead.
     HashMap,
     bpf_map_type_BPF_MAP_TYPE_HASH
 );
-// define_hashmap!(PerCpuHashMap, bpf_map_type_BPF_MAP_TYPE_PERCPU_HASH); // userspace part is not implemented yet
-// define_hashmap!(LruHashMap, bpf_map_type_BPF_MAP_TYPE_LRU_HASH); // userspace part is not implemented yet
-// define_hashmap!(LruPerCpuHashMap, bpf_map_type_BPF_MAP_TYPE_LRU_PERCPU_HASH); // userspace part is not implemented yet
+define_hashmap!(
+    /// Per-cpu hash table map
+    ///
+    /// High level API of BPF_MAP_TYPE_PERCPU_HASH maps for BPF programs.
+    ///
+    /// If you are looking for the userspace API, see
+    /// [`redbpf::PerCpuHashMap`](../../redbpf/struct.PerCpuHashMap.html)
+    /// instead.
+    PerCpuHashMap,
+    bpf_map_type_BPF_MAP_TYPE_PERCPU_HASH
+);
+define_hashmap!(
+    /// LRU hash table map
+    ///
+    /// High level API of BPF_MAP_TYPE_LRU_HASH maps for BPF programs.
+    ///
+    /// If you are looking for the userspace API, see
+    /// [`redbpf::LruHashMap`](../../redbpf/struct.LruHashMap.html) instead.
+    LruHashMap,
+    bpf_map_type_BPF_MAP_TYPE_LRU_HASH
+);
+define_hashmap!(
+    /// LRU per-cpu hash table map
+    ///
+    /// High level API of BPF_MAP_TYPE_LRU_PERCPU_HASH maps for BPF programs.
+    ///
+    /// If you are looking for the userspace API, see
+    /// [`redbpf::LruPerCpuHashMap`](../../redbpf/struct.LruPerCpuHashMap.html)
+    /// instead.
+    LruPerCpuHashMap,
+    bpf_map_type_BPF_MAP_TYPE_LRU_PERCPU_HASH
+);
+
 define_array!(
     /// BPF array map for BPF programs
     ///
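Why `get_val` copies instead of handing out a reference: the kernel aligns map values to 8 bytes only, so a value type with a larger alignment requirement may sit at an address where creating a `&V` is undefined behavior, while `ptr::read_unaligned` is always sound. A standalone sketch with a hypothetical 16-byte aligned type:

use std::ptr;

// Hypothetical value type whose alignment (16) exceeds the 8-byte alignment
// the kernel guarantees for BPF map values.
#[repr(C, align(16))]
#[derive(Clone, Copy, Debug)]
struct Aligned16 {
    v: u128,
}

fn main() {
    // A plain byte buffer: an Aligned16 stored at offset 8 is in general not
    // 16-byte aligned, just as kernel map storage is only 8-byte aligned.
    let mut storage = [0u8; 40];
    let misaligned = unsafe { storage.as_mut_ptr().add(8) } as *mut Aligned16;

    // Copying through unaligned-tolerant pointer writes/reads is sound...
    unsafe { ptr::write_unaligned(misaligned, Aligned16 { v: 42 }) };
    let copy = unsafe { ptr::read_unaligned(misaligned) };
    println!("{:?}", copy);

    // ...whereas materializing a reference (`&*misaligned`), which is what
    // `get` effectively does, would be undefined behavior at this address
    // whenever it is not 16-byte aligned.
}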
diff --git a/redbpf/src/lib.rs b/redbpf/src/lib.rs
index 2d3889ee..9cfdd3f5 100644
--- a/redbpf/src/lib.rs
+++ b/redbpf/src/lib.rs
@@ -59,8 +59,10 @@ use bpf_sys::{
     bpf_attach_type_BPF_SK_SKB_STREAM_PARSER, bpf_attach_type_BPF_SK_SKB_STREAM_VERDICT,
     bpf_attach_type_BPF_TRACE_ITER, bpf_create_map_attr, bpf_create_map_xattr, bpf_insn,
     bpf_iter_create, bpf_link_create, bpf_load_program_xattr, bpf_map_def, bpf_map_info,
-    bpf_map_type_BPF_MAP_TYPE_ARRAY, bpf_map_type_BPF_MAP_TYPE_PERCPU_ARRAY, bpf_prog_type,
-    BPF_ANY,
+    bpf_map_type_BPF_MAP_TYPE_ARRAY, bpf_map_type_BPF_MAP_TYPE_HASH,
+    bpf_map_type_BPF_MAP_TYPE_LRU_HASH, bpf_map_type_BPF_MAP_TYPE_LRU_PERCPU_HASH,
+    bpf_map_type_BPF_MAP_TYPE_PERCPU_ARRAY, bpf_map_type_BPF_MAP_TYPE_PERCPU_HASH,
+    bpf_map_type_BPF_MAP_TYPE_PERF_EVENT_ARRAY, bpf_prog_type, BPF_ANY,
 };
 
 use goblin::elf::{reloc::RelocSection, section_header as hdr, Elf, SectionHeader, Sym};
@@ -236,6 +238,16 @@ pub struct TaskIter {
     link_fd: Option<RawFd>,
 }
 
+/// A base BPF map data structure
+///
+/// It contains a map definition and auxiliary data. It just holds data; it
+/// does not provide any useful API to users. See
+/// [`HashMap`](./struct.HashMap.html),
+/// [`LruHashMap`](./struct.LruHashMap.html),
+/// [`PerCpuHashMap`](./struct.PerCpuHashMap.html),
+/// [`LruPerCpuHashMap`](./struct.LruPerCpuHashMap.html),
+/// [`Array`](./struct.Array.html) and [`PerCpuArray`](./struct.PerCpuArray.html),
+/// which wrap `Map` to provide the BPF map API to userspace programs.
 #[derive(Debug)]
 pub struct Map {
     pub name: String,
@@ -259,12 +271,51 @@ enum MapBuilder<'a> {
     ExistingMap(Map),
 }
 
+/// A BPF hash map structure
+///
+/// This provides a higher level API for BPF maps whose type is
+/// `BPF_MAP_TYPE_HASH`.
 pub struct HashMap<'a, K: Clone, V: Clone> {
     base: &'a Map,
     _k: PhantomData<K>,
     _v: PhantomData<V>,
 }
 
+/// A BPF LRU hash map structure
+///
+/// This provides a higher level API for BPF maps whose type is
+/// `BPF_MAP_TYPE_LRU_HASH`.
+pub struct LruHashMap<'a, K: Clone, V: Clone> {
+    base: &'a Map,
+    _k: PhantomData<K>,
+    _v: PhantomData<V>,
+}
+
+/// A per-cpu BPF hash map structure
+///
+/// This provides a higher level API for BPF maps whose type is
+/// `BPF_MAP_TYPE_PERCPU_HASH`.
+pub struct PerCpuHashMap<'a, K: Clone, V: Clone> {
+    base: &'a Map,
+    _k: PhantomData<K>,
+    _v: PhantomData<PerCpuValues<V>>,
+}
+
+/// An LRU per-cpu BPF hash map structure
+///
+/// This provides a higher level API for BPF maps whose type is
+/// `BPF_MAP_TYPE_LRU_PERCPU_HASH`.
+pub struct LruPerCpuHashMap<'a, K: Clone, V: Clone> {
+    base: &'a Map,
+    _k: PhantomData<K>,
+    _v: PhantomData<PerCpuValues<V>>,
+}
+
+/// A stacktrace BPF map structure
+///
+/// A stacktrace map holds arrays of instruction pointers stored in a BPF
+/// map whose type is `BPF_MAP_TYPE_STACK_TRACE`.
 pub struct StackTrace<'a> {
     base: &'a Map,
 }
@@ -343,6 +394,11 @@ pub struct RelocationInfo {
     sym_idx: usize,
 }
 
+trait MapIterable<K: Clone, V: Clone> {
+    fn get(&self, key: K) -> Option<V>;
+    fn next_key(&self, key: Option<K>) -> Option<K>;
+}
+
 impl Program {
     #[allow(clippy::unnecessary_wraps)]
     fn new(kind: &str, name: &str, code: &[u8]) -> Result<Program> {
@@ -1371,7 +1427,11 @@ impl<'a> ModuleBuilder<'a> {
             if self.programs.contains_key(&rel.target_sec_idx) {
                 if let Err(_) = rel.apply(&mut self.programs, &maps, &symtab) {
                     // means that not normal case, we should rely on symbol value instead of section header index
-                    rel.apply_with_symmap(&mut self.programs, &symval_to_maps, &symtab)?;
+                    rel.apply_with_symmap(&mut self.programs, &symval_to_maps, &symtab)
+                        .map_err(|e| {
+                            error!("can not relocate map");
+                            e
+                        })?;
                 }
             }
         }
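The `MapIterable` trait introduced above is what lets one iterator type serve every hash map flavor added in this patch: each wrapper supplies only `get` and `next_key`, and the iterator drives them through `&dyn MapIterable<K, V>`. A toy, self-contained model of the pattern (the `VecMap` type is invented for illustration; its `next_key` mimics `bpf_map_get_next_key`, where passing no key yields the first key):

trait MapIterable<K: Clone, V: Clone> {
    fn get(&self, key: K) -> Option<V>;
    fn next_key(&self, key: Option<K>) -> Option<K>;
}

struct VecMap {
    items: Vec<(u32, String)>,
}

impl MapIterable<u32, String> for VecMap {
    fn get(&self, key: u32) -> Option<String> {
        self.items.iter().find(|(k, _)| *k == key).map(|(_, v)| v.clone())
    }

    // Like bpf_map_get_next_key: None starts iteration at the first key.
    fn next_key(&self, key: Option<u32>) -> Option<u32> {
        match key {
            None => self.items.first().map(|(k, _)| *k),
            Some(k) => {
                let pos = self.items.iter().position(|(ik, _)| *ik == k)?;
                self.items.get(pos + 1).map(|(k, _)| *k)
            }
        }
    }
}

struct MapIter<'a, K: Clone, V: Clone> {
    iterable: &'a dyn MapIterable<K, V>,
    last_key: Option<K>,
}

impl<K: Clone, V: Clone> Iterator for MapIter<'_, K, V> {
    type Item = (K, V);

    fn next(&mut self) -> Option<Self::Item> {
        let key = self.last_key.take();
        self.last_key = self.iterable.next_key(key);
        Some((
            self.last_key.as_ref()?.clone(),
            self.iterable.get(self.last_key.as_ref()?.clone())?,
        ))
    }
}

fn main() {
    let m = VecMap {
        items: vec![(1, "a".to_string()), (2, "b".to_string())],
    };
    let it = MapIter { iterable: &m, last_key: None };
    assert_eq!(
        it.collect::<Vec<_>>(),
        vec![(1, "a".to_string()), (2, "b".to_string())]
    );
}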
@@ -1792,9 +1852,11 @@ impl<'base, K: Clone, V: Clone> HashMap<'base, K, V> {
     pub fn new(base: &Map) -> Result<HashMap<K, V>> {
         if mem::size_of::<K>() != base.config.key_size as usize
             || mem::size_of::<V>() != base.config.value_size as usize
+            || (bpf_map_type_BPF_MAP_TYPE_HASH != base.config.type_
+                && bpf_map_type_BPF_MAP_TYPE_PERF_EVENT_ARRAY != base.config.type_)
         {
             error!(
-                "map definitions (sizes of key and value) of base `Map' and
+                "map definitions (map type and key/value size) of base `Map' and
                 `HashMap' do not match"
             );
             return Err(Error::Map);
@@ -1807,46 +1869,211 @@ impl<'base, K: Clone, V: Clone> HashMap<'base, K, V> {
         })
     }
 
-    pub fn set(&self, mut key: K, mut value: V) {
-        unsafe {
-            bpf_sys::bpf_map_update_elem(
-                self.base.fd,
-                &mut key as *mut _ as *mut _,
-                &mut value as *mut _ as *mut _,
-                0,
-            );
-        }
-    }
-
-    pub fn get(&self, mut key: K) -> Option<V> {
-        let mut value = MaybeUninit::zeroed();
-        if unsafe {
-            bpf_sys::bpf_map_lookup_elem(
-                self.base.fd,
-                &mut key as *mut _ as *mut _,
-                &mut value as *mut _ as *mut _,
-            )
-        } < 0
-        {
-            return None;
-        }
-        Some(unsafe { value.assume_init() })
-    }
-
-    pub fn delete(&self, mut key: K) {
-        unsafe {
-            bpf_sys::bpf_map_delete_elem(self.base.fd, &mut key as *mut _ as *mut _);
-        }
-    }
-
-    pub fn iter<'a>(&'a self) -> MapIter<'a, '_, K, V> {
-        MapIter {
-            map: self,
-            key: None,
-        }
-    }
-}
+    pub fn set(&self, key: K, value: V) {
+        let _ = bpf_map_set(self.base.fd, key, value);
+    }
+
+    pub fn get(&self, key: K) -> Option<V> {
+        bpf_map_get(self.base.fd, key)
+    }
+
+    pub fn delete(&self, key: K) {
+        let _ = bpf_map_delete(self.base.fd, key);
+    }
+
+    /// Return an iterator over all items in the map
+    pub fn iter<'a>(&'a self) -> MapIter<'a, K, V> {
+        MapIter {
+            iterable: self,
+            last_key: None,
+        }
+    }
+}
+
+impl<K: Clone, V: Clone> MapIterable<K, V> for HashMap<'_, K, V> {
+    fn get(&self, key: K) -> Option<V> {
+        HashMap::get(self, key)
+    }
+
+    fn next_key(&self, key: Option<K>) -> Option<K> {
+        bpf_map_get_next_key(self.base.fd, key)
+    }
+}
+
+impl<'base, K: Clone, V: Clone> LruHashMap<'base, K, V> {
+    pub fn new(base: &Map) -> Result<LruHashMap<K, V>> {
+        if mem::size_of::<K>() != base.config.key_size as usize
+            || mem::size_of::<V>() != base.config.value_size as usize
+            || bpf_map_type_BPF_MAP_TYPE_LRU_HASH != base.config.type_
+        {
+            error!(
+                "map definitions (map type and key/value sizes) of base `Map' and `LruHashMap' do not match"
+            );
+            return Err(Error::Map);
+        }
+
+        Ok(LruHashMap {
+            base,
+            _k: PhantomData,
+            _v: PhantomData,
+        })
+    }
+
+    pub fn set(&self, key: K, value: V) {
+        let _ = bpf_map_set(self.base.fd, key, value);
+    }
+
+    pub fn get(&self, key: K) -> Option<V> {
+        bpf_map_get(self.base.fd, key)
+    }
+
+    pub fn delete(&self, key: K) {
+        let _ = bpf_map_delete(self.base.fd, key);
+    }
+
+    /// Return an iterator over all items in the map
+    pub fn iter<'a>(&'a self) -> MapIter<'a, K, V> {
+        MapIter {
+            iterable: self,
+            last_key: None,
+        }
+    }
+}
+
+impl<K: Clone, V: Clone> MapIterable<K, V> for LruHashMap<'_, K, V> {
+    fn get(&self, key: K) -> Option<V> {
+        LruHashMap::<'_, K, V>::get(self, key)
+    }
+
+    fn next_key(&self, key: Option<K>) -> Option<K> {
+        bpf_map_get_next_key(self.base.fd, key)
+    }
+}
+
+impl<'base, K: Clone, V: Clone> PerCpuHashMap<'base, K, V> {
+    pub fn new(base: &Map) -> Result<PerCpuHashMap<K, V>> {
+        if mem::size_of::<K>() != base.config.key_size as usize
+            || mem::size_of::<V>() != base.config.value_size as usize
+            || bpf_map_type_BPF_MAP_TYPE_PERCPU_HASH != base.config.type_
+        {
+            error!("map definitions (size of key/value and map type) of base `Map' and `PerCpuHashMap' do not match");
+            return Err(Error::Map);
+        }
+
+        Ok(PerCpuHashMap {
+            base,
+            _k: PhantomData,
+            _v: PhantomData,
+        })
+    }
+
+    /// Set per-cpu `values` to the BPF map at `key`
+    ///
+    /// The number of elements in `values` should be equal to the number of
+    /// possible CPUs. This requirement is automatically fulfilled when
+    /// `values` is created by
+    /// [`PerCpuValues::new`](./struct.PerCpuValues.html#method.new).
+    ///
+    /// `Err` is returned if the number of elements is wrong or if the
+    /// underlying `bpf_map_update_elem` call fails.
+    pub fn set(&self, key: K, values: PerCpuValues<V>) -> Result<()> {
+        bpf_percpu_map_set(self.base.fd, key, values)
+    }
+
+    /// Get per-cpu values corresponding to the `key` from the BPF map
+    ///
+    /// If `key` is found, `Some([PerCpuValues](./struct.PerCpuValues.html))`
+    /// is returned.
+    pub fn get(&self, key: K) -> Option<PerCpuValues<V>> {
+        bpf_percpu_map_get(self.base.fd, key)
+    }
+
+    /// Delete `key` from the BPF map
+    pub fn delete(&self, key: K) {
+        let _ = bpf_map_delete(self.base.fd, key);
+    }
+
+    /// Return an iterator over all items in the map
+    pub fn iter<'a>(&'a self) -> MapIter<'a, K, PerCpuValues<V>> {
+        MapIter {
+            iterable: self,
+            last_key: None,
+        }
+    }
+}
+
+impl<K: Clone, V: Clone> MapIterable<K, PerCpuValues<V>> for PerCpuHashMap<'_, K, V> {
+    fn get(&self, key: K) -> Option<PerCpuValues<V>> {
+        PerCpuHashMap::get(self, key)
+    }
+
+    fn next_key(&self, key: Option<K>) -> Option<K> {
+        bpf_map_get_next_key(self.base.fd, key)
+    }
+}
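A minimal usage sketch for the per-cpu setter above, assuming a loaded module with the `PCPU_MEM_ALLOC` map from the example probe (the `seed_bucket` helper name is hypothetical):

use redbpf::{PerCpuHashMap, PerCpuValues};

// Hypothetical helper: `loaded` is a redbpf::load::Loaded as produced by
// Loader::load in the userspace example, and PCPU_MEM_ALLOC is the
// usize -> usize per-cpu map defined by the example probe.
fn seed_bucket(loaded: &redbpf::load::Loaded) -> redbpf::Result<()> {
    let map = PerCpuHashMap::<usize, usize>::new(
        loaded.map("PCPU_MEM_ALLOC").expect("map not found"),
    )?;
    // One slot per possible CPU, all zero: the element-count requirement of
    // `set` is satisfied by construction.
    let mut values = PerCpuValues::<usize>::new(0);
    values[0] = 1; // bump only CPU 0's slot
    map.set(63, values) // key 63: the "up to 63 bytes" bucket
}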
+
+impl<'base, K: Clone, V: Clone> LruPerCpuHashMap<'base, K, V> {
+    pub fn new(base: &Map) -> Result<LruPerCpuHashMap<K, V>> {
+        if mem::size_of::<K>() != base.config.key_size as usize
+            || mem::size_of::<V>() != base.config.value_size as usize
+            || bpf_map_type_BPF_MAP_TYPE_LRU_PERCPU_HASH != base.config.type_
+        {
+            error!("map definitions (size of key/value and map type) of base `Map' and `LruPerCpuHashMap' do not match");
+            return Err(Error::Map);
+        }
+
+        Ok(LruPerCpuHashMap {
+            base,
+            _k: PhantomData,
+            _v: PhantomData,
+        })
+    }
+
+    /// Set per-cpu `values` to the BPF map at `key`
+    ///
+    /// The number of elements in `values` should be equal to the number of
+    /// possible CPUs. This requirement is automatically fulfilled when
+    /// `values` is created by
+    /// [`PerCpuValues::new`](./struct.PerCpuValues.html#method.new).
+    ///
+    /// `Err` is returned if the number of elements is wrong or if the
+    /// underlying `bpf_map_update_elem` call fails.
+    pub fn set(&self, key: K, values: PerCpuValues<V>) -> Result<()> {
+        bpf_percpu_map_set(self.base.fd, key, values)
+    }
+
+    /// Get per-cpu values corresponding to the `key` from the BPF map
+    ///
+    /// If `key` is found, `Some([PerCpuValues](./struct.PerCpuValues.html))`
+    /// is returned.
+    pub fn get(&self, key: K) -> Option<PerCpuValues<V>> {
+        bpf_percpu_map_get(self.base.fd, key)
+    }
+
+    /// Delete `key` from the BPF map
+    pub fn delete(&self, key: K) {
+        let _ = bpf_map_delete(self.base.fd, key);
+    }
+
+    /// Return an iterator over all items in the map
+    pub fn iter<'a>(&'a self) -> MapIter<'a, K, PerCpuValues<V>> {
+        MapIter {
+            iterable: self,
+            last_key: None,
+        }
+    }
+}
+
+impl<K: Clone, V: Clone> MapIterable<K, PerCpuValues<V>> for LruPerCpuHashMap<'_, K, V> {
+    fn get(&self, key: K) -> Option<PerCpuValues<V>> {
+        LruPerCpuHashMap::get(self, key)
+    }
+
+    fn next_key(&self, key: Option<K>) -> Option<K> {
+        bpf_map_get_next_key(self.base.fd, key)
+    }
+}
+
 impl<'base, T: Clone> Array<'base, T> {
     /// Create `Array` map from `base`
     pub fn new(base: &Map) -> Result<Array<T>> {
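The per-cpu helpers further down depend on `round_up::<V>(8)`. Its body is outside this patch, but the arithmetic it performs is presumably the usual rounding of `size_of::<V>()` up to the next multiple of the unit size. A sketch under that assumption, with worked values:

use std::mem;

// Presumed behavior of `round_up::<T>(unit_size)` (the body is not part of
// this patch): the size of T rounded up to the next multiple of unit_size.
// The kernel requires per-cpu map values to occupy multiples of 8 bytes.
fn round_up<T>(unit_size: usize) -> usize {
    (mem::size_of::<T>() + unit_size - 1) / unit_size * unit_size
}

fn main() {
    assert_eq!(round_up::<u8>(8), 8);        // 1 byte   -> one 8-byte slot
    assert_eq!(round_up::<u64>(8), 8);       // 8 bytes  -> exactly one slot
    assert_eq!(round_up::<[u8; 12]>(8), 16); // 12 bytes -> two slots
}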
@@ -1924,12 +2151,14 @@ fn round_up<T>(unit_size: usize) -> usize {
 /// the same with [`cpus::get_possible_num`](./cpus/fn.get_possible_num.html).
 /// It also implements `Deref` and `DerefMut` so it can be used as a normal
 /// array.
+///
 /// # Example
 /// ```no_run
 /// use redbpf::PerCpuValues;
 /// let mut values = PerCpuValues::<u64>::new(0);
 /// values[0] = 1;
 /// ```
+#[derive(Clone, Debug)]
 pub struct PerCpuValues<T: Clone>(Box<[T]>);
 
 impl<T: Clone> PerCpuValues<T> {
@@ -1940,12 +2169,19 @@ impl<T: Clone> PerCpuValues<T> {
     pub fn new(default_value: T) -> Self {
         let count = cpus::get_possible_num();
         let v = vec![default_value; count];
-        Self(v.into_boxed_slice())
+        Self(v.into())
     }
+}
 
-    // This is called by `get` methods of per-cpu map structures
-    fn from_boxed_slice(v: Box<[T]>) -> Self {
-        Self(v)
+impl<T: Clone> From<Box<[T]>> for PerCpuValues<T> {
+    fn from(values: Box<[T]>) -> Self {
+        Self(values)
+    }
+}
+
+impl<T: Clone> From<Vec<T>> for PerCpuValues<T> {
+    fn from(values: Vec<T>) -> Self {
+        Self::from(values.into_boxed_slice())
     }
 }
 
@@ -2054,7 +2290,7 @@ impl<'base, T: Clone> PerCpuArray<'base, T> {
             }
         }
 
-        Some(PerCpuValues::from_boxed_slice(values.into_boxed_slice()))
+        Some(values.into())
     }
 
     /// Get length of array map
@@ -2129,51 +2365,21 @@ impl<'base> ProgramArray<'base> {
     }
 }
 
-pub struct MapIter<'a, 'b, K: Clone, V: Clone> {
-    map: &'a HashMap<'b, K, V>,
-    key: Option<K>,
+pub struct MapIter<'a, K: Clone, V: Clone> {
+    iterable: &'a dyn MapIterable<K, V>,
+    last_key: Option<K>,
 }
 
-impl<K: Clone, V: Clone> Iterator for MapIter<'_, '_, K, V> {
+impl<K: Clone, V: Clone> Iterator for MapIter<'_, K, V> {
     type Item = (K, V);
 
     fn next(&mut self) -> Option<Self::Item> {
-        let key = self.key.take();
-        self.key = match key {
-            Some(mut key) => {
-                let mut next_key = MaybeUninit::<K>::zeroed();
-                let ret = unsafe {
-                    bpf_sys::bpf_map_get_next_key(
-                        self.map.base.fd,
-                        &mut key as *mut _ as *mut _,
-                        &mut next_key as *mut _ as *mut _,
-                    )
-                };
-                if ret < 0 {
-                    None
-                } else {
-                    Some(unsafe { next_key.assume_init() })
-                }
-            }
-            None => {
-                let mut key = MaybeUninit::<K>::zeroed();
-                if unsafe {
-                    bpf_sys::bpf_map_get_next_key(
-                        self.map.base.fd,
-                        ptr::null(),
-                        &mut key as *mut _ as *mut _,
-                    )
-                } < 0
-                {
-                    None
-                } else {
-                    Some(unsafe { key.assume_init() })
-                }
-            }
-        };
-
-        let key = self.key.as_ref()?.clone();
-        Some((key.clone(), self.map.get(key).unwrap()))
+        let key = self.last_key.take();
+        self.last_key = self.iterable.next_key(key);
+        Some((
+            self.last_key.as_ref()?.clone(),
+            self.iterable.get(self.last_key.as_ref()?.clone())?,
+        ))
     }
 }
 
@@ -2434,3 +2640,130 @@ fn data<'d>(bytes: &'d [u8], shdr: &SectionHeader) -> &'d [u8] {
 
     &bytes[offset..end]
 }
+
+fn bpf_map_set<K: Clone, V: Clone>(fd: RawFd, mut key: K, mut value: V) -> Result<()> {
+    if unsafe {
+        bpf_sys::bpf_map_update_elem(
+            fd,
+            &mut key as *mut _ as *mut _,
+            &mut value as *mut _ as *mut _,
+            0,
+        )
+    } < 0
+    {
+        Err(Error::Map)
+    } else {
+        Ok(())
+    }
+}
+
+fn bpf_map_get<K: Clone, V: Clone>(fd: RawFd, mut key: K) -> Option<V> {
+    let mut value = MaybeUninit::zeroed();
+    if unsafe {
+        bpf_sys::bpf_map_lookup_elem(
+            fd,
+            &mut key as *mut _ as *mut _,
+            &mut value as *mut _ as *mut _,
+        )
+    } < 0
+    {
+        return None;
+    }
+    Some(unsafe { value.assume_init() })
+}
+
+fn bpf_map_delete<K: Clone>(fd: RawFd, mut key: K) -> Result<()> {
+    if unsafe { bpf_sys::bpf_map_delete_elem(fd, &mut key as *mut _ as *mut _) } < 0 {
+        Err(Error::Map)
+    } else {
+        Ok(())
+    }
+}
+
+fn bpf_map_get_next_key<K>(fd: RawFd, key: Option<K>) -> Option<K> {
+    if let Some(mut key) = key {
+        let mut next_key = MaybeUninit::<K>::zeroed();
+        let ret = unsafe {
+            bpf_sys::bpf_map_get_next_key(
+                fd,
+                &mut key as *mut _ as *mut _,
+                &mut next_key as *mut _ as *mut _,
+            )
+        };
+        if ret < 0 {
+            None
+        } else {
+            Some(unsafe { next_key.assume_init() })
+        }
+    } else {
+        let mut key = MaybeUninit::<K>::zeroed();
+        if unsafe { bpf_sys::bpf_map_get_next_key(fd, ptr::null(), &mut key as *mut _ as *mut _) }
+            < 0
+        {
+            None
+        } else {
+            Some(unsafe { key.assume_init() })
+        }
+    }
+}
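`bpf_map_get_next_key` mirrors the kernel's iteration protocol: a `None` key (a null pointer at the syscall level) requests the first key, and each later call returns the key following the given one until the syscall fails. A full-scan sketch built on the two helpers above (the `scan_map` name is hypothetical; it only works inside this module since the helpers are private):

// Hypothetical full scan built on the private helpers above; it assumes
// `fd` refers to a BPF hash map whose key and value types match K and V.
fn scan_map<K: Clone, V: Clone>(fd: RawFd) -> Vec<(K, V)> {
    let mut out = Vec::new();
    let mut key: Option<K> = None; // None asks the kernel for the first key
    while let Some(k) = bpf_map_get_next_key::<K>(fd, key.take()) {
        if let Some(v) = bpf_map_get::<K, V>(fd, k.clone()) {
            out.push((k.clone(), v));
        }
        key = Some(k);
    }
    out
}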
_) } + < 0 + { + None + } else { + Some(unsafe { key.assume_init() }) + } + } +} + +fn bpf_percpu_map_set( + fd: RawFd, + mut key: K, + values: PerCpuValues, +) -> Result<()> { + let count = cpus::get_possible_num(); + if values.len() != count { + return Err(Error::Map); + } + + // It is needed to round up the value size to 8*N bytes + // cf., https://elixir.bootlin.com/linux/v5.8/source/kernel/bpf/syscall.c#L1103 + let value_size = round_up::(8); + let alloc_size = value_size * count; + let mut alloc = vec![0u8; alloc_size]; + let mut data = alloc.as_mut_ptr(); + for i in 0..count { + unsafe { + let dst_ptr = data.add(value_size * i) as *const V as *mut V; + ptr::write_unaligned::(dst_ptr, values[i].clone()); + } + } + if unsafe { + bpf_sys::bpf_map_update_elem( + fd, + &mut key as *mut _ as *mut _, + &mut data as *mut _ as *mut _, + 0, + ) + } < 0 + { + Err(Error::Map) + } else { + Ok(()) + } +} + +fn bpf_percpu_map_get(fd: RawFd, mut key: K) -> Option> { + // It is needed to round up the value size to 8*N + // cf., https://elixir.bootlin.com/linux/v5.8/source/kernel/bpf/syscall.c#L1035 + let value_size = round_up::(8); + let count = cpus::get_possible_num(); + let alloc_size = value_size * count; + let mut alloc = vec![0u8; alloc_size]; + let data = alloc.as_mut_ptr(); + if unsafe { bpf_sys::bpf_map_lookup_elem(fd, &mut key as *mut _ as *mut _, data as *mut _) } < 0 + { + return None; + } + + let mut values = Vec::with_capacity(count); + for i in 0..count { + unsafe { + let elem_ptr = data.add(value_size * i) as *const V; + values.push(ptr::read_unaligned(elem_ptr)); + } + } + + Some(values.into()) +}