001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.kahadb.util;
018
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
025
026 /**
027 * LFU cache implementation based on http://dhruvbird.com/lfu.pdf, with some notable differences:
028 * <ul>
029 * <li>
030 * Frequency list is stored as an array with no next/prev pointers between nodes: looping over the array should be faster and more CPU-cache friendly than
031 * using an ad-hoc linked-pointers structure.
032 * </li>
033 * <li>
034 * The max frequency is capped at the cache size to avoid creating more and more frequency list entries, and all elements residing in the max frequency entry
035 * are re-positioned in the frequency entry linked set in order to put most recently accessed elements ahead of less recently ones,
036 * which will be collected sooner.
037 * </li>
038 * <li>
039 * The eviction factor determines how many elements (more specifically, the percentage of) will be evicted.
040 * </li>
041 * </ul>
042 * As a consequence, this cache runs in *amortized* O(1) time (considering the worst case of having the lowest frequency at 0 and having to evict all
043 * elements).
044 *
045 * @author Sergio Bossa
046 */
public class LFUCache<Key, Value> implements Map<Key, Value> {

    // Backing map: key -> node holding the value and its current access frequency.
    private final Map<Key, CacheNode<Key, Value>> cache;
    // frequencyList[f] holds, in access order, every node whose frequency is f.
    // Declared raw because generic arrays cannot be instantiated in Java.
    private final LinkedHashSet[] frequencyList;
    // Lowest index into frequencyList that currently holds nodes; eviction starts here.
    private int lowestFrequency;
    // Frequencies are capped at maxCacheSize - 1 so the bucket array stays bounded.
    private int maxFrequency;
    //
    private final int maxCacheSize;
    private final float evictionFactor;

    /**
     * Creates an LFU cache.
     *
     * @param maxCacheSize   maximum number of entries held before eviction kicks in; must be at least 1
     * @param evictionFactor fraction of {@code maxCacheSize} evicted when the cache fills up;
     *                       must lie strictly between 0 and 1
     * @throws IllegalArgumentException if either argument is out of range
     */
    public LFUCache(int maxCacheSize, float evictionFactor) {
        if (maxCacheSize < 1) {
            // Without this check a zero/negative size would only fail later, inside put(),
            // with an ArrayIndexOutOfBoundsException on frequencyList[0].
            throw new IllegalArgumentException("Max cache size must be greater than 0");
        }
        if (evictionFactor <= 0 || evictionFactor >= 1) {
            // Message matches the actual check: a factor of exactly 1 is rejected.
            throw new IllegalArgumentException("Eviction factor must be greater than 0 and less than 1");
        }
        this.cache = new HashMap<Key, CacheNode<Key, Value>>(maxCacheSize);
        this.frequencyList = new LinkedHashSet[maxCacheSize];
        this.lowestFrequency = 0;
        this.maxFrequency = maxCacheSize - 1;
        this.maxCacheSize = maxCacheSize;
        this.evictionFactor = evictionFactor;
        initFrequencyList();
    }

    /**
     * Inserts or replaces a mapping. A brand-new key starts at frequency 0 and may
     * trigger an eviction if the cache is already full; replacing an existing key's
     * value leaves its frequency untouched.
     *
     * @return the previous value mapped to {@code k}, or null if there was none
     */
    public Value put(Key k, Value v) {
        Value oldValue = null;
        CacheNode<Key, Value> currentNode = cache.get(k);
        if (currentNode == null) {
            if (cache.size() == maxCacheSize) {
                doEviction();
            }
            LinkedHashSet<CacheNode<Key, Value>> nodes = frequencyList[0];
            currentNode = new CacheNode<Key, Value>(k, v, 0);
            nodes.add(currentNode);
            cache.put(k, currentNode);
            // The new node sits at frequency 0, which is now the lowest by definition.
            lowestFrequency = 0;
        } else {
            oldValue = currentNode.v;
            currentNode.v = v;
        }
        return oldValue;
    }

    /**
     * Copies every mapping from the given map into this cache via {@link #put}.
     */
    public void putAll(Map<? extends Key, ? extends Value> map) {
        for (Map.Entry<? extends Key, ? extends Value> me : map.entrySet()) {
            put(me.getKey(), me.getValue());
        }
    }

    /**
     * Looks up a value and, on a hit, promotes the node one frequency bucket.
     * Nodes already at the max frequency are instead re-appended to their bucket,
     * so within that bucket eviction order degrades gracefully to LRU.
     *
     * @return the cached value, or null on a miss
     */
    public Value get(Object k) {
        CacheNode<Key, Value> currentNode = cache.get(k);
        if (currentNode != null) {
            int currentFrequency = currentNode.frequency;
            if (currentFrequency < maxFrequency) {
                int nextFrequency = currentFrequency + 1;
                LinkedHashSet<CacheNode<Key, Value>> currentNodes = frequencyList[currentFrequency];
                LinkedHashSet<CacheNode<Key, Value>> newNodes = frequencyList[nextFrequency];
                moveToNextFrequency(currentNode, nextFrequency, currentNodes, newNodes);
                // Note: no cache.put() needed here — the same node instance is already mapped.
                if (lowestFrequency == currentFrequency && currentNodes.isEmpty()) {
                    lowestFrequency = nextFrequency;
                }
            } else {
                // Hybrid with LRU: put most recently accessed ahead of others:
                LinkedHashSet<CacheNode<Key, Value>> nodes = frequencyList[currentFrequency];
                nodes.remove(currentNode);
                nodes.add(currentNode);
            }
            return currentNode.v;
        } else {
            return null;
        }
    }

    /**
     * Removes the mapping for the given key, keeping {@code lowestFrequency}
     * pointing at a populated bucket.
     *
     * @return the removed value, or null if the key was absent
     */
    public Value remove(Object k) {
        CacheNode<Key, Value> currentNode = cache.remove(k);
        if (currentNode != null) {
            LinkedHashSet<CacheNode<Key, Value>> nodes = frequencyList[currentNode.frequency];
            nodes.remove(currentNode);
            if (lowestFrequency == currentNode.frequency) {
                findNextLowestFrequency();
            }
            return currentNode.v;
        } else {
            return null;
        }
    }

    /**
     * Returns the 1-based access frequency of the given key (internal frequency + 1),
     * or 0 if the key is not cached.
     */
    public int frequencyOf(Key k) {
        CacheNode<Key, Value> node = cache.get(k);
        if (node != null) {
            return node.frequency + 1;
        } else {
            return 0;
        }
    }

    /**
     * Removes every entry and resets the frequency tracking state.
     */
    public void clear() {
        for (int i = 0; i <= maxFrequency; i++) {
            frequencyList[i].clear();
        }
        cache.clear();
        lowestFrequency = 0;
    }

    public Set<Key> keySet() {
        return this.cache.keySet();
    }

    /**
     * Returns a snapshot of the cached values. The returned collection is
     * independent of the cache: mutating it has no effect on cache contents,
     * and building it does not touch access frequencies.
     */
    public Collection<Value> values() {
        Collection<Value> values = new ArrayList<Value>(cache.size());
        for (CacheNode<Key, Value> node : cache.values()) {
            values.add(node.v);
        }
        return values;
    }

    /**
     * Returns a snapshot of the current key/value mappings. The returned set is
     * independent of the cache, and building it does not touch access frequencies.
     */
    public Set<Entry<Key, Value>> entrySet() {
        Map<Key, Value> snapshot = new HashMap<Key, Value>(cache.size());
        for (CacheNode<Key, Value> node : cache.values()) {
            snapshot.put(node.k, node.v);
        }
        return snapshot.entrySet();
    }

    public int size() {
        return cache.size();
    }

    public boolean isEmpty() {
        return this.cache.isEmpty();
    }

    public boolean containsKey(Object o) {
        return this.cache.containsKey(o);
    }

    /**
     * Returns true if any cached node currently holds the given value
     * (null-safe, per the {@link Map#containsValue} contract).
     */
    public boolean containsValue(Object o) {
        for (CacheNode<Key, Value> node : cache.values()) {
            if (node.v == null ? o == null : node.v.equals(o)) {
                return true;
            }
        }
        return false;
    }

    // Allocates one (initially empty) access-ordered bucket per possible frequency.
    private void initFrequencyList() {
        for (int i = 0; i <= maxFrequency; i++) {
            frequencyList[i] = new LinkedHashSet<CacheNode<Key, Value>>();
        }
    }

    /**
     * Evicts roughly {@code maxCacheSize * evictionFactor} entries, always
     * draining the lowest populated frequency buckets first (and, within a
     * bucket, least-recently-inserted nodes first).
     */
    private void doEviction() {
        int currentlyDeleted = 0;
        float target = maxCacheSize * evictionFactor;
        while (currentlyDeleted < target) {
            LinkedHashSet<CacheNode<Key, Value>> nodes = frequencyList[lowestFrequency];
            if (nodes.isEmpty()) {
                // Should be unreachable: lowestFrequency always tracks a populated
                // bucket while the cache is non-empty.
                throw new IllegalStateException("Lowest frequency constraint violated!");
            } else {
                Iterator<CacheNode<Key, Value>> it = nodes.iterator();
                while (it.hasNext() && currentlyDeleted < target) {
                    CacheNode<Key, Value> node = it.next();
                    it.remove();
                    cache.remove(node.k);
                    currentlyDeleted++;
                }
                if (!it.hasNext()) {
                    // Bucket fully drained: advance lowestFrequency to the next populated one.
                    findNextLowestFrequency();
                }
            }
        }
    }

    // Moves a node between frequency buckets and records its new frequency.
    private void moveToNextFrequency(CacheNode<Key, Value> currentNode, int nextFrequency, LinkedHashSet<CacheNode<Key, Value>> currentNodes, LinkedHashSet<CacheNode<Key, Value>> newNodes) {
        currentNodes.remove(currentNode);
        newNodes.add(currentNode);
        currentNode.frequency = nextFrequency;
    }

    // Advances lowestFrequency past empty buckets; wraps back to 0 when the cache is empty.
    private void findNextLowestFrequency() {
        while (lowestFrequency <= maxFrequency && frequencyList[lowestFrequency].isEmpty()) {
            lowestFrequency++;
        }
        if (lowestFrequency > maxFrequency) {
            lowestFrequency = 0;
        }
    }

    /**
     * Internal cache entry: key, current value, and the node's frequency bucket index.
     * Uses identity equals/hashCode (inherited from Object), which is what the
     * LinkedHashSet buckets rely on.
     */
    private static class CacheNode<Key, Value> {

        public final Key k;
        public Value v;
        public int frequency;

        public CacheNode(Key k, Value v, int frequency) {
            this.k = k;
            this.v = v;
            this.frequency = frequency;
        }

    }
}