Interview Algorithms and System Design: Bloom Filter, TopK, Median, and Concurrency Implementations
The article presents a suite of interview‑style algorithm and system‑design solutions—including Bloom‑filter URL blacklists, hash‑partitioned word frequencies, missing‑number bit arrays, top‑K min‑heap, low‑memory median, short‑URL encoding, Redis user counting, and extensive Java implementations of sorting, singleton, LRU cache, custom thread pools, producer‑consumer models and various FooBar synchronization techniques.
This article presents a collection of interview‑style algorithm and system‑design problems commonly encountered in large‑scale backend development. It covers practical solutions for URL blacklists using Bloom filters, word‑frequency statistics with hash partitioning, finding missing numbers via bit arrays, detecting duplicate URLs, top‑K search with min‑heap, median calculation under strict memory limits, short‑URL services, comment storage with message queues, online user counting using Redis, and popular string extraction using hash maps or tries.
It also discusses red‑packet distribution algorithms, and provides complete Java implementations for several fundamental data‑structure and concurrency utilities, including quicksort, merge sort, heap sort, singleton pattern, LRU cache, custom thread pool, producer‑consumer model, blocking queue, alternating character printing, and various FooBar synchronization techniques.
Bloom Filter for URL Blacklist
A Bloom filter is described as a long binary vector with multiple hash functions, allowing membership queries with minimal memory consumption.
Word Frequency (File Partitioning)
Large integer datasets are partitioned into smaller files using hash functions to enable frequency counting within memory constraints.
Missing Number via Bit Array
A 40‑billion‑range bit array (≈0.5 GB) is used to record presence of numbers and identify a missing value.
Duplicate URL Detection
Hash‑based partitioning across machines or files is suggested to locate duplicate URLs in a 10‑billion‑URL dataset.
Top‑K Search (Min‑Heap)
After hash partitioning, each sub‑file builds a frequency map and a size‑100 min‑heap to extract the global top‑100 terms.
Median under 10 MB Memory
Iterative binary‑digit partitioning of the dataset files reduces the search space until the median can be found in memory.
Short URL System Design
Uses an auto‑incrementing counter converted to base‑62, stored in a KV store (e.g., Redis) for redirection.
Comment Storage with Message Queue
Front‑end writes comments directly, while a message queue asynchronously persists them; read side uses read‑write splitting and caching.
Online/Concurrent User Count (Redis)
Stores user IDs in a sorted set with timestamps, queries the count for the last minute, and removes expired entries.
Popular String Extraction (HashMap / Trie)
Counts string occurrences using a HashMap (≈777 MB for 3 million unique strings) or a prefix‑tree when many strings share prefixes, then selects the top‑10 via a min‑heap.
Red‑Packet Algorithms
Describes linear cut and double‑average methods for random distribution.
Java Code Implementations
public class QuickSort {
public static void swap(int[] arr, int i, int j) {
int tmp = arr[i];
arr[i] = arr[j];
arr[j] = tmp;
}
/* 常规快排 */
public static void quickSort1(int[] arr, int L , int R) {
if (L > R) return;
int M = partition(arr, L, R);
quickSort1(arr, L, M - 1);
quickSort1(arr, M + 1, R);
}
public static int partition(int[] arr, int L, int R) {
if (L > R) return -1;
if (L == R) return L;
int lessEqual = L - 1;
int index = L;
while (index < R) {
if (arr[index] <= arr[R])
swap(arr, index, ++lessEqual);
index++;
}
swap(arr, ++lessEqual, R);
return lessEqual;
}
/* 荷兰国旗 */
public static void quickSort2(int[] arr, int L, int R) {
if (L > R) return;
int[] equalArea = netherlandsFlag(arr, L, R);
quickSort2(arr, L, equalArea[0] - 1);
quickSort2(arr, equalArea[1] + 1, R);
}
public static int[] netherlandsFlag(int[] arr, int L, int R) {
if (L > R) return new int[] { -1, -1 };
if (L == R) return new int[] { L, R };
int less = L - 1;
int more = R;
int index = L;
while (index < more) {
if (arr[index] == arr[R]) {
index++;
} else if (arr[index] < arr[R]) {
swap(arr, index++, ++less);
} else {
swap(arr, index, --more);
}
}
swap(arr, more, R);
return new int[] { less + 1, more };
}
// for test
public static void main(String[] args) {
int testTime = 1;
int maxSize = 10000000;
int maxValue = 100000;
boolean succeed = true;
long T1=0,T2=0;
for (int i = 0; i < testTime; i++) {
int[] arr1 = generateRandomArray(maxSize, maxValue);
int[] arr2 = copyArray(arr1);
int[] arr3 = copyArray(arr1);
long t1 = System.currentTimeMillis();
quickSort1(arr1,0,arr1.length-1);
long t2 = System.currentTimeMillis();
quickSort2(arr2,0,arr2.length-1);
long t3 = System.currentTimeMillis();
T1 += (t2-t1);
T2 += (t3-t2);
if (!isEqual(arr1, arr2) || !isEqual(arr2, arr3)) {
succeed = false;
break;
}
}
System.out.println(T1+" "+T2);
}
private static int[] generateRandomArray(int maxSize, int maxValue) {
int[] arr = new int[(int) ((maxSize + 1) * Math.random())];
for (int i = 0; i < arr.length; i++) {
arr[i] = (int) ((maxValue + 1) * Math.random())
- (int) (maxValue * Math.random());
}
return arr;
}
private static int[] copyArray(int[] arr) {
if (arr == null) return null;
int[] res = new int[arr.length];
for (int i = 0; i < arr.length; i++) {
res[i] = arr[i];
}
return res;
}
private static boolean isEqual(int[] arr1, int[] arr2) {
if ((arr1 == null && arr2 != null) || (arr1 != null && arr2 == null))
return false;
if (arr1 == null && arr2 == null)
return true;
if (arr1.length != arr2.length)
return false;
for (int i = 0; i < arr1.length; i++)
if (arr1[i] != arr2[i])
return false;
return true;
}
private static void printArray(int[] arr) {
if (arr == null)
return;
for (int i = 0; i < arr.length; i++)
System.out.print(arr[i] + " ");
System.out.println();
}
} public static void merge(int[] arr, int L, int M, int R) {
int[] help = new int[R - L + 1];
int i = 0;
int p1 = L;
int p2 = M + 1;
while (p1 <= M && p2 <= R)
help[i++] = arr[p1] <= arr[p2] ? arr[p1++] : arr[p2++];
while (p1 <= M)
help[i++] = arr[p1++];
while (p2 <= R)
help[i++] = arr[p2++];
for (i = 0; i < help.length; i++)
arr[L + i] = help[i];
}
public static void mergeSort(int[] arr, int L, int R) {
if (L == R)
return;
int mid = L + ((R - L) >> 1);
process(arr, L, mid);
process(arr, mid + 1, R);
merge(arr, L, mid, R);
}
public static void main(String[] args) {
int[] arr1 = {9,8,7,6,5,4,3,2,1};
mergeSort(arr, 0, arr.length - 1);
printArray(arr);
} // 堆排序额外空间复杂度O(1)
public static void heapSort(int[] arr) {
if (arr == null || arr.length < 2)
return;
for (int i = arr.length - 1; i >= 0; i--)
heapify(arr, i, arr.length);
int heapSize = arr.length;
swap(arr, 0, --heapSize);
// O(N*logN)
while (heapSize > 0) { // O(N)
heapify(arr, 0, heapSize); // O(logN)
swap(arr, 0, --heapSize); // O(1)
}
}
// arr[index]刚来的数,往上
public static void heapInsert(int[] arr, int index) {
while (arr[index] > arr[(index - 1) / 2]) {
swap(arr, index, (index - 1) / 2);
index = (index - 1) / 2;
}
}
// arr[index]位置的数,能否往下移动
public static void heapify(int[] arr, int index, int heapSize) {
int left = index * 2 + 1; // 左孩子的下标
while (left < heapSize) { // 下方还有孩子的时候
// 两个孩子中,谁的值大,把下标给largest
// 1)只有左孩子,left -> largest
// 2) 同时有左孩子和右孩子,右孩子的值<= 左孩子的值,left -> largest
// 3) 同时有左孩子和右孩子并且右孩子的值> 左孩子的值, right -> largest
int largest = left+1 < heapSize && arr[left+1]> arr[left] ? left+1 : left;
// 父和较大的孩子之间,谁的值大,把下标给largest
largest = arr[largest] > arr[index] ? largest : index;
if (largest == index)
break;
swap(arr, largest, index);
index = largest;
left = index * 2 + 1;
}
}
public static void swap(int[] arr, int i, int j) {
int tmp = arr[i];
arr[i] = arr[j];
arr[j] = tmp;
}
public static void main(String[] args) {
int[] arr1 = {9,8,7,6,5,4,3,2,1};
heapSort(arr1);
printArray(arr1);
} public class Singleton {
private volatile static Singleton singleton;
private Singleton() {}
public static Singleton getSingleton() {
if (singleton == null) {
synchronized (Singleton.class) {
if (singleton == null) {
singleton = new Singleton();
}
}
}
return singleton;
}
} public class LRUCache {
private LinkedHashMap<Integer,Integer> cache;
private int capacity; //容量大小
public LRUCache(int capacity) {
cache = new LinkedHashMap<>(capacity);
this.capacity = capacity;
}
public int get(int key) {
if(!cache.containsKey(key)) {
return -1;
}
int res = cache.get(key);
cache.remove(key);
cache.put(key,res);
return res;
}
public void put(int key,int value) {
if(cache.containsKey(key)) {
cache.remove(key);
}
if(capacity == cache.size()) {
Set<Integer> keySet = cache.keySet();
Iterator<Integer> iterator = keySet.iterator();
cache.remove(iterator.next());
}
cache.put(key,value);
}
} class LRUCache {
class DNode {
DNode prev;
DNode next;
int val;
int key;
}
Map<Integer, DNode> map = new HashMap<>();
DNode head, tail;
int cap;
public LRUCache(int capacity) {
head = new DNode();
tail = new DNode();
head.next = tail;
tail.prev = head;
cap = capacity;
}
public int get(int key) {
if (map.containsKey(key)) {
DNode node = map.get(key);
removeNode(node);
addToHead(node);
return node.val;
} else {
return -1;
}
}
public void put(int key, int value) {
if (map.containsKey(key)) {
DNode node = map.get(key);
node.val = value;
removeNode(node);
addToHead(node);
} else {
DNode newNode = new DNode();
newNode.val = value;
newNode.key = key;
addToHead(newNode);
map.put(key, newNode);
if (map.size() > cap) {
map.remove(tail.prev.key);
removeNode(tail.prev);
}
}
}
public void removeNode(DNode node) {
DNode prevNode = node.prev;
DNode nextNode = node.next;
prevNode.next = nextNode;
nextNode.prev = prevNode;
}
public void addToHead(DNode node) {
DNode firstNode = head.next;
head.next = node;
node.prev = head;
node.next = firstNode;
firstNode.prev = node;
}
} package com.concurrent.pool;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
public class MySelfThreadPool {
private static final int WORK_NUM = 5;
private static final int TASK_NUM = 100;
private int workNum;
private int taskNum;
private final Set<WorkThread> workThreads;
private final BlockingQueue<Runnable> taskQueue;
public MySelfThreadPool() {
this(WORK_NUM, TASK_NUM);
}
public MySelfThreadPool(int workNum, int taskNum) {
if (workNum <= 0) workNum = WORK_NUM;
if (taskNum <= 0) taskNum = TASK_NUM;
taskQueue = new ArrayBlockingQueue<>(taskNum);
this.workNum = workNum;
this.taskNum = taskNum;
workThreads = new HashSet<>();
for (int i=0;i<workNum;i++) {
WorkThread workThread = new WorkThread("thead_"+i);
workThread.start();
workThreads.add(workThread);
}
}
public void execute(Runnable task) {
try {
taskQueue.put(task);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
public void destroy() {
System.out.println("ready close thread pool...");
if (workThreads == null || workThreads.isEmpty()) return ;
for (WorkThread workThread : workThreads) {
workThread.stopWork();
workThread = null;
}
workThreads.clear();
}
private class WorkThread extends Thread{
public WorkThread(String name) {
super();
setName(name);
}
@Override
public void run() {
while (!interrupted()) {
try {
Runnable runnable = taskQueue.take();
if (runnable !=null) {
System.out.println(getName()+" readyexecute:"+runnable.toString());
runnable.run();
}
runnable = null;
} catch (Exception e) {
interrupt();
e.printStackTrace();
}
}
}
public void stopWork() {
interrupt();
}
}
} public class TestMySelfThreadPool {
private static final int TASK_NUM = 50;
public static void main(String[] args) {
MySelfThreadPool myPool = new MySelfThreadPool(3,50);
for (int i=0;i<TASK_NUM;i++) {
myPool.execute(new MyTask("task_"+i));
}
}
static class MyTask implements Runnable{
private String name;
public MyTask(String name) {
this.name = name;
}
public String getName() { return name; }
public void setName(String name) { this.name = name; }
@Override
public void run() {
try { Thread.sleep(1000); } catch (InterruptedException e) { e.printStackTrace(); }
System.out.println("task :"+name+" end...");
}
@Override
public String toString() { return "name = "+name; }
}
} public class Storage {
private static int MAX_VALUE = 100;
private List<Object> list = new ArrayList<>();
public void produce(int num) {
synchronized (list) {
while (list.size() + num > MAX_VALUE) {
System.out.println("暂时不能执行生产任务");
try { list.wait(); } catch (InterruptedException e) { e.printStackTrace(); }
}
for (int i = 0; i < num; i++) { list.add(new Object()); }
System.out.println("已生产产品数"+num+" 仓库容量"+list.size());
list.notifyAll();
}
}
public void consume(int num) {
synchronized (list) {
while (list.size() < num) {
System.out.println("暂时不能执行消费任务");
try { list.wait(); } catch (InterruptedException e) { e.printStackTrace(); }
}
for (int i = 0; i < num; i++) { list.remove(0); }
System.out.println("已消费产品数"+num+" 仓库容量" + list.size());
list.notifyAll();
}
}
} public class FooBar {
private int n;
private BlockingQueue<Integer> bar = new LinkedBlockingQueue<>(1);
private BlockingQueue<Integer> foo = new LinkedBlockingQueue<>(1);
public FooBar(int n) { this.n = n; }
public void foo(Runnable printFoo) throws InterruptedException {
for (int i = 0; i < n; i++) {
foo.put(i);
printFoo.run();
bar.put(i);
}
}
public void bar(Runnable printBar) throws InterruptedException {
for (int i = 0; i < n; i++) {
bar.take();
printBar.run();
foo.take();
}
}
}... (additional FooBar variants using CyclicBarrier, Semaphore, synchronized, ReentrantLock, etc.) ...
Signed-in readers can open the original source through BestHub's protected redirect.
This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contactand we will review it promptly.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
