Covers generators, iterators, and the iterator protocol in depth. Also see Real Python on Generators for more examples.
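In Python, anything you can loop over with for is an iterable. Under the hood, the loop relies on two dunder methods: it calls __iter__() on the iterable to obtain an iterator, then calls __next__() on that iterator repeatedly until StopIteration is raised.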
// Custom iterable in JS
class Range {
  // Remember the inclusive bounds of the range
  constructor(start, end) {
    this.start = start;
    this.end = end;
  }

  // Hand out a fresh iterator on every call, so each for...of loop
  // starts again from `start`
  [Symbol.iterator]() {
    let cursor = this.start;
    const last = this.end;
    // The iteration state lives in this closure, not on the Range instance
    const next = () =>
      cursor <= last
        ? { value: cursor++, done: false }
        : { value: undefined, done: true };
    return { next };
  }
}
for (const n of new Range(1, 3)) {
console.log(n); // 1, 2, 3
}
class Range:
    """Count from ``start`` up to ``end`` (inclusive) in steps of 1.

    The object is its own iterator: ``__iter__`` rewinds the cursor and
    returns ``self``, so every ``for`` loop starts again from ``start``.
    Because the cursor is stored on the instance, two loops over the *same*
    object (for example nested loops) share one position.
    """

    def __init__(self, start, end):
        self.start = start
        self.end = end
        # Create the cursor up front so next() also works when it is called
        # before iter(); otherwise __next__ would hit a missing attribute.
        self.current = start

    def __iter__(self):
        """Rewind to ``start`` and return the iterator object (self in this case)"""
        self.current = self.start
        return self

    def __next__(self):
        """Return next value or raise StopIteration once past ``end``"""
        if self.current > self.end:
            raise StopIteration
        value = self.current
        self.current += 1
        return value
for n in Range(1, 3):
print(n) # 1, 2, 3
Iterable has __iter__ → returns an iterator. A list is iterable but not an iterator itself — each for loop gets a fresh iterator.
Iterator has both __iter__ and __next__ → stateful, one-shot traversal. Calling next(iterator) advances it manually.
lst = [1, 2, 3]
it = iter(lst) # create an iterator from a list
next(it) # 1
next(it) # 2
next(it) # 3
next(it) # StopIteration ← this is how for loops know to stop
Writing __iter__ and __next__ manually is tedious. Generators are functions that use yield to produce values lazily — Python handles the iterator protocol automatically. This is Python's counterpart to JavaScript's function*.
function* count_up(start, end) {
  // Walk a cursor from start to end (inclusive), pausing at every yield
  let i = start;
  while (i <= end) {
    yield i;
    i++;
  }
}
for (const n of count_up(1, 3)) {
console.log(n); // 1, 2, 3
}
// Calling the generator gives an iterator
const gen = count_up(1, 3);
gen.next(); // {value: 1, done: false}
gen.next(); // {value: 2, done: false}
def count_up(start, end):
    """Lazily yield each number from ``start`` through ``end`` (inclusive)."""
    stop = end + 1  # range() excludes its upper bound
    for value in range(start, stop):
        yield value
for n in count_up(1, 3):
print(n) # 1, 2, 3
# Calling the generator gives an iterator
gen = count_up(1, 3)
next(gen) # 1
next(gen) # 2
next(gen) # 3
next(gen) # StopIteration
A generator function pauses at each yield, preserving all local state. The next call to next() resumes from where it paused.
Generators don't compute all values at once — they compute each value on demand. This is powerful for large or infinite sequences.
# Memory comparison: a fully built list vs. a lazy generator
import sys
# List — all values computed NOW, stored in memory
million_list = list(range(1_000_000))
# sys.getsizeof is shallow: it measures the list object itself,
# not the int objects the list refers to
sys.getsizeof(million_list) # ~8 MB
# Generator — values computed ON DEMAND
million_gen = (x for x in range(1_000_000)) # generator expression
# NOTE(review): the exact byte count presumably varies between Python versions
sys.getsizeof(million_gen) # ~112 bytes ← same regardless of size!
# Infinite sequence — impossible with lists
def naturals():
    """Yield the natural numbers 1, 2, 3, ... without ever terminating."""
    previous = 0
    while True:
        previous += 1
        yield previous
from itertools import islice
first_10 = list(islice(naturals(), 10)) # [1,2,3,4,5,6,7,8,9,10]
Generator expressions are like list comprehensions, but lazy. Use () instead of [].
# List comprehension — eager, creates the full list
squares_list = [x**2 for x in range(10)] # list
# Generator expression — lazy, one value at a time
squares_gen = (x**2 for x in range(10)) # generator
# Use wherever an iterable is expected
total = sum(x**2 for x in range(10)) # no extra ()! A genexp that is the sole argument needs no second pair of parentheses
big = any(x > 50 for x in range(100)) # short-circuits: stops consuming at the first true value
# NOTE(review): `data` is not defined in this snippet — presumably any iterable of numbers
filtered = list(x for x in data if x > 0)
# Chaining generators (pipeline, no intermediate lists)
# Nothing is read from the file until `words` is actually iterated.
# NOTE(review): the file object returned by open() is never explicitly closed
# here; a `with open(...)` block that also contains the consuming loop would
# close it deterministically.
lines = (line.strip() for line in open("file.txt"))
non_empty = (line for line in lines if line)
words = (word for line in non_empty for word in line.split())
yield from delegates to a sub-generator — like spreading an iterable into your generator.
def flatten(nested):
    """Yield the non-list items of ``nested`` in order, descending into sub-lists."""
    for element in nested:
        if not isinstance(element, list):
            yield element
            continue
        # A sub-list: delegate to a recursive call so any depth is unpacked
        yield from flatten(element)
list(flatten([1, [2, [3, 4]], [5]])) # [1, 2, 3, 4, 5]
# yield from also works with any iterable
def chain_gen(*iterables):
    """Yield every item of each iterable in turn, like itertools.chain()."""
    for source in iterables:
        # Delegate to the current iterable until it is exhausted
        yield from source
def read_csv_rows(filename):
    """Yield one dict per row — no full file in memory.

    Each row is produced by ``csv.DictReader``, which takes the field names
    from the first row of the file. The file stays open while the generator
    is being consumed and is closed when the generator finishes or is closed.
    """
    import csv
    # newline="" leaves line-ending handling to the csv module, as its
    # documentation requires; otherwise line endings embedded in quoted
    # fields are translated before the parser sees them.
    with open(filename, newline="") as f:
        yield from csv.DictReader(f)
# Process a 10GB CSV without loading it all
for row in read_csv_rows("huge.csv"):
process(row)
def paginate(url, page_size=100, timeout=10):
    """Yield items across all pages automatically.

    Requests ``url`` repeatedly with ``page`` (starting at 1) and
    ``per_page`` query parameters, yields every item of each decoded JSON
    body, and stops at the first page whose body is empty.

    Args:
        url: Endpoint to query.
        page_size: Value sent as the ``per_page`` query parameter.
        timeout: Seconds to wait for each response before giving up.

    Raises:
        requests.HTTPError: If a page responds with a 4xx/5xx status.
        requests.Timeout: If a page does not respond within ``timeout``.
    """
    import requests
    page = 1
    while True:
        resp = requests.get(
            url,
            params={"page": page, "per_page": page_size},
            timeout=timeout,  # without a timeout a stalled server blocks forever
        )
        # Fail loudly on error statuses: an error payload is typically
        # non-empty JSON, which would otherwise be yielded as if it were data
        # and keep the loop requesting further pages.
        resp.raise_for_status()
        data = resp.json()
        if not data:
            return
        yield from data
        page += 1
for user in paginate("https://api.example.com/users"):
print(user["name"])
import csv
def read_rows(path):
    """Lazily yield one dict per row of the CSV file at ``path``.

    Field names come from the file's first row (``csv.DictReader`` default).
    The file stays open only while the generator is being consumed.
    """
    # newline="" leaves line-ending handling to the csv module, as its
    # documentation requires; otherwise line endings embedded in quoted
    # fields are translated before the parser sees them.
    with open(path, newline="") as f:
        yield from csv.DictReader(f)
def parse_numbers(rows):
    """Yield each row after converting its ``"amount"`` value to ``float``.

    Rows are modified in place and the same objects are yielded back.
    """
    for record in rows:
        amount = float(record["amount"])
        record["amount"] = amount
        yield record
def filter_large(rows, threshold=1000):
    """Return a lazy generator of the rows whose ``"amount"`` exceeds ``threshold``."""
    large_only = (txn for txn in rows if txn["amount"] > threshold)
    return large_only
# Compose the pipeline — no intermediate lists!
pipeline = filter_large(parse_numbers(read_rows("transactions.csv")))
for txn in pipeline:
print(txn)
1. What is the difference between an iterable and an iterator?
2. A generator function uses yield. What does it return when called?
3. What is the memory advantage of (x**2 for x in range(1_000_000)) over [x**2 for x in range(1_000_000)]?
4. What does yield from some_iterable do?
Questions answered correctly.