Overview of Common Python Libraries for Artificial Intelligence and Data Science with Code Examples
This article provides a comprehensive introduction to popular Python libraries for artificial intelligence, computer vision, data analysis, and machine learning—such as NumPy, OpenCV, scikit‑image, Pillow, TensorFlow, PyTorch, and many others—accompanied by concise code snippets and performance comparisons to help beginners select suitable tools.
This article introduces a broad set of Python libraries that are frequently used in artificial intelligence, computer vision, data analysis, and machine learning, offering brief descriptions and ready‑to‑run code examples for each.
1. NumPy
NumPy (Numerical Python) provides high‑performance array objects and mathematical functions, implemented in C for speed.
import numpy as np
import math
import random
import time
start = time.time()
for i in range(10):
list_1 = list(range(1,10000))
for j in range(len(list_1)):
list_1[j] = math.sin(list_1[j])
print("使用纯Python用时{}s".format(time.time()-start))
start = time.time()
for i in range(10):
list_1 = np.array(np.arange(1,10000))
list_1 = np.sin(list_1)
print("使用Numpy用时{}s".format(time.time()-start))Running the code shows NumPy is significantly faster than pure Python loops.
2. OpenCV
OpenCV is a cross‑platform computer‑vision library with Python bindings, useful for image processing and common vision algorithms.
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
img = cv.imread('h89817032p0.png')
kernel = np.ones((5,5),np.float32)/25
dst = cv.filter2D(img,-1,kernel)
blur_1 = cv.GaussianBlur(img,(5,5),0)
blur_2 = cv.bilateralFilter(img,9,75,75)
plt.figure(figsize=(10,10))
plt.subplot(221),plt.imshow(img[:,:,::-1]),plt.title('Original')
plt.xticks([]), plt.yticks([])
plt.subplot(222),plt.imshow(dst[:,:,::-1]),plt.title('Averaging')
plt.xticks([]), plt.yticks([])
plt.subplot(223),plt.imshow(blur_1[:,:,::-1]),plt.title('Gaussian')
plt.xticks([]), plt.yticks([])
plt.subplot(224),plt.imshow(blur_1[:,:,::-1]),plt.title('Bilateral')
plt.xticks([]), plt.yticks([])
plt.show()3. Scikit‑image
Scikit‑image builds on SciPy and NumPy to provide image processing functions such as rescaling and resizing.
from skimage import data, color, io
from skimage.transform import rescale, resize, downscale_local_mean
image = color.rgb2gray(io.imread('h89817032p0.png'))
image_rescaled = rescale(image, 0.25, anti_aliasing=False)
image_resized = resize(image, (image.shape[0] // 4, image.shape[1] // 4), anti_aliasing=True)
image_downscaled = downscale_local_mean(image, (4, 3))
plt.figure(figsize=(20,20))
plt.subplot(221),plt.imshow(image, cmap='gray'),plt.title('Original')
plt.subplot(222),plt.imshow(image_rescaled, cmap='gray'),plt.title('Rescaled')
plt.subplot(223),plt.imshow(image_resized, cmap='gray'),plt.title('Resized')
plt.subplot(224),plt.imshow(image_downscaled, cmap='gray'),plt.title('Downscaled')
plt.xticks([]), plt.yticks([])
plt.show()4. Pillow (PIL)
Pillow is the actively maintained fork of the original Python Imaging Library, supporting image creation and manipulation.
from PIL import Image, ImageDraw, ImageFont, ImageFilter
import random
def rndChar():
return chr(random.randint(65, 90))
def rndColor():
return (random.randint(64, 255), random.randint(64, 255), random.randint(64, 255))
def rndColor2():
return (random.randint(32, 127), random.randint(32, 127), random.randint(32, 127))
width = 60 * 6
height = 60 * 6
image = Image.new('RGB', (width, height), (255, 255, 255))
font = ImageFont.truetype('/usr/share/fonts/wps-office/simhei.ttf', 60)
draw = ImageDraw.Draw(image)
for x in range(width):
for y in range(height):
draw.point((x, y), fill=rndColor())
for t in range(6):
draw.text((60 * t + 10, 150), rndChar(), font=font, fill=rndColor2())
image = image.filter(ImageFilter.BLUR)
image.save('code.jpg', 'jpeg')5. SimpleCV
SimpleCV offers a high‑level interface to OpenCV, but its Python‑3 support is limited.
from SimpleCV import Image, Color, Display
img = Image('http://i.imgur.com/lfAeZ4n.png')
feats = img.findKeypoints()
feats.draw(color=Color.RED)
img.show()
output = img.applyLayers()
output.save('juniperfeats.png')Running under Python 3 raises a SyntaxError: Missing parentheses in call to 'print', so it is not recommended.
6. Mahotas
Mahotas provides fast computer‑vision algorithms built on NumPy.
import numpy as np
import mahotas
import mahotas.demos
from mahotas.thresholding import soft_threshold
from matplotlib import pyplot as plt
from os import path
f = mahotas.demos.load('lena', as_grey=True)
f = f[128:,128:]
plt.gray()
print("Fraction of zeros in original image: {0}".format(np.mean(f==0)))
plt.imshow(f)
plt.show()7. Ilastik
Ilastik offers user‑friendly, interactive machine‑learning tools for bio‑image analysis, requiring little expertise.
8. Scikit‑learn
Scikit‑learn is a free Python library offering a wide range of classification, regression, and clustering algorithms.
import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import MiniBatchKMeans, KMeans
from sklearn.metrics.pairwise import pairwise_distances_argmin
from sklearn.datasets import make_blobs
np.random.seed(0)
batch_size = 45
centers = [[1, 1], [-1, -1], [1, -1]]
n_clusters = len(centers)
X, labels_true = make_blobs(n_samples=3000, centers=centers, cluster_std=0.7)
k_means = KMeans(init='k-means++', n_clusters=3, n_init=10)
t0 = time.time()
k_means.fit(X)
t_batch = time.time() - t0
mbk = MiniBatchKMeans(init='k-means++', n_clusters=3, batch_size=batch_size, n_init=10, max_no_improvement=10, verbose=0)
t0 = time.time()
mbk.fit(X)
t_mini_batch = time.time() - t0
fig = plt.figure(figsize=(8, 3))
fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)
colors = ['#4EACC5', '#FF9C34', '#4E9A06']
# (plotting code omitted for brevity)
plt.title('KMeans')
plt.show()9. SciPy
SciPy supplies efficient numerical routines for integration, interpolation, optimization, linear algebra, and special functions.
from scipy import special
import matplotlib.pyplot as plt
import numpy as np
def drumhead_height(n, k, distance, angle, t):
kth_zero = special.jn_zeros(n, k)[-1]
return np.cos(t) * np.cos(n*angle) * special.jn(n, distance*kth_zero)
theta = np.r_[0:2*np.pi:50j]
radius = np.r_[0:1:50j]
x = np.array([r * np.cos(theta) for r in radius])
y = np.array([r * np.sin(theta) for r in radius])
z = np.array([drumhead_height(1, 1, r, theta, 0.5) for r in radius])
fig = plt.figure()
ax = fig.add_axes(rect=(0, 0.05, 0.95, 0.95), projection='3d')
ax.plot_surface(x, y, z, rstride=1, cstride=1, cmap='RdBu_r', vmin=-0.5, vmax=0.5)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
plt.show()10. NLTK
The Natural Language Toolkit provides interfaces to over 50 corpora and tools for tokenization, tagging, parsing, and semantic reasoning.
import nltk
from nltk.corpus import treebank
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
nltk.download('treebank')
sentence = """At eight o'clock on Thursday morning Arthur didn't feel very good."""
tokens = nltk.word_tokenize(sentence)
tagged = nltk.pos_tag(tokens)
entities = nltk.chunk.ne_chunk(tagged)
# Display a parse tree (requires GUI)11. spaCy
spaCy is an open‑source library for advanced NLP tasks, suitable for large‑scale information extraction and preprocessing for deep learning.
import spacy
texts = [
"Net income was $9.4 million compared to the prior year of $2.7 million.",
"Revenue exceeded twelve billion dollars, with a loss of $1b."
]
nlp = spacy.load("en_core_web_sm")
for doc in nlp.pipe(texts, disable=["tok2vec", "tagger", "parser", "attribute_ruler", "lemmatizer"]):
print([(ent.text, ent.label_) for ent in doc.ents])12. LibROSA
LibROSA is a Python library for music and audio analysis, useful for building music‑information‑retrieval systems.
# Beat tracking example
import librosa
filename = librosa.example('nutcracker')
y, sr = librosa.load(filename)
tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
print('Estimated tempo: {:.2f} beats per minute'.format(tempo))
beat_times = librosa.frames_to_time(beat_frames, sr=sr)13. Pandas
Pandas offers fast, powerful data structures for data manipulation, cleaning, and analysis.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
ts = pd.Series(np.random.randn(1000), index=pd.date_range("1/1/2000", periods=1000))
ts = ts.cumsum()
df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index, columns=list("ABCD"))
df = df.cumsum()
df.plot()
plt.show()14. Matplotlib
Matplotlib is a plotting library that produces publication‑quality figures.
# plot_multi_curve.py
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0.1, 2 * np.pi, 100)
y_1 = x
y_2 = np.square(x)
y_3 = np.log(x)
y_4 = np.sin(x)
plt.plot(x,y_1)
plt.plot(x,y_2)
plt.plot(x,y_3)
plt.plot(x,y_4)
plt.show()15. Seaborn
Seaborn builds on Matplotlib to provide a higher‑level API for statistical data visualization.
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="ticks")
df = sns.load_dataset("penguins")
sns.pairplot(df, hue="species")
plt.show()16. Orange
Orange is an open‑source data‑mining and machine‑learning suite with a visual programming interface.
$ pip install orange3
$ orange-canvas17. PyBrain
PyBrain is a modular machine‑learning library for building neural networks such as multilayer perceptrons.
from pybrain.structure import FeedForwardNetwork
n = FeedForwardNetwork()
from pybrain.structure import LinearLayer, SigmoidLayer
inLayer = LinearLayer(2)
hiddenLayer = SigmoidLayer(3)
outLayer = LinearLayer(1)
n.addInputModule(inLayer)
n.addModule(hiddenLayer)
n.addOutputModule(outLayer)
from pybrain.structure import FullConnection
in_to_hidden = FullConnection(inLayer, hiddenLayer)
hidden_to_out = FullConnection(hiddenLayer, outLayer)
n.addConnection(in_to_hidden)
n.addConnection(hidden_to_out)
n.sortModules()18. Milk
Milk is a Python machine‑learning toolkit offering classifiers, feature selection, and clustering.
import numpy as np
import milk
features = np.random.rand(100,10)
labels = np.zeros(100)
features[50:] += .5
labels[50:] = 1
learner = milk.defaultclassifier()
model = learner.train(features, labels)
example = np.random.rand(10)
print(model.apply(example))
example2 = np.random.rand(10)
example2 += .5
print(model.apply(example2))19. TensorFlow
TensorFlow is an end‑to‑end open‑source machine‑learning platform; the example builds a CNN on CIFAR‑10 using TensorFlow 2.x.
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
train_images, test_images = train_images / 255.0, test_images / 255.0
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10))
model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])
history = model.fit(train_images, train_labels, epochs=10, validation_data=(test_images, test_labels))20. PyTorch
PyTorch provides dynamic computation graphs and a Pythonic interface for deep learning.
# Import libraries
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt
# Model definition
class NeuralNetwork(nn.Module):
def __init__(self):
super(NeuralNetwork, self).__init__()
self.flatten = nn.Flatten()
self.linear_relu_stack = nn.Sequential(
nn.Linear(28*28, 512),
nn.ReLU(),
nn.Linear(512, 512),
nn.ReLU(),
nn.Linear(512, 10),
nn.ReLU()
)
def forward(self, x):
x = self.flatten(x)
logits = self.linear_relu_stack(x)
return logits
model = NeuralNetwork().to('cuda' if torch.cuda.is_available() else 'cpu')
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
# Training loop omitted for brevity21. Theano
Theano enables definition and efficient evaluation of mathematical expressions involving multi‑dimensional arrays.
import theano
import theano.tensor as T
x = T.dvector('x')
y = x ** 2
J, updates = theano.scan(lambda i, y, x: T.grad(y[i], x), sequences=T.arange(y.shape[0]), non_sequences=[y, x])
f = theano.function([x], J, updates=updates)
print(f([4, 4]))22. Keras
Keras is a high‑level neural‑network API that runs on top of TensorFlow, CNTK, or Theano.
from keras.models import Sequential
from keras.layers import Dense
model = Sequential()
model.add(Dense(units=64, activation='relu', input_dim=100))
model.add(Dense(units=10, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5, batch_size=32)23. MXNet
MXNet is a deep‑learning framework that supports both symbolic and imperative programming.
import mxnet as mx
from mxnet import gluon
from mxnet.gluon import nn
from mxnet import autograd as ag
import mxnet.ndarray as F
mnist = mx.test_utils.get_mnist()
batch_size = 100
train_data = mx.io.NDArrayIter(mnist['train_data'], mnist['train_label'], batch_size, shuffle=True)
val_data = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)
class Net(gluon.Block):
def __init__(self, **kwargs):
super(Net, self).__init__(**kwargs)
self.conv1 = nn.Conv2D(20, kernel_size=(5,5))
self.pool1 = nn.MaxPool2D(pool_size=(2,2), strides=(2,2))
self.conv2 = nn.Conv2D(50, kernel_size=(5,5))
self.pool2 = nn.MaxPool2D(pool_size=(2,2), strides=(2,2))
self.fc1 = nn.Dense(500)
self.fc2 = nn.Dense(10)
def forward(self, x):
x = self.pool1(F.tanh(self.conv1(x)))
x = self.pool2(F.tanh(self.conv2(x)))
x = x.reshape((0, -1))
x = F.tanh(self.fc1(x))
x = F.tanh(self.fc2(x))
return x
net = Net()
ctx = [mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()]
net.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.03})
metric = mx.metric.Accuracy()
softmax_cross_entropy_loss = gluon.loss.SoftmaxCrossEntropyLoss()
# Training loop omitted for brevity24. PaddlePaddle
PaddlePaddle is Baidu's open‑source deep‑learning platform; the example implements LeNet‑5.
import paddle
import numpy as np
from paddle.nn import Conv2D, MaxPool2D, Linear
import paddle.nn.functional as F
class LeNet(paddle.nn.Layer):
def __init__(self, num_classes=1):
super(LeNet, self).__init__()
self.conv1 = Conv2D(in_channels=1, out_channels=6, kernel_size=5)
self.max_pool1 = MaxPool2D(kernel_size=2, stride=2)
self.conv2 = Conv2D(in_channels=6, out_channels=16, kernel_size=5)
self.max_pool2 = MaxPool2D(kernel_size=2, stride=2)
self.conv3 = Conv2D(in_channels=16, out_channels=120, kernel_size=4)
self.fc1 = Linear(in_features=120, out_features=64)
self.fc2 = Linear(in_features=64, out_features=num_classes)
def forward(self, x):
x = self.conv1(x)
x = F.sigmoid(x)
x = self.max_pool1(x)
x = F.sigmoid(x)
x = self.conv2(x)
x = self.max_pool2(x)
x = self.conv3(x)
x = paddle.reshape(x, [x.shape[0], -1])
x = self.fc1(x)
x = F.sigmoid(x)
x = self.fc2(x)
return x25. CNTK
Microsoft Cognitive Toolkit (CNTK) is a deep‑learning framework that describes networks as directed graphs.
NDLNetworkBuilder=[
run=ndlLR
]
ndlLR=[
SDim=$dimension$
LDim=1
features=Input(SDim, 1)
labels=Input(LDim, 1)
B0=Parameter(4)
W0=Parameter(4, SDim)
B=Parameter(LDim)
W=Parameter(LDim, 4)
t0=Times(W0, features)
z0=Plus(t0, B0)
s0=Sigmoid(z0)
t=Times(W, s0)
z=Plus(t, B)
s=Sigmoid(z)
LR=Logistic(labels, s)
EP=SquareError(labels, s)
FeatureNodes=(features)
LabelNodes=(labels)
CriteriaNodes=(LR)
EvalNodes=(EP)
OutputNodes=(s,t,z,s0,W0)
]The article concludes with a collection of references and QR‑code promotions for further learning resources.
Python Programming Learning Circle
A global community of Chinese Python developers offering technical articles, columns, original video tutorials, and problem sets. Topics include web full‑stack development, web scraping, data analysis, natural language processing, image processing, machine learning, automated testing, DevOps automation, and big data.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
