home

Series

Creating a Series Instance

Series from Python list

import pandas as pd

# Wrap a plain Python list; pandas assigns the default 0..n-1 integer index.
values = [2, 4, 8]
series = pd.Series(data=values)
print(series)
# 0    2
# 1    4
# 2    8
# dtype: int64

Series from Python list with explicit Index

import pandas as pd

# Supply the labels explicitly instead of relying on the default 0..n-1 index.
years = [2001, 2002, 2003, 2004]
incomes = [1000, 1250, 1350, 1450]
income_by_year = pd.Series(data=incomes, index=years)
print(income_by_year)
# 2001    1000
# 2002    1250
# 2003    1350
# 2004    1450
# dtype: int64

Series from Python dictionary

Note that the index need not be of integer type; it can be of any desired type.

import pandas as pd

# Dictionary keys become the index labels and dictionary values become the data.
nickname_by_name = {
    "koray": "danger",
    "pinar": "angel",
    "toprak": "topacik",
    "deniz": "osman",
}
nicknames = pd.Series(nickname_by_name)

print(nicknames)
# koray      danger
# pinar       angel
# toprak    topacik
# deniz       osman
# dtype: object

Series with Repeating Values

import pandas as pd

# A scalar is repeated once for every label in the supplied index.
labels = range(5)
sr = pd.Series(5, index=labels)
print(sr)
# 0    5
# 1    5
# 2    5
# 3    5
# 4    5

Series with Non-Unique Index

Index values need not be unique in the index of a Series.

import pandas as pd

nicknames = pd.Series(
    {
        "koray": "danger",
        "pinar": "angel",
        "toprak": "topacik",
        "deniz": "osman",
    }
)
# Series.append was deprecated in pandas 1.4 and removed in pandas 2.0.
# pd.concat is the supported replacement and keeps the duplicate "deniz" label.
nicknames = pd.concat([nicknames, pd.Series({"deniz": "boombox"})])
print(nicknames)
# koray      danger
# pinar       angel
# toprak    topacik
# deniz       osman
# deniz     boombox
# dtype: object

Series from an ndarray

import numpy as np
import pandas as pd

# Four random integers drawn from [-1000, 1000); the values differ on every run.
random_values = np.random.randint(low=-1000, high=1000, size=4)
sr = pd.Series(random_values)
print(sr)
# 0    357
# 1    950
# 2   -472
# 3    802
# dtype: int64

Series with Mixed Types (in Index and in Values)

import pandas as pd

# Start with integer data on the default integer index...
sr = pd.Series(data=[1, 2])
# ...then assign to a label that does not exist yet: the Series grows by one
# entry, and both the index and the values end up holding mixed types.
sr.loc["MyName"] = "Koray Tugay"

print(sr)
# 0                   1
# 1                   2
# MyName    Koray Tugay
# dtype: object

Series Index

Resetting Index in Series

import pandas as pd

labels = ["foo", "bar"]
foo = pd.Series(data=[5, 6], index=labels)
print(foo)
# foo    5
# bar    6
# dtype: int64

# drop=True discards the old labels instead of turning them into a column;
# inplace=True mutates `foo` rather than returning a new object.
foo.reset_index(inplace=True, drop=True)
print(foo)
# 0    5
# 1    6
# dtype: int64

Modifying the Index

import pandas as pd

income_by_year = pd.Series(
    data=[1000, 1250, 1350, 1450],
    index=[2001, 2002, 2003, 2004],
)

# Assigning a new list to .index relabels the Series in place; the list must
# have the same length as the data. Each year is shifted forward by 1000.
shifted_years = [year + 1000 for year in [2001, 2002, 2003, 2004]]
income_by_year.index = shifted_years
print(income_by_year)
# 3001    1000
# 3002    1250
# 3003    1350
# 3004    1450
# dtype: int64

Values and Items

Note how the values of a Series created from a Python list are returned as an ndarray.

import pandas as pd

income_by_year = pd.Series(
    [1000, 1250, 1350, 1450],
    index=[2001, 2002, 2003, 2004]
)

# .values exposes the underlying data as a NumPy array.
print(income_by_year.values)
# [1000 1250 1350 1450]

print(type(income_by_year.values))
# <class 'numpy.ndarray'>

# .items() lazily yields (index, value) pairs; list() materialises them.
print(list(income_by_year.items()))
# [(2001, 1000), (2002, 1250), (2003, 1350), (2004, 1450)]

# The materialised container is a list (each element inside it is a tuple).
print(type(list(income_by_year.items())))
# <class 'list'>

Querying a Series

.loc and .iloc

import pandas as pd

income_by_year = pd.Series(
    data=[1000, 1250, 1350, 1450],
    index=[2001, 2002, 2003, 2004],
)

# Plain [] on an integer-labelled Series looks the key up as a label.
print(income_by_year[2002])
# 1250

# .loc is always label based.
print(income_by_year.loc[2002])
# 1250

# .iloc is always position based (0-indexed), so 2 is the third entry.
print(income_by_year.iloc[2])
# 1350

.loc and .iloc with multiple values

import pandas as pd

income_by_year = pd.Series(
    data=[1000, 1250, 1350, 1450],
    index=[2001, 2002, 2003, 2004],
)

# Passing a list of labels to .loc returns a sub-Series, not a scalar.
wanted_labels = [2001, 2002]
print(income_by_year.loc[wanted_labels])
# 2001    1000
# 2002    1250
# dtype: int64

# Passing a list of positions to .iloc works the same way.
wanted_positions = [2, 3]
print(income_by_year.iloc[wanted_positions])
# 2003    1350
# 2004    1450
# dtype: int64

Various Query Methods

import pandas

sr = pandas.Series(data=[1, 2, 3, 2, 1], index=list("ABCDE"))

# Value counts (histogramming): how many times each distinct value occurs.
# Rows with equal counts may appear in a different order depending on the
# pandas version.
value_histogram = sr.value_counts()
print(value_histogram)
# 2    2
# 1    2
# 3    1

# Distinct values, in order of first appearance.
distinct_values = sr.unique()
print(distinct_values)
# [1 2 3]

# How many distinct values there are.
distinct_count = sr.nunique()
print(distinct_count)
# 3

# The n smallest values; n defaults to 5, so the whole Series is returned here.
print(sr.nsmallest())
# A    1
# E    1
# B    2
# D    2
# C    3

# The n largest values; pass an integer to nlargest() to limit the result to n entries.
print(sr.nlargest())
# C    3
# B    2
# D    2
# A    1
# E    1

Filtering a Series

Boolean Selection on Series

A Boolean selection applies a logical expression to the values of the Series and returns a new series of Boolean values representing the result of that expression upon each value.

import pandas as pd

income_by_year = pd.Series(
    data=[1000, 1250, 1350, 1450],
    index=[2001, 2002, 2003, 2004],
)

# The comparison is evaluated element-wise and yields a Boolean Series that
# shares the original index.
above_1250 = income_by_year > 1250
print(above_1250)
# 2001    False
# 2002    False
# 2003     True
# 2004     True

Masking

import pandas as pd

income_by_year = pd.Series(
    data=[1000, 1250, 1350, 1450],
    index=[2001, 2002, 2003, 2004],
)

# Indexing with a Boolean Series keeps only the entries where the mask is True.
mask = income_by_year > 1250
print(income_by_year[mask])
# 2003    1350
# 2004    1450

Operations on Series

import pandas as pd

# Multiple values at once: arithmetic between Series is applied element-wise
# and produces a new Series; the operands are left untouched.
income_by_year = pd.Series(
    data=[1000, 1250, 1350, 1450],
    index=[2001, 2002, 2003, 2004],
)

doubled = income_by_year + income_by_year
print(doubled)
# 2001    2000
# 2002    2500
# 2003    2700
# 2004    2900

# Single value: assigning through .loc updates the Series in place.
income_by_year.loc[2004] *= 2
print(income_by_year)
# 2001    1000
# 2002    1250
# 2003    1350
# 2004    2900

Auto Alignment

Series will be auto aligned against index values in arithmetic operations.

import pandas as pd

foo = pd.Series(data=[5, 6], index=['foo', 'bar'])
bar = pd.Series(data=[6, 5], index=['bar', 'foo'])

# Addition pairs values by label, not by position: foo -> 5 + 5, bar -> 6 + 6.
total = foo + bar
# If not explicitly passed, index will be sorted.
print(pd.Series(total, index=['foo', 'bar']))
# foo    10
# bar    12
# dtype: int64

Slicing a Series

import pandas
import numpy

data = numpy.arange(100, 110)
labels = numpy.arange(10, 20)
my_series = pandas.Series(data, index=labels)
print(my_series)
# 10    100
# 11    101
# 12    102
# 13    103
# 14    104
# 15    105
# 16    106
# 17    107
# 18    108
# 19    109

# Slicing with [] is positional: positions 1 up to (but excluding) 6.
my_series_slice = my_series[1:6]
print(my_series_slice)
# 11    101
# 12    102
# 13    103
# 14    104
# 15    105

# Omitting the start means "from the first position".
my_series_slice = my_series[:6]
print(my_series_slice)
# 10    100
# 11    101
# 12    102
# 13    103
# 14    104
# 15    105

Missing Values in Series

Note how None is preserved when the list being passed contains strings, but is converted to NaN when a list of integers is used (see the value at iloc[2] in each case below).

import pandas as pd

# Strings are stored as generic Python objects, so None stays as-is.
names = ['deniz', 'toprak', None]
names_series = pd.Series(names)
print(names_series)
# 0     deniz
# 1    toprak
# 2      None
# dtype: object

# Numeric data has no slot for None: it becomes NaN and the ints become floats.
ages = [1, 1, None]
ages_series = pd.Series(ages)
print(ages_series)
# 0    1.0
# 1    1.0
# 2    NaN
# dtype: float64

Examples

import pandas as pd

sr = pd.Series(data=[5, 25, 125, 525], index=["a", "b", "c", "d"])

# Largest value held by the Series.
largest = sr.max()
print(largest)
# 525

# Label at which the largest value sits.
label_of_largest = sr.idxmax()
print(label_of_largest)
# d

# `in` on a Series tests the index labels...
print("a" in sr)
# True

# ...so go through .values to test the data instead.
print(5 in sr.values)
# True

# Masking example: combine element-wise conditions with & (parentheses are
# required because & binds tighter than the comparisons).
strictly_between = (sr > 5) & (sr < 525)
print(sr[strictly_between])
# b     25
# c    125

Find value appearing a specific number of times

import pandas as pd

sr = pd.Series([5, 5, 5, 6, 6, 6, 7, 7, 8, 8, 8])

# Count each distinct value once; the result is indexed by those values.
counts = sr.value_counts()

# Keep the rows whose count is exactly 3; the index then holds the values.
# sort_index() makes the order deterministic, because the order of rows with
# equal counts differs between pandas versions.
by_mask = counts[counts == 3].sort_index().index.values
print(by_mask)
# [5 6 8]

# Solution using `where`
# `where` must be applied to `counts`, not to `sr`: the mask is indexed by
# value, so applying it to `sr` would align it against sr's positional index
# and return positions (5, 6, 8 holding 6, 7, 8) rather than values.
by_where = counts.where(counts == 3).dropna().sort_index().index.values
print(by_where)
# [5 6 8]