Source code for climb.tool.impl.data_suite.models.copula
import logging
import numpy as np
import pandas as pd
from climb.tool.impl.data_suite.third_party.copulas.multivariate import GaussianMultivariate
logger = logging.getLogger()
logger.setLevel(logging.INFO)
[docs]
def fit_sample_copula(
clean_corpus,
copula="vine",
copula_n_samples=10,
columns=None,
random_seed=42,
):
"""
> The function takes a corpus of data, fits a copula to it, and then samples from the copula
Args:
clean_corpus: the corpus of data you want to fit the copula to.
copula: the type of copula to use. Defaults to vine
copula_n_samples: The number of samples to generate from the copula. Defaults to 10
columns: The names of the columns in the dataframe.
random_seed: The random seed. Defaults to 42
"""
try:
if copula == "vine":
from climb.tool.impl.data_suite.third_party.copulas.multivariate import VineCopula
logging.info("Vine...")
# vine = VineCopula('center')
# vine = VineCopula('regular')
vine = VineCopula("direct", random_seed=random_seed)
if columns is None:
columns = [f"x{i + 1}" for i in range(clean_corpus.shape[1] - 1)] + ["y"]
vine.fit(pd.DataFrame(data=clean_corpus)) # , columns=columns))
logging.info(f"Copula Samples = {copula_n_samples}")
samples = vine.sample(copula_n_samples)
except BaseException:
dist = GaussianMultivariate(random_seed=random_seed)
dist.fit(clean_corpus)
logging.info(f"Copula Samples = {copula_n_samples}")
samples = dist.sample(copula_n_samples)
if copula == "gauss":
logging.info("Gaussian...")
dist = GaussianMultivariate()
dist.fit(clean_corpus)
logging.info(f"Copula Samples = {copula_n_samples}")
samples = dist.sample(copula_n_samples)
copula_samples = np.array(samples)
return copula_samples