yaset.utils

yaset.utils.config

yaset.utils.config.replace_auto(options: dict = None) → None

Replace the keyword ‘auto’ with ‘-1’ in configuration files

Parameters: options (dict) – configuration parameters
Returns: None

yaset.utils.conlleval

class yaset.utils.conlleval.EvalCounts

Bases: object

class yaset.utils.conlleval.Metrics(tp, fp, fn, prec, rec, fscore)

Bases: tuple

fn

Alias for field number 2

fp

Alias for field number 1

fscore

Alias for field number 5

prec

Alias for field number 3

rec

Alias for field number 4

tp

Alias for field number 0

yaset.utils.conlleval.calculate_metrics(correct, guessed, total)
yaset.utils.conlleval.end_of_chunk(prev_tag, tag, prev_type, type_)
yaset.utils.conlleval.evaluate_ner(corr, pred)
yaset.utils.conlleval.metrics(counts)
yaset.utils.conlleval.parse_tag(t)
yaset.utils.conlleval.start_of_chunk(prev_tag, tag, prev_type, type_)
yaset.utils.conlleval.uniq(iterable)

yaset.utils.copy

yaset.utils.copy.copy_embedding_models(embeddings_options: dict = None, output_dir: str = None) → None

Copy the pretrained embeddings specified in the configuration file to the model directory

Parameters:
  • embeddings_options (dict) – configuration file portion related to embeddings
  • output_dir (str) – directory where files will be copied
Returns:

None

yaset.utils.data

class yaset.utils.data.NERDataset(mappings: dict = None, instance_conll_file: str = None, debug: bool = None, singleton_replacement_ratio: float = 0.0, bert_use: bool = False, bert_voc_dir: str = None, bert_lowercase: bool = False, pretrained_use: bool = False, char_use: bool = False, elmo_use: bool = False)

Bases: torch.utils.data.dataset.Dataset

create_instance(sequence_buffer: list = None)
extract_singletons()
load_instances()
yaset.utils.data.collate_ner(batch, tok_pad_id: int = None, chr_pad_id_literal: int = None, chr_pad_id_utf8: int = None, bert_use: bool = False, char_use: bool = False, elmo_use: bool = False, pretrained_use: bool = False, options: dict = None)
yaset.utils.data.collate_ner_ensemble(batch, model_mappings: dict = None, model_options: dict = None, reference_id: str = None)

yaset.utils.eval

yaset.utils.eval.eval_ner(eval_payload: list = None)

yaset.utils.load

yaset.utils.load.load_model(model_dir: str = None)

Load a single NER model

Args:
model_dir (str): NER model directory
Returns:
NER model
yaset.utils.load.load_model_single(model_dir: str = None, cuda: bool = None)

yaset.utils.logging

class yaset.utils.logging.TrainLogger(tensorboard_path: str = None)

Bases: object

add_checkpoint(step: int = None, checkpoint_payload: dict = None)
add_dev_score(step: int = None, payload: dict = None)
add_histogram(name: str = None, value: str = None, global_step: int = None, bins: str = 'auto')
add_loss(loss_value: float = None, loss_name: str = None, global_step: int = None)
add_other_score_dev(idx_iteration: int = None, score_name: str = None, score_value: float = None)
add_scalar(name: str = None, value: float = None, global_step: int = None)
add_step_values(step: int = None, gs_values: list = None, pred_values: list = None)
close_writer()
do_early_stopping(nb_steps: int = None)
dump_to_disk(custom_log_file: str = None, tensorboard_log_file: str = None)
get_best_step(criterion: str = 'f1', reverse: bool = False)
get_dev_score(step: int = None)
get_last_checkpoint_string(step: int = None)
get_loss(loss_name: str = None, global_step: int = None)
get_step_values(step: int = None)
load_json_file(filepath: str = None)

yaset.utils.mapping

yaset.utils.mapping.extract_char_mapping(instance_file: str = None)
yaset.utils.mapping.extract_label_mapping(instance_file: str = None)
yaset.utils.mapping.extract_mappings_and_pretrained_matrix(options: dict = None, oov_symbol: str = '<unk>', pad_symbol: str = '<pad>', output_dir: str = None) -> (<class 'dict'>, <class 'numpy.ndarray'>)

Extract pretrained embedding matrix, size and mapping.

Parameters:
  • output_dir (str) – model output directory
  • options (dict) – model parameters
  • oov_symbol (str) – symbol to use for OOV (vector will be created if necessary)
  • pad_symbol (str) – symbol to use for padding (vector will be created if necessary)
Returns:

pretrained matrix mapping and pretrained matrix (note: the signature annotation indicates a 2-tuple)

Return type:

dict, np.ndarray

yaset.utils.misc

yaset.utils.misc.chunks(l, n)
yaset.utils.misc.flatten(list_of_lists)

yaset.utils.path

yaset.utils.path.ensure_dir(directory: str) → None

Creates a directory

Args:
directory (str): path to create
Returns:
None

yaset.utils.training

class yaset.utils.training.Trainer(accumulation_steps: int = None, batch_size: int = None, clip_grad_norm: float = None, cuda: bool = False, dataloader_train: torch.utils.data.dataloader.DataLoader = None, dataloader_dev: torch.utils.data.dataloader.DataLoader = None, eval_function: Callable = None, eval_every_n_steps: int = None, fp16: bool = None, len_dataset_train: int = None, len_dataset_dev: int = None, log_to_stdout_every_n_step: int = None, lr_scheduler: object = None, max_steps: int = None, model: torch.nn.modules.module.Module = None, optimizer: torch.optim.optimizer.Optimizer = None, train_logger: yaset.utils.logging.TrainLogger = None, warmup_scheduler: torch.optim.lr_scheduler.LambdaLR = None, working_dir: str = None)

Bases: object

static clear_model_dir(model_dir)

Remove old model parameter files

Args:
model_dir (str): model parameter directory
Returns:
None
perform_training()
test_on_dev(step_counter: int = None)
yaset.utils.training.cycle(iterable)