Source code for epbd_bert.utility.dnabert2
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
from epbd_bert.path_configs import dnabert2_pretrained_dirpath
[docs]
def get_dnabert2_tokenizer(max_num_tokens=512, home_dir=""):
tokenizer = AutoTokenizer.from_pretrained(
home_dir + dnabert2_pretrained_dirpath,
model_max_length=max_num_tokens,
padding_side="right",
use_fast=True,
trust_remote_code=True,
)
return tokenizer
[docs]
def get_dnabert2_pretrained_model(home_dir=""):
model = AutoModel.from_pretrained(home_dir + dnabert2_pretrained_dirpath, trust_remote_code=True)
return model
[docs]
def load_dnabert2_for_classification(num_labels, home_dir=""):
model = AutoModelForSequenceClassification.from_pretrained(
home_dir + dnabert2_pretrained_dirpath,
num_labels=num_labels,
trust_remote_code=True,
)
return model