# The ERROR of `to_dense_adj`: "RuntimeError: max(): Expected reduction dim to be specified for input.numel() == 0. Specify the reduction dim with the 'dim' argument." #5475

**Closed** · jiaruHithub opened this issue on Sep 20, 2022 · 1 comment · Fixed by #5476

### 🐛 Describe the bug

My code is as follows. The `MoleculeDataset` code:
```python
import os
from itertools import repeat

import pandas as pd
import torch
from rdkit.Chem import AllChem, Descriptors
from torch_geometric.data import Data, InMemoryDataset

# Project-local helpers (mol_to_graph_data_obj_simple,
# create_standardized_mol_id, the _load_*_dataset loaders) are omitted here.


class MoleculeDataset(InMemoryDataset):
    def __init__(self,
                 root,
                 # data=None,
                 # slices=None,
                 transform=None,
                 pre_transform=None,
                 pre_filter=None,
                 dataset='zinc250k',
                 empty=False):
        """
        Adapted from qm9.py. Disabled the download functionality.
        :param root: directory of the dataset, containing a raw and a
            processed dir. The raw dir should contain the file with the
            SMILES; the processed dir can either be empty or contain a
            previously processed file.
        :param dataset: name of the dataset. Currently only implemented for
            zinc250k, chembl_with_labels, tox21, hiv, bace, bbbp, clintox,
            esol, freesolv, lipophilicity, muv, pcba, sider, toxcast
        :param empty: if True, no data obj is loaded; used for initializing
            an empty dataset.
        """
        self.dataset = dataset
        self.root = root
        super(MoleculeDataset, self).__init__(root, transform, pre_transform,
                                              pre_filter)
        self.transform, self.pre_transform, self.pre_filter = \
            transform, pre_transform, pre_filter
        if not empty:
            self.data, self.slices = torch.load(self.processed_paths[0])
    def get(self, idx):
        data = Data()
        for key in self.data.keys:
            item, slices = self.data[key], self.slices[key]
            s = list(repeat(slice(None), item.dim()))
            s[data.__cat_dim__(key, item)] = slice(slices[idx],
                                                   slices[idx + 1])
            data[key] = item[s]
        return data

    @property
    def raw_file_names(self):
        file_name_list = os.listdir(self.raw_dir)
        # assert len(file_name_list) == 1  # currently assume we have a
        # single raw file
        return file_name_list

    @property
    def processed_file_names(self):
        return 'geometric_data_processed.pt'

    def download(self):
        raise NotImplementedError('Must indicate valid location of raw data. '
                                  'No download allowed')
    def process(self):
        data_smiles_list = []
        data_list = []

        if self.dataset == 'zinc_standard_agent':
            input_path = self.raw_paths[0]
            input_df = pd.read_csv(input_path, sep=',', compression='gzip',
                                   dtype='str')
            smiles_list = list(input_df['smiles'])
            zinc_id_list = list(input_df['zinc_id'])
            for i in range(len(smiles_list)):
                s = smiles_list[i]
                # each example contains a single species
                try:
                    rdkit_mol = AllChem.MolFromSmiles(s)
                    if rdkit_mol is not None:  # ignore invalid mol objects
                        # # convert aromatic bonds to double bonds
                        # Chem.SanitizeMol(rdkit_mol,
                        #     sanitizeOps=Chem.SanitizeFlags.SANITIZE_KEKULIZE)
                        data = mol_to_graph_data_obj_simple(rdkit_mol)
                        # manually add mol id: the zinc id value, stripped
                        # of leading zeros
                        id = int(zinc_id_list[i].split('ZINC')[1].lstrip('0'))
                        data.id = torch.tensor([id])
                        data_list.append(data)
                        data_smiles_list.append(smiles_list[i])
                except:
                    continue

        elif self.dataset == 'zinc_sample':
            input_path = self.raw_paths[0]
            with open(input_path, 'r') as f:
                data = f.readlines()
            all_data = [x.strip() for x in data]
            data_smiles_list = []
            data_list = []
            for i, item in enumerate(all_data):
                s = item
                try:
                    rdkit_mol = AllChem.MolFromSmiles(s)
                    if rdkit_mol is not None:
                        data = mol_to_graph_data_obj_simple(rdkit_mol)
                        # manually add mol id: here just the line index
                        data.id = torch.tensor([i])
                        data_list.append(data)
                        data_smiles_list.append(s)
                except:
                    continue
        elif self.dataset == 'chembl_filtered':
            # get downstream test molecules
            from splitters import scaffold_split

            downstream_dir = [
                'dataset/bace',
                'dataset/bbbp',
                'dataset/clintox',
                'dataset/esol',
                'dataset/freesolv',
                'dataset/hiv',
                'dataset/lipophilicity',
                'dataset/muv',
                # 'dataset/pcba/processed/smiles.csv',
                'dataset/sider',
                'dataset/tox21',
                'dataset/toxcast',
            ]
            downstream_inchi_set = set()
            for d_path in downstream_dir:
                print(d_path)
                dataset_name = d_path.split('/')[1]
                downstream_dataset = MoleculeDataset(d_path,
                                                     dataset=dataset_name)
                downstream_smiles = pd.read_csv(
                    os.path.join(d_path, 'processed', 'smiles.csv'),
                    header=None)[0].tolist()
                assert len(downstream_dataset) == len(downstream_smiles)

                _, _, _, (train_smiles, valid_smiles, test_smiles) = \
                    scaffold_split(downstream_dataset, downstream_smiles,
                                   task_idx=None, null_value=0,
                                   frac_train=0.8, frac_valid=0.1,
                                   frac_test=0.1, return_smiles=True)

                # remove both test and validation molecules
                remove_smiles = test_smiles + valid_smiles

                downstream_inchis = []
                for smiles in remove_smiles:
                    species_list = smiles.split('.')
                    # record the inchi for all species, not just the largest
                    # (the default in create_standardized_mol_id if the input
                    # has multiple species)
                    for s in species_list:
                        inchi = create_standardized_mol_id(s)
                        downstream_inchis.append(inchi)
                downstream_inchi_set.update(downstream_inchis)

            smiles_list, rdkit_mol_objs, folds, labels = \
                _load_chembl_with_labels_dataset(os.path.join(self.root,
                                                              'raw'))

            print('processing')
            for i in range(len(rdkit_mol_objs)):
                rdkit_mol = rdkit_mol_objs[i]
                if rdkit_mol is not None:
                    mw = Descriptors.MolWt(rdkit_mol)
                    if 50 <= mw <= 900:
                        inchi = create_standardized_mol_id(smiles_list[i])
                        if inchi is not None and \
                                inchi not in downstream_inchi_set:
                            data = mol_to_graph_data_obj_simple(rdkit_mol)
                            data.id = torch.tensor([i])
                            data.y = torch.tensor(labels[i, :])
                            # fold information
                            if i in folds[0]:
                                data.fold = torch.tensor([0])
                            elif i in folds[1]:
                                data.fold = torch.tensor([1])
                            else:
                                data.fold = torch.tensor([2])
                            data_list.append(data)
                            data_smiles_list.append(smiles_list[i])
        elif self.dataset == 'tox21':
            smiles_list, rdkit_mol_objs, labels = \
                _load_tox21_dataset(self.raw_paths[0])
            for i in range(len(smiles_list)):
                rdkit_mol = rdkit_mol_objs[i]
                data = mol_to_graph_data_obj_simple(rdkit_mol)
                # manually add mol id: the index of the mol in the dataset
                data.id = torch.tensor([i])
                data.y = torch.tensor(labels[i, :])
                data_list.append(data)
                data_smiles_list.append(smiles_list[i])

        elif self.dataset == 'hiv':
            smiles_list, rdkit_mol_objs, labels = \
                _load_hiv_dataset(self.raw_paths[0])
            for i in range(len(smiles_list)):
                rdkit_mol = rdkit_mol_objs[i]
                data = mol_to_graph_data_obj_simple(rdkit_mol)
                data.id = torch.tensor([i])
                data.y = torch.tensor([labels[i]])
                data_list.append(data)
                data_smiles_list.append(smiles_list[i])

        elif self.dataset == 'bace':
            smiles_list, rdkit_mol_objs, folds, labels = \
                _load_bace_dataset(self.raw_paths[0])
            for i in range(len(smiles_list)):
                rdkit_mol = rdkit_mol_objs[i]
                data = mol_to_graph_data_obj_simple(rdkit_mol)
                data.id = torch.tensor([i])
                data.y = torch.tensor([labels[i]])
                data.fold = torch.tensor([folds[i]])
                data_list.append(data)
                data_smiles_list.append(smiles_list[i])

        elif self.dataset == 'bbbp':
            smiles_list, rdkit_mol_objs, labels = \
                _load_bbbp_dataset(self.raw_paths[0])
            for i in range(len(smiles_list)):
                rdkit_mol = rdkit_mol_objs[i]
                if rdkit_mol is not None:
                    data = mol_to_graph_data_obj_simple(rdkit_mol)
                    data.id = torch.tensor([i])
                    data.y = torch.tensor([labels[i]])
                    data_list.append(data)
                    data_smiles_list.append(smiles_list[i])

        elif self.dataset == 'clintox':
            smiles_list, rdkit_mol_objs, labels = \
                _load_clintox_dataset(self.raw_paths[0])
            for i in range(len(smiles_list)):
                rdkit_mol = rdkit_mol_objs[i]
                if rdkit_mol is not None:
                    data = mol_to_graph_data_obj_simple(rdkit_mol)
                    data.id = torch.tensor([i])
                    data.y = torch.tensor(labels[i, :])
                    data_list.append(data)
                    data_smiles_list.append(smiles_list[i])
        elif self.dataset == 'esol':
            smiles_list, rdkit_mol_objs, labels = \
                _load_esol_dataset(self.raw_paths[0])
            for i in range(len(smiles_list)):
                rdkit_mol = rdkit_mol_objs[i]
                data = mol_to_graph_data_obj_simple(rdkit_mol)
                data.id = torch.tensor([i])
                data.y = torch.tensor([labels[i]])
                data_list.append(data)
                data_smiles_list.append(smiles_list[i])

        elif self.dataset == 'freesolv':
            smiles_list, rdkit_mol_objs, labels = \
                _load_freesolv_dataset(self.raw_paths[0])
            for i in range(len(smiles_list)):
                rdkit_mol = rdkit_mol_objs[i]
                data = mol_to_graph_data_obj_simple(rdkit_mol)
                data.id = torch.tensor([i])
                data.y = torch.tensor([labels[i]])
                data_list.append(data)
                data_smiles_list.append(smiles_list[i])

        elif self.dataset == 'lipophilicity':
            smiles_list, rdkit_mol_objs, labels = \
                _load_lipophilicity_dataset(self.raw_paths[0])
            for i in range(len(smiles_list)):
                rdkit_mol = rdkit_mol_objs[i]
                data = mol_to_graph_data_obj_simple(rdkit_mol)
                data.id = torch.tensor([i])
                data.y = torch.tensor([labels[i]])
                data_list.append(data)
                data_smiles_list.append(smiles_list[i])

        elif self.dataset == 'muv':
            smiles_list, rdkit_mol_objs, labels = \
                _load_muv_dataset(self.raw_paths[0])
            for i in range(len(smiles_list)):
                rdkit_mol = rdkit_mol_objs[i]
                data = mol_to_graph_data_obj_simple(rdkit_mol)
                data.id = torch.tensor([i])
                data.y = torch.tensor(labels[i, :])
                data_list.append(data)
                data_smiles_list.append(smiles_list[i])

        elif self.dataset == 'pcba':
            smiles_list, rdkit_mol_objs, labels = \
                _load_pcba_dataset(self.raw_paths[0])
            for i in range(len(smiles_list)):
                rdkit_mol = rdkit_mol_objs[i]
                data = mol_to_graph_data_obj_simple(rdkit_mol)
                data.id = torch.tensor([i])
                data.y = torch.tensor(labels[i, :])
                data_list.append(data)
                data_smiles_list.append(smiles_list[i])
        elif self.dataset == 'pcba_pretrain':
            smiles_list, rdkit_mol_objs, labels = \
                _load_pcba_dataset(self.raw_paths[0])
            downstream_inchi = set(pd.read_csv(
                os.path.join(self.root, 'downstream_mol_inchi_may_24_2019'),
                sep=',', header=None)[0])
            for i in range(len(smiles_list)):
                # remove examples with multiple species
                if '.' not in smiles_list[i]:
                    rdkit_mol = rdkit_mol_objs[i]
                    mw = Descriptors.MolWt(rdkit_mol)
                    if 50 <= mw <= 900:
                        inchi = create_standardized_mol_id(smiles_list[i])
                        if inchi is not None and \
                                inchi not in downstream_inchi:
                            data = mol_to_graph_data_obj_simple(rdkit_mol)
                            data.id = torch.tensor([i])
                            data.y = torch.tensor(labels[i, :])
                            data_list.append(data)
                            data_smiles_list.append(smiles_list[i])

        elif self.dataset == 'sider':
            smiles_list, rdkit_mol_objs, labels = \
                _load_sider_dataset(self.raw_paths[0])
            for i in range(len(smiles_list)):
                rdkit_mol = rdkit_mol_objs[i]
                data = mol_to_graph_data_obj_simple(rdkit_mol)
                data.id = torch.tensor([i])
                data.y = torch.tensor(labels[i, :])
                data_list.append(data)
                data_smiles_list.append(smiles_list[i])

        elif self.dataset == 'toxcast':
            smiles_list, rdkit_mol_objs, labels = \
                _load_toxcast_dataset(self.raw_paths[0])
            for i in range(len(smiles_list)):
                rdkit_mol = rdkit_mol_objs[i]
                if rdkit_mol is not None:
                    data = mol_to_graph_data_obj_simple(rdkit_mol)
                    data.id = torch.tensor([i])
                    data.y = torch.tensor(labels[i, :])
                    data_list.append(data)
                    data_smiles_list.append(smiles_list[i])
        elif self.dataset == 'ptc_mr':
            input_path = self.raw_paths[0]
            input_df = pd.read_csv(input_path, sep=',', header=None,
                                   names=['id', 'label', 'smiles'])
            smiles_list = input_df['smiles']
            labels = input_df['label'].values
            for i in range(len(smiles_list)):
                s = smiles_list[i]
                rdkit_mol = AllChem.MolFromSmiles(s)
                if rdkit_mol is not None:  # ignore invalid mol objects
                    data = mol_to_graph_data_obj_simple(rdkit_mol)
                    data.id = torch.tensor([i])
                    data.y = torch.tensor([labels[i]])
                    data_list.append(data)
                    data_smiles_list.append(smiles_list[i])

        elif self.dataset == 'mutag':
            smiles_path = os.path.join(self.root, 'raw', 'mutag_188_data.can')
            labels_path = os.path.join(self.root, 'raw',
                                       'mutag_188_target.txt')
            smiles_list = pd.read_csv(smiles_path, sep=' ', header=None)[0]
            labels = pd.read_csv(labels_path, header=None)[0].values
            for i in range(len(smiles_list)):
                s = smiles_list[i]
                rdkit_mol = AllChem.MolFromSmiles(s)
                if rdkit_mol is not None:  # ignore invalid mol objects
                    data = mol_to_graph_data_obj_simple(rdkit_mol)
                    data.id = torch.tensor([i])
                    data.y = torch.tensor([labels[i]])
                    data_list.append(data)
                    data_smiles_list.append(smiles_list[i])

        else:
            raise ValueError('Invalid dataset name')

        if self.pre_filter is not None:
            data_list = [data for data in data_list if self.pre_filter(data)]

        if self.pre_transform is not None:
            data_list = [self.pre_transform(data) for data in data_list]

        # write data_smiles_list to the processed dir
        data_smiles_series = pd.Series(data_smiles_list)
        data_smiles_series.to_csv(os.path.join(self.processed_dir,
                                               'smiles.csv'),
                                  index=False, header=False)

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])
```
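For context, I build and split the dataset like this (a minimal usage sketch; the `dataset/tox21` layout is an assumption based on the docstring above):

```python
# Hypothetical usage: assumes dataset/tox21/raw/ holds the raw Tox21 file
# from the MoleculeNet zip linked below.
from splitters import scaffold_split  # project-local module, shown next

dataset = MoleculeDataset('dataset/tox21', dataset='tox21')
smiles = pd.read_csv('dataset/tox21/processed/smiles.csv',
                     header=None)[0].tolist()
train_dataset, valid_dataset, test_dataset = scaffold_split(
    dataset, smiles, frac_train=0.8, frac_valid=0.1, frac_test=0.1)
```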
The `scaffold_split` code is:
```python
from collections import defaultdict
from itertools import compress

import numpy as np
import torch


def scaffold_split(dataset, smiles_list, task_idx=None, null_value=0,
                   frac_train=0.8, frac_valid=0.1, frac_test=0.1,
                   return_smiles=False):
    """
    Adapted from https://github.com/deepchem/deepchem/blob/master/deepchem/splits/splitters.py
    Split dataset by Bemis-Murcko scaffolds.
    This function can also ignore examples containing null values for a
    selected task when splitting. Deterministic split.
    :param dataset: pytorch geometric dataset obj
    :param smiles_list: list of SMILES corresponding to the dataset obj
    :param task_idx: column idx of the data.y tensor. Will filter out
        examples with a null value in the specified task column of the
        data.y tensor prior to splitting. If None, no filtering.
    :param null_value: float that specifies the null value in data.y to
        filter if task_idx is provided
    :param frac_train:
    :param frac_valid:
    :param frac_test:
    :param return_smiles:
    :return: train, valid, test slices of the input dataset obj. If
        return_smiles is True, also returns ([train_smiles_list],
        [valid_smiles_list], [test_smiles_list])
    """
    np.testing.assert_almost_equal(frac_train + frac_valid + frac_test, 1.0)

    if task_idx is not None:
        # filter based on null values in task_idx; get the task array
        y_task = np.array([data.y[task_idx].item() for data in dataset])
        # boolean array that corresponds to non-null values
        non_null = y_task != null_value
        smiles_list = list(compress(enumerate(smiles_list), non_null))
    else:
        non_null = np.ones(len(dataset)) == 1
        smiles_list = list(compress(enumerate(smiles_list), non_null))

    # create a dict of the form {scaffold_i: [idx1, idx, ...]}
    all_scaffolds = {}
    for i, smiles in smiles_list:
        scaffold = generate_scaffold(smiles, include_chirality=True)
        if scaffold not in all_scaffolds:
            all_scaffolds[scaffold] = [i]
        else:
            all_scaffolds[scaffold].append(i)

    # sort from largest to smallest scaffold sets
    all_scaffolds = {key: sorted(value)
                     for key, value in all_scaffolds.items()}
    all_scaffold_sets = [
        scaffold_set for (scaffold, scaffold_set) in sorted(
            all_scaffolds.items(), key=lambda x: (len(x[1]), x[1][0]),
            reverse=True)
    ]

    # get train, valid, test indices
    train_cutoff = frac_train * len(smiles_list)
    valid_cutoff = (frac_train + frac_valid) * len(smiles_list)
    train_idx, valid_idx, test_idx = [], [], []
    for scaffold_set in all_scaffold_sets:
        if len(train_idx) + len(scaffold_set) > train_cutoff:
            if len(train_idx) + len(valid_idx) + len(scaffold_set) > valid_cutoff:
                test_idx.extend(scaffold_set)
            else:
                valid_idx.extend(scaffold_set)
        else:
            train_idx.extend(scaffold_set)

    assert len(set(train_idx).intersection(set(valid_idx))) == 0
    assert len(set(test_idx).intersection(set(valid_idx))) == 0

    train_dataset = dataset[torch.tensor(train_idx)]
    valid_dataset = dataset[torch.tensor(valid_idx)]
    test_dataset = dataset[torch.tensor(test_idx)]

    if not return_smiles:
        return train_dataset, valid_dataset, test_dataset
    else:
        train_smiles = [smiles_list[i][1] for i in train_idx]
        valid_smiles = [smiles_list[i][1] for i in valid_idx]
        test_smiles = [smiles_list[i][1] for i in test_idx]
        return train_dataset, valid_dataset, test_dataset, (train_smiles,
                                                            valid_smiles,
                                                            test_smiles)
def random_scaffold_split(dataset, smiles_list, task_idx=None, null_value=0,
                          frac_train=0.8, frac_valid=0.1, frac_test=0.1,
                          seed=0):
    """
    Adapted from https://github.com/pfnet-research/chainer-chemistry/blob/master/chainer_chemistry/dataset/splitters/scaffold_splitter.py
    Split dataset by Bemis-Murcko scaffolds.
    This function can also ignore examples containing null values for a
    selected task when splitting.
    :param dataset: pytorch geometric dataset obj
    :param smiles_list: list of SMILES corresponding to the dataset obj
    :param task_idx: column idx of the data.y tensor. Will filter out
        examples with a null value in the specified task column of the
        data.y tensor prior to splitting. If None, no filtering.
    :param null_value: float that specifies the null value in data.y to
        filter if task_idx is provided
    :param frac_train:
    :param frac_valid:
    :param frac_test:
    :param seed:
    :return: train, valid, test slices of the input dataset obj
    """
    np.testing.assert_almost_equal(frac_train + frac_valid + frac_test, 1.0)

    if task_idx is not None:
        # filter based on null values in task_idx; get the task array
        y_task = np.array([data.y[task_idx].item() for data in dataset])
        # boolean array that corresponds to non-null values
        non_null = y_task != null_value
        smiles_list = list(compress(enumerate(smiles_list), non_null))
    else:
        non_null = np.ones(len(dataset)) == 1
        smiles_list = list(compress(enumerate(smiles_list), non_null))

    rng = np.random.RandomState(seed)

    scaffolds = defaultdict(list)
    for ind, smiles in smiles_list:
        scaffold = generate_scaffold(smiles, include_chirality=True)
        scaffolds[scaffold].append(ind)

    scaffold_sets = rng.permutation(list(scaffolds.values()))

    n_total_valid = int(np.floor(frac_valid * len(dataset)))
    n_total_test = int(np.floor(frac_test * len(dataset)))

    train_idx = []
    valid_idx = []
    test_idx = []

    for scaffold_set in scaffold_sets:
        if len(valid_idx) + len(scaffold_set) <= n_total_valid:
            valid_idx.extend(scaffold_set)
        elif len(test_idx) + len(scaffold_set) <= n_total_test:
            test_idx.extend(scaffold_set)
        else:
            train_idx.extend(scaffold_set)

    train_dataset = dataset[torch.tensor(train_idx)]
    valid_dataset = dataset[torch.tensor(valid_idx)]
    test_dataset = dataset[torch.tensor(test_idx)]

    return train_dataset, valid_dataset, test_dataset
```
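`generate_scaffold`, used by both splitters, is another project-local helper; a minimal sketch of what it presumably does, following the DeepChem convention and RDKit's `MurckoScaffold` API:

```python
from rdkit.Chem.Scaffolds import MurckoScaffold

def generate_scaffold(smiles, include_chirality=False):
    # Bemis-Murcko scaffold SMILES of a molecule; sketch of the assumed helper.
    return MurckoScaffold.MurckoScaffoldSmiles(
        smiles=smiles, includeChirality=include_chirality)
```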
The `split_class_graphs` code is:
```python
import numpy as np
from torch_geometric.utils import to_dense_adj


def split_class_graphs(dataset):
    y_list = []
    for data in dataset:
        y_list.append(tuple(data.y.tolist()))
    num_classes = len(set(y_list))

    y_cetos = set(y_list)
    y_idxs = []
    for y_ceto in y_cetos:
        y_idxs.append([idx for idx, y in enumerate(y_list) if y == y_ceto])

    all_graphs_list = []
    for graph in dataset:
        adj = to_dense_adj(graph.edge_index)[0].numpy()
        all_graphs_list.append(adj)

    class_graphs = []
    for class_label in set(y_list):
        c_graph_list = [all_graphs_list[i] for i in range(len(y_list))
                        if y_list[i] == class_label]
        print(len(c_graph_list))
        class_graphs.append((np.array(class_label), c_graph_list))

    return class_graphs, y_idxs
```
and it reports this error:

```
[03:31:05] WARNING: not removing hydrogen atom without neighbors
[03:31:05] WARNING: not removing hydrogen atom without neighbors
[03:31:06] WARNING: not removing hydrogen atom without neighbors
[03:31:06] WARNING: not removing hydrogen atom without neighbors
[03:31:06] WARNING: not removing hydrogen atom without neighbors
[03:31:06] WARNING: not removing hydrogen atom without neighbors
[03:31:06] WARNING: not removing hydrogen atom without neighbors
[03:31:06] WARNING: not removing hydrogen atom without neighbors
[03:31:06] WARNING: not removing hydrogen atom without neighbors
[03:31:06] WARNING: not removing hydrogen atom without neighbors
[03:31:06] WARNING: not removing hydrogen atom without neighbors
[03:31:07] WARNING: not removing hydrogen atom without neighbors
[03:31:07] WARNING: not removing hydrogen atom without neighbors
Traceback (most recent call last):
  File "graohon_get.py", line 208, in <module>
    class_graphs, y_idxs = split_class_graphs(train_dataset)
  File "graohon_get.py", line 64, in split_class_graphs
    adj = to_dense_adj(graph.edge_index)[0].numpy()
  File "/opt/conda/lib/python3.7/site-packages/torch_geometric/utils/to_dense_adj.py", line 22, in to_dense_adj
    batch = edge_index.new_zeros(edge_index.max().item() + 1)
RuntimeError: max(): Expected reduction dim to be specified for input.numel() == 0. Specify the reduction dim with the 'dim' argument.
```
The dataset comes from MoleculeNet; I downloaded it from http://snap.stanford.edu/gnn-pretrain/data/chem_dataset.zip. Running with the Tox21, ToxCast, SIDER, ClinTox, or MUV datasets reports this error, while BACE, BBBP, and HIV do not.
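My guess is that the failing datasets contain molecules with no bonds (e.g. single-atom species), so `graph.edge_index` is an empty `(2, 0)` tensor and the `edge_index.max()` call inside `to_dense_adj` has nothing to reduce. A minimal sketch that reproduces the error on PyG 2.0.4:

```python
import torch
from torch_geometric.utils import to_dense_adj

edge_index = torch.empty((2, 0), dtype=torch.long)  # a graph with no edges
to_dense_adj(edge_index)  # RuntimeError: max(): Expected reduction dim ...
```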
Could you help me? Thanks!
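For now I can avoid the crash by guarding against empty edge indices before calling `to_dense_adj` (a workaround sketch only, not the library fix; `graph.num_nodes` is PyG's standard node-count attribute):

```python
import numpy as np
from torch_geometric.utils import to_dense_adj

def dense_adj_safe(graph):
    # PyG 2.0.4 derives batch from edge_index.max() when batch is None,
    # which fails for graphs without edges, so handle that case by hand.
    if graph.edge_index.numel() == 0:
        return np.zeros((graph.num_nodes, graph.num_nodes), dtype=np.float32)
    # max_num_nodes keeps trailing isolated nodes in the adjacency matrix.
    return to_dense_adj(graph.edge_index,
                        max_num_nodes=graph.num_nodes)[0].numpy()
```

In `split_class_graphs`, `adj = dense_adj_safe(graph)` would then replace the failing line.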
### Environment

* PyG version: 2.0.4
* PyTorch version: 1.9.0
* OS: CentOS 7
* Python version: 3.7.4
* CUDA/cuDNN version: 20.2
* How you installed PyTorch and PyG (conda, pip, source): pip
* Any other relevant information (e.g., version of torch-scatter):
  * torch 1.9.0
  * torch-cluster 1.6.0
  * torch-geometric 2.0.4
  * torch-points-kernels 0.6.10
  * torch-scatter 2.0.9
  * torch-sparse 0.6.12
  * torch-spline-conv 1.2.1