Skip to content

Commit

Permalink
Merge pull request #3 from p2m2/sentence_transformer
Browse files Browse the repository at this point in the history
Sentence transformer
  • Loading branch information
ofilangi authored Oct 28, 2024
2 parents 7488fb3 + b6ac27e commit 782a325
Show file tree
Hide file tree
Showing 25 changed files with 320 additions and 675 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,4 +151,10 @@ Run a specific test file
```bash
python -m unittest tests/similarity/test_model_embedding_manager.py
```
```bash
python -m llm_semantic_annotator.similarity_evaluator
```
84 changes: 0 additions & 84 deletions config/all-demo.json

This file was deleted.

32 changes: 0 additions & 32 deletions config/chmo.json

This file was deleted.

22 changes: 14 additions & 8 deletions config/foodon-demo.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,33 @@
"batch_size" : 32,

"populate_owl_tag_embeddings" : {
"prefix" : {
"rdfs" : "http://www.w3.org/2000/01/rdf-schema#",
"obo" : "http://purl.obolibrary.org/obo/",
"owl" : "http://www.w3.org/2002/07/owl#"
},
"ontologies": {
"foodon_link" : {
"foodon": {
"url": "https://github.com/FoodOntology/foodon/raw/refs/tags/v2024-07-12/foodon.owl",
"prefix": "http://purl.obolibrary.org/obo/",
"prefix": "http://purl.obolibrary.org/obo/FOODON_",
"format": "xml",
"label" : "<http://www.w3.org/2000/01/rdf-schema#label>",
"properties": ["<http://purl.obolibrary.org/obo/IAO_0000115>"]
"label" : "rdfs:label",
"properties": ["obo:IAO_0000115","rdfs:comment","owl:annotatedTarget"]
}
}
},
"debug_nb_terms_by_ontology" : -1
},
"populate_abstract_embeddings" : {
"abstracts_per_file" : 50,
"from_file" : {
"json_files" : [
"data/abstracts/abstracts_1.json",
"data/abstracts/abstracts_2.json"
"from_ncbi_api" : {
"ncbi_api_chunk_size" : 200,
"debug_nb_ncbi_request" : -1,
"retmax" : 2000,
"selected_term" : [
"food"
]
}

}
}
22 changes: 17 additions & 5 deletions config/mesh-demo.json
Original file line number Diff line number Diff line change
@@ -1,18 +1,30 @@
{
"encodeur" : "sentence-transformers/all-MiniLM-L6-v2",
"threshold_similarity_tag_chunk" : 0.60,
"threshold_similarity_tag_chunk" : 0.70,
"threshold_similarity_tag" : 0.80,
"batch_size" : 32,

"populate_owl_tag_embeddings" : {
"prefix" : {
"rdf" : "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
"rdfs" : "http://www.w3.org/2000/01/rdf-schema#",
"mesh" : "http://id.nlm.nih.gov/mesh/",
"meshv" : "http://id.nlm.nih.gov/mesh/vocab#",
"owl" : "http://www.w3.org/2002/07/owl#"
},
"ontologies": {
"mesh_link" : {
"mesh": {
"filepath" : "data/mesh/mesh_concept.nt",
"prefix": "http://id.nlm.nih.gov/mesh/",
"filepath" : "data/mesh/mesh.nt",
"prefix": "http://id.nlm.nih.gov/mesh/M",
"format": "nt",
"label" : "<http://www.w3.org/2000/01/rdf-schema#label>",
"properties": ["<http://id.nlm.nih.gov/mesh/vocab#scopeNote>"]
"label" : "rdfs:label",
"properties": ["<http://id.nlm.nih.gov/mesh/vocab#scopeNote>"],
"constraints" : {
"meshv:active" : "true",
"rdf:type" : "meshv:Concept"
}

}
}
},
Expand Down
84 changes: 0 additions & 84 deletions config/ms-demo.json

This file was deleted.

26 changes: 23 additions & 3 deletions config/ncbi-taxon-demo.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,29 @@
"threshold_similarity_tag" : 0.80,
"batch_size" : 32,

"populate_ncbi_taxon_tag_embeddings" : {
"regex" : "(assic.*)|(arab.*)" ,
"tags_per_file" : 2000
"populate_owl_tag_embeddings" : {
"prefix" : {
"rdfs" : "http://www.w3.org/2000/01/rdf-schema#",
"obo" : "http://purl.obolibrary.org/obo/",
"owl" : "http://www.w3.org/2002/07/owl#",
"ncbitaxon" : "http://purl.obolibrary.org/obo/ncbitaxon#",
"NCBITaxon" : "http://purl.obolibrary.org/obo/NCBITaxon_"
},
"ontologies": {
"taxon_link" : {
"ncbitaxon": {
"url": "http://purl.obolibrary.org/obo/ncbitaxon.owl",
"prefix": "http://purl.obolibrary.org/obo/NCBITaxon_",
"selected_prefix_term": "http://purl.obolibrary.org/obo/NCBITaxon_37",
"format": "xml",
"label" : "rdfs:label",
"properties": ["obo:IAO_0000115","rdfs:comment","owl:annotatedTarget"],
"constraints": {
"ncbitaxon:has_rank" : "NCBITaxon:species"
}
}
}
}
},
"populate_abstract_embeddings" : {
"abstracts_per_file" : 50,
Expand Down
42 changes: 42 additions & 0 deletions config/planteome-demo-only-TO-0000394.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"encodeur" : "sentence-transformers/all-MiniLM-L6-v2",
"threshold_similarity_tag_chunk" : 0.70,
"threshold_similarity_tag" : 0.80,
"batch_size" : 32,

"populate_owl_tag_embeddings" : {
"prefix" : {
"rdfs" : "http://www.w3.org/2000/01/rdf-schema#",
"obo" : "http://purl.obolibrary.org/obo/",
"owl" : "http://www.w3.org/2002/07/owl#",
"oboInOwl" : "http://www.geneontology.org/formats/oboInOwl#"
},
"ontologies": {
"planteome_link" : {
"to": {
"url": "http://purl.obolibrary.org/obo/to.owl",
"prefix": "http://purl.obolibrary.org/obo/TO_",
"selected_prefix_term": "http://purl.obolibrary.org/obo/TO_000039",
"format": "xml",
"label" : "rdfs:label",
"properties": ["obo:IAO_0000115","rdfs:comment","owl:annotatedTarget"],
"constraints" : {
"oboInOwl:hasOBONamespace" : "'plant_trait_ontology'"
}
}
}
},
"debug_nb_terms_by_ontology" : -1
},
"populate_abstract_embeddings" : {
"abstracts_per_file" : 500,
"from_ncbi_api" : {
"ncbi_api_chunk_size" : 200,
"debug_nb_ncbi_request" : -1,
"retmax" : 2000,
"selected_term" : [
"Crops%2C+Agricultural%2Fmetabolism%5BMeSH%5D"
]
}

}
}
9 changes: 2 additions & 7 deletions config/planteome-demo.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"peco": {
"url": "http://purl.obolibrary.org/obo/peco.owl",
"prefix": "http://purl.obolibrary.org/obo/PECO_",
"format": "xml",
"format": "xml",
"label" : "<http://www.w3.org/2000/01/rdf-schema#label>",
"properties": ["<http://purl.obolibrary.org/obo/IAO_0000115>"]
},
Expand All @@ -36,12 +36,7 @@
"properties": ["<http://purl.obolibrary.org/obo/IAO_0000115>"]
}
}
},
"debug_nb_terms_by_ontology" : -1
},
"populate_ncbi_taxon_tag_embeddings" : {
"regex" : "(assic.*)|(arab.*)" ,
"tags_per_file" : 2000
}
},
"populate_abstract_embeddings" : {
"abstracts_per_file" : 500,
Expand Down
Loading

0 comments on commit 782a325

Please sign in to comment.