{ "projects": { "project_1": { "id": "project_1", "name": "task agnostic mask learning in distilbert", "research_question": "how does tamt helps in improving perfomance in distilbert", "keywords": [ "distilbert", "bert", "earlybert" ], "papers": [ { "title": "Automated Personnel Selection for Software Engineers Using LLM-Based Profile Evaluation", "authors": [ "Ahmed Akib Jawad Karim", "Shahria Hoque", "Md. Golam Rabiul Alam", "Md. Zia Uddin" ], "abstract": "Organizational success in todays competitive employment market depends on\nchoosing the right staff. This work evaluates software engineer profiles using\nan automated staff selection method based on advanced natural language\nprocessing (NLP) techniques. A fresh dataset was generated by collecting\nLinkedIn profiles with important attributes like education, experience, skills,\nand self-introduction. Expert feedback helped transformer models including\nRoBERTa, DistilBERT, and a customized BERT variation, LastBERT, to be adjusted.\nThe models were meant to forecast if a candidate's profile fit the selection\ncriteria, therefore allowing automated ranking and assessment. With 85%\naccuracy and an F1 score of 0.85, RoBERTa performed the best; DistilBERT\nprovided comparable results at less computing expense. Though light, LastBERT\nproved to be less effective, with 75% accuracy. The reusable models provide a\nscalable answer for further categorization challenges. This work presents a\nfresh dataset and technique as well as shows how transformer models could\nimprove recruiting procedures. Expanding the dataset, enhancing model\ninterpretability, and implementing the system in actual environments will be\npart of future activities.", "published_date": "2024-10-30", "year": 2024, "url": "http://arxiv.org/abs/2410.23365v2", "pdf_url": "http://arxiv.org/pdf/2410.23365v2", "source": "ArXiv", "arxiv_id": "2410.23365v2", "categories": [ "cs.SE" ], "doi": null }, { "title": "Compositional and Lexical Semantics in RoBERTa, BERT and DistilBERT: A Case Study on CoQA", "authors": [ "Ieva Stali\u016bnait\u0117", "Ignacio Iacobacci" ], "abstract": "Many NLP tasks have benefited from transferring knowledge from contextualized\nword embeddings, however the picture of what type of knowledge is transferred\nis incomplete. This paper studies the types of linguistic phenomena accounted\nfor by language models in the context of a Conversational Question Answering\n(CoQA) task. We identify the problematic areas for the finetuned RoBERTa, BERT\nand DistilBERT models through systematic error analysis - basic arithmetic\n(counting phrases), compositional semantics (negation and Semantic Role\nLabeling), and lexical semantics (surprisal and antonymy). When enhanced with\nthe relevant linguistic knowledge through multitask learning, the models\nimprove in performance. Ensembles of the enhanced models yield a boost between\n2.2 and 2.7 points in F1 score overall, and up to 42.1 points in F1 on the\nhardest question classes. 
The results show differences in ability to represent\ncompositional and lexical information between RoBERTa, BERT and DistilBERT.", "published_date": "2020-09-17", "year": 2020, "url": "http://arxiv.org/abs/2009.08257v1", "pdf_url": "http://arxiv.org/pdf/2009.08257v1", "source": "ArXiv", "arxiv_id": "2009.08257v1", "categories": [ "cs.CL", "cs.LG" ], "doi": null }, { "title": "Table 8: Textual features only extracted by DistilBERT.", "authors": [], "abstract": "", "published_date": "", "year": null, "url": "https://doi.org/10.7717/peerj-cs.2252/table-8", "source": "Crossref", "doi": "10.7717/peerj-cs.2252/table-8", "journal": "", "type": "component" }, { "title": "Analysing Natural Language Processing Techniques: A Comparative Study of NLTK, spaCy, BERT, and DistilBERT on Customer Query Datasets.", "authors": [ "Patrizia De Camillis" ], "abstract": "", "published_date": "", "year": null, "url": "https://doi.org/10.63227/357.566.84", "source": "Crossref", "doi": "10.63227/357.566.84", "journal": "", "type": "dissertation" } ], "notes": [], "status": "active", "user_id": "admin_user", "created_at": "2025-07-15T00:15:05.561448", "updated_at": "2025-07-15T00:15:36.162269" } }, "counter": 1 }
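A minimal sketch of how a consumer script might read this store and list a project's papers. It assumes the JSON above is saved as projects.json; the filename and the script itself are illustrative and not part of the store:

import json

# Load the project store from disk. The filename "projects.json" is an
# assumption; point this at wherever the JSON document above is saved.
with open("projects.json", encoding="utf-8") as f:
    store = json.load(f)

project = store["projects"]["project_1"]
print(project["name"])
print(project["research_question"])

# Papers are embedded inline. Crossref component/dissertation records may
# carry a null year, so fall back to "n.d." (no date) when printing.
for paper in project["papers"]:
    year = paper.get("year") or "n.d."
    print(f"- [{paper['source']}, {year}] {paper['title']}")

Note that notes is an empty list and doi is null for the ArXiv records, so any consumer should treat those fields as optional.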