start-ver=1.4
cd-journal=joma
no-vol=10
cd-vols=
no-issue=
article-no=
start-page=70053
end-page=70067
dt-received=
dt-revised=
dt-accepted=
dt-pub-year=2022
dt-pub=2022
dt-online=
en-article=
kn-article=
en-subject=
kn-subject=
en-title=
kn-title=Improvement and Evaluation of Data Consistency Metric CIL for Software Engineering Data Sets
en-subtitle=
kn-subtitle=
en-abstract=
kn-abstract=Software data sets derived from actual software products and their development processes are widely used for project planning, management, quality assurance and process improvement, etc. Although it has been demonstrated that certain data sets are not fit for these purposes, the data quality of data sets is often not assessed before using them. The principal reason for this is that there are not many metrics quantifying fitness of software development data. In that respect, this study makes an effort to fill the void in the literature by devising a new and efficient assessment method of data quality. To that end, we take as a reference the Case Inconsistency Level (CIL), which counts the number of inconsistent project pairs in a data set to evaluate its consistency. Based on a follow-up evaluation with a large sample set, we show that CIL is not effective in evaluating the quality of certain data sets. By studying the problems associated with CIL and eliminating them, we propose an improved metric called Similar Case Inconsistency Level (SCIL). Our empirical evaluation with 54 data samples derived from six large project data sets shows that SCIL can distinguish between consistent and inconsistent data sets, and that prediction models for software development effort and productivity built from consistent data sets indeed achieve relatively higher accuracy.
en-copyright=
kn-copyright=

en-aut-name=GanMaohua
en-aut-sei=Gan
en-aut-mei=Maohua
kn-aut-name=
kn-aut-sei=
kn-aut-mei=
aut-affil-num=1
ORCID=

en-aut-name=YucelZeynep
en-aut-sei=Yucel
en-aut-mei=Zeynep
kn-aut-name=
kn-aut-sei=
kn-aut-mei=
aut-affil-num=2
ORCID=

en-aut-name=MondenAkito
en-aut-sei=Monden
en-aut-mei=Akito
kn-aut-name=
kn-aut-sei=
kn-aut-mei=
aut-affil-num=3
ORCID=

affil-num=1
en-affil=Graduate School of Natural Science and Technology, Okayama University
kn-affil=

affil-num=2
en-affil=Graduate School of Natural Science and Technology, Okayama University
kn-affil=

affil-num=3
en-affil=Graduate School of Natural Science and Technology, Okayama University
kn-affil=
en-keyword=Software
kn-keyword=Software
en-keyword=Measurement
kn-keyword=Measurement
en-keyword=Estimation
kn-keyword=Estimation
en-keyword=Data integrity
kn-keyword=Data integrity
en-keyword=Redundancy
kn-keyword=Redundancy
en-keyword=Data models
kn-keyword=Data models
en-keyword=Software engineering
kn-keyword=Software engineering
en-keyword=Data quality metric
kn-keyword=Data quality metric
en-keyword=data inconsistency
kn-keyword=data inconsistency
en-keyword=software project data analysis
kn-keyword=software project data analysis
en-keyword=software effort estimation
kn-keyword=software effort estimation
en-keyword=software productivity estimation
kn-keyword=software productivity estimation
END

start-ver=1.4
cd-journal=joma
no-vol=9
cd-vols=
no-issue=
article-no=
start-page=85795
end-page=85812
dt-received=
dt-revised=
dt-accepted=
dt-pub-year=2021
dt-pub=2021
dt-online=
en-article=
kn-article=
en-subject=
kn-subject=
en-title=
kn-title=Modeling and Predictability Analysis on Channel Spectrum Status Over Heavy Wireless LAN Traffic Environment
en-subtitle=
kn-subtitle=
en-abstract=
kn-abstract=Using the real wireless spectrum occupancy status in 2.4 and 5 GHz bands collected at a railway station as representative of a heavy wireless LAN (WLAN) traffic environment, this paper studies the modeling of durations of busy/idle (B/I) status and its predictability based on predictability theory. We first measure and model the channel status in the heavy traffic environment over almost all of the WLAN channels at 2.4 GHz and 5 GHz bands in a busy (rush hour) period and non-busy period. Then, using two selected channels at 2.4 GHz and 5 GHz bands, we analyze the upper bound (UB) and lower bound (LB) of predictability of the busy/idle durations based on predictability theory. The analysis shows that the LB predictability of durations can be easily increased by changing their probability distribution. Based on this property, we introduce the data categorization (DC) method. By categorizing the busy/idle durations into different streams, the proposed data categorization can improve the prediction performance of some streams with large LB predictability, even if it employs a simple low-complexity auto-regressive (AR) predictor.
en-copyright=
kn-copyright=

en-aut-name=HouYafei
en-aut-sei=Hou
en-aut-mei=Yafei
kn-aut-name=
kn-aut-sei=
kn-aut-mei=
aut-affil-num=1
ORCID=

en-aut-name=WebberJulian
en-aut-sei=Webber
en-aut-mei=Julian
kn-aut-name=
kn-aut-sei=
kn-aut-mei=
aut-affil-num=2
ORCID=

en-aut-name=YanoKazuto
en-aut-sei=Yano
en-aut-mei=Kazuto
kn-aut-name=
kn-aut-sei=
kn-aut-mei=
aut-affil-num=3
ORCID=

en-aut-name=KawasakiShun
en-aut-sei=Kawasaki
en-aut-mei=Shun
kn-aut-name=
kn-aut-sei=
kn-aut-mei=
aut-affil-num=4
ORCID=

en-aut-name=DennoSatoshi
en-aut-sei=Denno
en-aut-mei=Satoshi
kn-aut-name=
kn-aut-sei=
kn-aut-mei=
aut-affil-num=5
ORCID=

en-aut-name=SuzukiYoshinori
en-aut-sei=Suzuki
en-aut-mei=Yoshinori
kn-aut-name=
kn-aut-sei=
kn-aut-mei=
aut-affil-num=6
ORCID=

affil-num=1
en-affil=Natural Science and Technology, Institute of Academic and Research, Okayama University
kn-affil=

affil-num=2
en-affil=Graduate School of Engineering Science, Osaka University
kn-affil=

affil-num=3
en-affil=Wave Engineering Laboratory, Advanced Telecommunications Research Institute International
kn-affil=

affil-num=4
en-affil=Natural Science and Technology, Institute of Academic and Research, Okayama University
kn-affil=

affil-num=5
en-affil=Natural Science and Technology, Institute of Academic and Research, Okayama University
kn-affil=

affil-num=6
en-affil=Wave Engineering Laboratory, Advanced Telecommunications Research Institute International
kn-affil=
en-keyword=Wireless LAN
kn-keyword=Wireless LAN
en-keyword=Wireless communication
kn-keyword=Wireless communication
en-keyword=Predictive models
kn-keyword=Predictive models
en-keyword=Data models
kn-keyword=Data models
en-keyword=Analytical models
kn-keyword=Analytical models
en-keyword=Rail transportation
kn-keyword=Rail transportation
en-keyword=Protocols
kn-keyword=Protocols
en-keyword=Spectrum usage model
kn-keyword=Spectrum usage model
en-keyword=heavy WLAN traffic environment
kn-keyword=heavy WLAN traffic environment
en-keyword=cognitive radio
kn-keyword=cognitive radio
en-keyword=predictability theory
kn-keyword=predictability theory
en-keyword=auto-regressive predictor
kn-keyword=auto-regressive predictor
en-keyword=data categorization
kn-keyword=data categorization
END

start-ver=1.4
cd-journal=joma
no-vol=9
cd-vols=
no-issue=
article-no=
start-page=98048
end-page=98059
dt-received=
dt-revised=
dt-accepted=
dt-pub-year=2021
dt-pub=2021
dt-online=
en-article=
kn-article=
en-subject=
kn-subject=
en-title=
kn-title=Studying the Applicability of Generative Adversarial Networks on HEp-2 Cell Image Augmentation
en-subtitle=
kn-subtitle=
en-abstract=
kn-abstract=The Anti-Nuclear Antibodies (ANAs) testing is the primary serological diagnosis screening test for autoimmune diseases. ANAs testing is conducted mainly by the Indirect Immunofluorescence (IIF) on Human Epithelial cell-substrate (HEp-2) protocol. However, due to its high variability, human-subjectivity, and low throughput, there is an insistent need to develop an efficient Computer-Aided Diagnosis system (CADs) to automate this protocol. Many recently proposed Convolutional Neural Networks (CNNs) demonstrated promising results in HEp-2 cell image classification, which is the main task of the HEp-2 IIF protocol. However, the lack of large labeled datasets is still the main challenge in this field. This work provides a detailed study of the applicability of using generative adversarial networks (GANs) algorithms as an augmentation method. Different types of GANs were employed to synthesize HEp-2 cell images to address the data scarcity problem. For systematic comparison, empirical quantitative metrics were implemented to evaluate different GAN models' performance of learning the real data representations. The results of this work showed that, despite the high visual similarity with the real images, GANs' capacity to generate diverse data is still limited. This deficiency in the generated data diversity is found to have a crucial impact when used as a standalone method for augmentation. However, combining limited-size GANs-generated data with classic augmentation improves the classification accuracy across different variants of CNNs. Our results demonstrated a competitive performance for the overall classification accuracy and the mean class accuracy of the HEp-2 cell image classification task.
en-copyright=
kn-copyright=

en-aut-name=AnaamAsaad
en-aut-sei=Anaam
en-aut-mei=Asaad
kn-aut-name=
kn-aut-sei=
kn-aut-mei=
aut-affil-num=1
ORCID=

en-aut-name=Bu-OmerHani M.
en-aut-sei=Bu-Omer
en-aut-mei=Hani M.
kn-aut-name=
kn-aut-sei=
kn-aut-mei=
aut-affil-num=2
ORCID=

en-aut-name=GofukuAkio
en-aut-sei=Gofuku
en-aut-mei=Akio
kn-aut-name=
kn-aut-sei=
kn-aut-mei=
aut-affil-num=3
ORCID=

affil-num=1
en-affil=Graduate School of Interdisciplinary Science and Engineering in Health Systems, Okayama University
kn-affil=

affil-num=2
en-affil=Graduate School of Interdisciplinary Science and Engineering in Health Systems, Okayama University
kn-affil=

affil-num=3
en-affil=Graduate School of Interdisciplinary Science and Engineering in Health Systems, Okayama University
kn-affil=
en-keyword=Computer architecture
kn-keyword=Computer architecture
en-keyword=Task analysis
kn-keyword=Task analysis
en-keyword=Microprocessors
kn-keyword=Microprocessors
en-keyword=Generative adversarial networks
kn-keyword=Generative adversarial networks
en-keyword=Biomedical imaging
kn-keyword=Biomedical imaging
en-keyword=Measurement
kn-keyword=Measurement
en-keyword=Feature extraction
kn-keyword=Feature extraction
en-keyword=Computer-aided diagnosis systems (CADs)
kn-keyword=Computer-aided diagnosis systems (CADs)
en-keyword=convolutional neural networks (CNNs)
kn-keyword=convolutional neural networks (CNNs)
en-keyword=data augmentation
kn-keyword=data augmentation
en-keyword=data diversity
kn-keyword=data diversity
en-keyword=evaluation metrics
kn-keyword=evaluation metrics
en-keyword=generative adversarial networks (GANs)
kn-keyword=generative adversarial networks (GANs)
en-keyword=HEp-2 cell image classification
kn-keyword=HEp-2 cell image classification
END