bib-rapport.bib
No OneTemporary
Actions

Size

16 KB

Subscribers

None

bib-rapport.bib
View Options

	@misc{Aylien16,
	Author = {Aylien},
	Date-Added = {2018-02-17 20:56:11 +0000},
	Date-Modified = {2018-02-17 21:00:33 +0000},
	Howpublished = {\url{http://blog.aylien.com/source-code-classification-using-deep-learning/}},
	Keywords = {data science, research},
	Month = {August},
	Title = {Source Code Classification Using Deep Learning [blog post]},
	Year = {2016}}

	@misc{universal-ctags,
	Author = {Universal Ctags Team},
	Date-Added = {2018-02-17 20:53:07 +0000},
	Date-Modified = {2018-02-17 20:54:44 +0000},
	Howpublished = {\url{http://ctags.io/}},
	Title = {Universal Ctags},
	Year = {2001--2018}}

	@misc{sloccount,
	Author = {David A. Wheeler},
	Date-Added = {2018-02-17 20:47:15 +0000},
	Date-Modified = {2018-02-17 20:51:51 +0000},
	Howpublished = {\url{https://www.dwheeler.com/sloccount/}},
	Title = {SLOCCount},
	Year = {2004--2018}}

	@misc{cloc,
	Author = {Al Danial},
	Date-Added = {2018-02-17 20:46:02 +0000},
	Date-Modified = {2018-02-17 20:46:38 +0000},
	Howpublished = {\url{https://github.com/AlDanial/cloc}},
	Title = {cloc},
	Year = {2006--2018}}

	@misc{guesslang,
	Author = {Y. Somda},
	Date-Added = {2018-02-17 20:27:54 +0000},
	Date-Modified = {2018-02-17 20:43:42 +0000},
	Howpublished = {\url{http://guesslang.readthedocs.io/}},
	Title = {Guesslang},
	Year = {2017--2018}}

	@misc{linguist,
	Author = {Github},
	Date-Added = {2018-02-17 20:21:27 +0000},
	Date-Modified = {2018-02-17 20:26:46 +0000},
	Howpublished = {\url{https://github.com/github/linguist}},
	Title = {Linguist},
	Year = {2011--2018}}

	@misc{ohcount,
	Author = {Black Duck Software},
	Date-Added = {2018-02-17 20:11:31 +0000},
	Date-Modified = {2018-02-17 21:03:52 +0000},
	Title = {Ohcount},
	Howpublished = {\url{https://github.com/blackducksoftware/ohcount}},
	Year = {2008--2018}}

	@inproceedings{vanDam16,
	Author = {J. K. v. Dam and V. Zaytsev},
	Booktitle = {2016 IEEE 23rd International Conference on Software Analysis, Evolution, and Reengineering (SANER)},
	Doi = {10.1109/SANER.2016.92},
	Keywords = {meta data;natural language processing;pattern classification;program diagnostics;software maintenance;text analysis;embedded code fragments;file extensions;grammar-based text analysis;keyword search;legacy code analysis;multinominal naïve Bayes;n-grams;natural language classifiers;natural language processing field;normalised compression distance;skip-grams;software artefact metadata;software language identification;statistical language models;universal IDE support;Cascading style sheets;HTML;Java;Natural languages;Software;Training;Training data;language identification;natural language processing;software language engineering},
	Month = {March},
	Pages = {624-628},
	Title = {Software Language Identification with Natural Language Classifiers},
	Volume = {1},
	Year = {2016},
	Bdsk-Url-1 = {http://dx.doi.org/10.1109/SANER.2016.92}}

	@article{Klein11,
	Archiveprefix = {arXiv},
	Author = {David Klein and Kyle Murray and Simon Weber},
	Bibsource = {dblp computer science bibliography, http://dblp.org},
	Biburl = {http://dblp.org/rec/bib/journals/corr/abs-1106-4064},
	Eprint = {1106.4064},
	Journal = {CoRR},
	Timestamp = {Wed, 07 Jun 2017 14:41:07 +0200},
	Title = {Algorithmic Programming Language Identification},
	Url = {http://arxiv.org/abs/1106.4064},
	Volume = {abs/1106.4064},
	Year = {2011},
	Bdsk-Url-1 = {http://arxiv.org/abs/1106.4064}}

	@inproceedings{Gilda17,
	Author = {S. Gilda},
	Booktitle = {2017 14th International Joint Conference on Computer Science and Software Engineering (JCSSE)},
	Doi = {10.1109/JCSSE.2017.8025917},
	Keywords = {feature extraction;learning (artificial intelligence);neural nets;pattern classification;programming languages;software engineering;source code (software);artificial neural network;convolutional neural network;file extension;intelligent feature extraction;multilayer neural network;neural networks;programming languages;software development industry;source code classification;supervised learning;word embedding layers;Feature extraction;HTML;Syntactics;Training;Artificial neural network;Feature extraction;Multi-layer neural network;Supervised learning},
	Month = {July},
	Pages = {1-6},
	Title = {Source code classification using Neural Networks},
	Year = {2017},
	Bdsk-Url-1 = {http://dx.doi.org/10.1109/JCSSE.2017.8025917}}

	@article{Zevin17,
	Archiveprefix = {arXiv},
	Author = {Shaul Zevin and Catherine Holzem},
	Bibsource = {dblp computer science bibliography, http://dblp.org},
	Biburl = {http://dblp.org/rec/bib/journals/corr/ZevinH17},
	Eprint = {1703.07638},
	Journal = {CoRR},
	Timestamp = {Wed, 07 Jun 2017 14:41:28 +0200},
	Title = {Machine Learning Based Source Code Classification Using Syntax Oriented Features},
	Url = {http://arxiv.org/abs/1703.07638},
	Volume = {abs/1703.07638},
	Year = {2017},
	Bdsk-Url-1 = {http://arxiv.org/abs/1703.07638}}

	@inproceedings{Ugurel02,
	Acmid = {775141},
	Address = {New York, NY, USA},
	Author = {Ugurel, Secil and Krovetz, Robert and Giles, C. Lee},
	Booktitle = {Proceedings of the Eighth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
	Doi = {10.1145/775047.775141},
	Isbn = {1-58113-567-X},
	Location = {Edmonton, Alberta, Canada},
	Numpages = {7},
	Pages = {632--638},
	Publisher = {ACM},
	Series = {KDD '02},
	Title = {What's the Code?: Automatic Classification of Source Code Archives},
	Url = {http://doi.acm.org/10.1145/775047.775141},
	Year = {2002},
	Bdsk-Url-1 = {http://doi.acm.org/10.1145/775047.775141},
	Bdsk-Url-2 = {http://dx.doi.org/10.1145/775047.775141}}

	@inproceedings{Wang15,
	author = {Peng Wang and
	Jiaming Xu and
	Bo Xu and
	Cheng{-}Lin Liu and
	Heng Zhang and
	Fangyuan Wang and
	Hongwei Hao},
	title = {Semantic Clustering and Convolutional Neural Network for Short Text
	Categorization},
	booktitle = {Proceedings of the 53rd Annual Meeting of the Association for Computational
	Linguistics and the 7th International Joint Conference on Natural
	Language Processing of the Asian Federation of Natural Language Processing,
	{ACL} 2015, July 26-31, 2015, Beijing, China, Volume 2: Short Papers},
	pages = {352--357},
	year = {2015},
	url = {http://aclweb.org/anthology/P/P15/P15-2058.pdf},
	timestamp = {Mon, 03 Aug 2015 08:13:34 +0200},
	biburl = {http://dblp.org/rec/bib/conf/acl/WangXXLZWH15},
	bibsource = {dblp computer science bibliography, http://dblp.org}
	}

	@inproceedings{Khasnabish14,
	author = {Jyotiska Nath Khasnabish and
	Mitali Sodhi and
	Jayati Deshmukh and
	G. Srinivasaraghavan},
	title = {Detecting Programming Language from Source Code Using Bayesian Learning
	Techniques},
	booktitle = {Machine Learning and Data Mining in Pattern Recognition - 10th International
	Conference, {MLDM} 2014, St. Petersburg, Russia, July 21-24, 2014.
	Proceedings},
	pages = {513--522},
	year = {2014},
	url = {https://doi.org/10.1007/978-3-319-08979-9_39},
	doi = {10.1007/978-3-319-08979-9_39},
	timestamp = {Wed, 17 May 2017 14:25:11 +0200},
	biburl = {http://dblp.org/rec/bib/conf/mldm/KhasnabishSDS14},
	bibsource = {dblp computer science bibliography, http://dblp.org}
	}

	@misc{Heres16,
	Author = {Daniël Heres},
	Howpublished = {\url{http://blog.aylien.com/source-code-classification-using-deep-learning/}},
	Month = {July},
	Title = {Detecting the Programming Language of Source Code Snippets using Machine Learning and Neural Networks [blog post]},
	Year = {2016}}

	@Inbook{Aggarwal12,
	author={Aggarwal, Charu C.
	and Zhai, ChengXiang},
	editor={Aggarwal, Charu C.
	and Zhai, ChengXiang},
	title={A Survey of Text Classification Algorithms},
	bookTitle={Mining Text Data},
	year={2012},
	publisher={Springer US},
	address={Boston, MA},
	pages={163--222},
	abstract={The problem of classification has been widely studied in the data mining, machine learning, database, and information retrieval communities with applications in a number of diverse domains, such as target marketing, medical diagnosis, news group filtering, and document organization. In this paper we will provide a survey of a wide variety of text classification algorithms.},
	isbn={978-1-4614-3223-4},
	doi={10.1007/978-1-4614-3223-4_6},
	url={https://doi.org/10.1007/978-1-4614-3223-4_6}
	}

	@article{Chen09,
	title = {Feature selection for text classification with Naïve Bayes},
	journal = {Expert Systems with Applications},
	volume = {36},
	number = {3, Part 1},
	pages = {5432 - 5435},
	year = {2009},
	issn = {0957-4174},
	doi = {https://doi.org/10.1016/j.eswa.2008.06.054},
	url = {http://www.sciencedirect.com/science/article/pii/S0957417408003564},
	author = {Jingnian Chen and Houkuan Huang and Shengfeng Tian and Youli Qu},
	keywords = {Text classification, Feature selection, Text preprocessing, Naïve Bayes}
	}

	@misc{MLatB16,
	Author = {Machine Learning at Berkeley},
	Howpublished = {\url{https://ml.berkeley.edu/blog/2016/12/03/github/}},
	Keywords = {data science, research},
	Month = {December},
	Title = {Github Programming Language Classification [blog post]},
	Year = {2016}
	}

	@article{Cavnar94,
	title={N-gram-based text categorization},
	author={Cavnar, William B and Trenkle, John M and others},
	journal={Ann arbor mi},
	volume={48113},
	number={2},
	pages={161--175},
	year={1994},
	publisher={Citeseer}
	}

	@article{Kim15,
	author = {Yoon Kim and
	Yacine Jernite and
	David Sontag and
	Alexander M. Rush},
	title = {Character-Aware Neural Language Models},
	journal = {CoRR},
	volume = {abs/1508.06615},
	year = {2015},
	url = {http://arxiv.org/abs/1508.06615},
	archivePrefix = {arXiv},
	eprint = {1508.06615},
	timestamp = {Wed, 07 Jun 2017 14:41:17 +0200},
	biburl = {https://dblp.org/rec/bib/journals/corr/KimJSR15},
	bibsource = {dblp computer science bibliography, https://dblp.org}
	}

	@article{Kim14,
	author = {Yoon Kim},
	title = {Convolutional Neural Networks for Sentence Classification},
	journal = {CoRR},
	volume = {abs/1408.5882},
	year = {2014},
	url = {http://arxiv.org/abs/1408.5882},
	archivePrefix = {arXiv},
	eprint = {1408.5882},
	timestamp = {Wed, 07 Jun 2017 14:40:07 +0200},
	biburl = {https://dblp.org/rec/bib/journals/corr/Kim14f},
	bibsource = {dblp computer science bibliography, https://dblp.org}
	}

	@inproceedings{kenlm,
	author = {Kenneth Heafield},
	title = {{KenLM:} Faster and Smaller Language Model Queries},
	year = {2011},
	month = {July},
	booktitle = {Proceedings of the {EMNLP} 2011 Sixth Workshop on Statistical Machine Translation},
	address = {Edinburgh, Scotland, United Kingdom},
	pages = {187--197},
	url = {https://kheafield.com/papers/avenue/kenlm.pdf},
	}

	@article{scikit-learn,
	title={Scikit-learn: Machine Learning in {P}ython},
	author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
	and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
	and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
	Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
	journal={Journal of Machine Learning Research},
	volume={12},
	pages={2825--2830},
	year={2011}
	}

	@misc{keras,
	title={Keras},
	author={Chollet, Fran\c{c}ois and others},
	year={2015},
	howpublished={\url{https://keras.io}},
	}

	@misc{tensorflow2015-whitepaper,
	title={ {TensorFlow}: Large-Scale Machine Learning on Heterogeneous Systems},
	howpublished={\url{https://www.tensorflow.org/}},
	author={
	Mart\'{\i}n~Abadi and
	Ashish~Agarwal and
	Paul~Barham and
	Eugene~Brevdo and
	Zhifeng~Chen and
	Craig~Citro and
	Greg~S.~Corrado and
	Andy~Davis and
	Jeffrey~Dean and
	Matthieu~Devin and
	Sanjay~Ghemawat and
	Ian~Goodfellow and
	Andrew~Harp and
	Geoffrey~Irving and
	Michael~Isard and
	Yangqing Jia and
	Rafal~Jozefowicz and
	Lukasz~Kaiser and
	Manjunath~Kudlur and
	Josh~Levenberg and
	Dandelion~Man\'{e} and
	Rajat~Monga and
	Sherry~Moore and
	Derek~Murray and
	Chris~Olah and
	Mike~Schuster and
	Jonathon~Shlens and
	Benoit~Steiner and
	Ilya~Sutskever and
	Kunal~Talwar and
	Paul~Tucker and
	Vincent~Vanhoucke and
	Vijay~Vasudevan and
	Fernanda~Vi\'{e}gas and
	Oriol~Vinyals and
	Pete~Warden and
	Martin~Wattenberg and
	Martin~Wicke and
	Yuan~Yu and
	Xiaoqiang~Zheng},
	year={2015},
	}


	@article{Gepperth16,
	Abstract = {We present a biologically inspired architecture for incremental learning that remains resource-efficient even in the face of very high data dimensionalities (>1000) that are typically associated with perceptual problems. In particular, we investigate how a new perceptual (object) class can be added to a trained architecture without retraining, while avoiding the well-known catastrophic forgetting effects typically associated with such scenarios. At the heart of the presented architecture lies a generative description of the perceptual space by a self-organized approach which at the same time approximates the neighborhood relations in this space on a two-dimensional plane. This approximation, which closely imitates the topographic organization of the visual cortex, allows an efficient local update rule for incremental learning even in the face of very high dimensionalities, which we demonstrate by tests on the well-known MNIST benchmark. We complement the model by adding a biologically plausible short-term memory system, allowing it to retain excellent classification accuracy even under incremental learning in progress. The short-term memory is additionally used to reinforce new data statistics by replaying previously stored samples during dedicated ``sleep'' phases.},
	Author = {Gepperth, Alexander and Karaoguz, Cem},
	Day = {01},
	Doi = {10.1007/s12559-016-9389-5},
	Issn = {1866-9964},
	Journal = {Cognitive Computation},
	Month = {Oct},
	Number = {5},
	Pages = {924--934},
	Title = {A Bio-Inspired Incremental Learning Architecture for Applied Perceptual Problems},
	Url = {https://doi.org/10.1007/s12559-016-9389-5},
	Volume = {8},
	Year = {2016},
	Bdsk-Url-1 = {https://doi.org/10.1007/s12559-016-9389-5}}

	@article{RebuffiKL16,
	author = {Sylvestre{-}Alvise Rebuffi and
	Alexander Kolesnikov and
	Christoph H. Lampert},
	title = {iCaRL: Incremental Classifier and Representation Learning},
	journal = {CoRR},
	volume = {abs/1611.07725},
	year = {2016},
	url = {http://arxiv.org/abs/1611.07725},
	archivePrefix = {arXiv},
	eprint = {1611.07725},
	timestamp = {Wed, 07 Jun 2017 14:42:11 +0200},
	biburl = {https://dblp.org/rec/bib/journals/corr/RebuffiKL16},
	bibsource = {dblp computer science bibliography, https://dblp.org}
	}

	@article{Kemker17,
	author = {Ronald Kemker and
	Christopher Kanan},
	title = {FearNet: Brain-Inspired Model for Incremental Learning},
	journal = {CoRR},
	volume = {abs/1711.10563},
	year = {2017},
	url = {http://arxiv.org/abs/1711.10563},
	archivePrefix = {arXiv},
	eprint = {1711.10563},
	timestamp = {Mon, 04 Dec 2017 18:34:59 +0100},
	biburl = {https://dblp.org/rec/bib/journals/corr/abs-1711-10563},
	bibsource = {dblp computer science bibliography, https://dblp.org}
	}

	@inproceedings{DiCosmo17,
	author = {Di Cosmo, Roberto and Stefano Zacchiroli},
	title = {Software Heritage: Why and How to Preserve Software Source Code},
	abstract = {Software is now a key component present in all aspects of our society. Its preservation has attracted growing attention over the past years within the digital preservation community. We claim that source code ``the only representation of software that contains human readable knowledge'' is a precious digital object that needs special handling: it must be a first class citizen in the preservation landscape and we need to take action immediately, given the increasingly more frequent incidents that result in permanent losses of source code collections. In this paper we present Software Heritage, an ambitious initiative to collect, preserve, and share the entire corpus of publicly accessible software source code. We discuss the archival goals of the project, its use cases and role as a participant in the broader digital preservation ecosystem, and detail its key design decisions. We also report on the project road map and the current status of the Software Heritage archive that, as of early 2017, has collected more than 3 billion unique source code files and 700 million commits coming from more than 50 million software development projects.},
	year = {2017},
	booktitle = {iPRES 2017: 14th International Conference on Digital Preservation},
	}

File Metadata

Mime Type: text/plain
Expires: Jun 4 2025, 7:19 PM (9 w, 6 d ago)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 3399059

bib-rapport.bibNo OneTemporaryActions

bib-rapport.bibView Options

File Metadata

Event Timeline

bib-rapport.bib
No OneTemporary
Actions

bib-rapport.bib
View Options