# This file describes to datatool how to generate
# the autogenerated_extended_cleanup files.
# This is used very similarly to autogenerated_cleanup.txt, so
# look in that file for a description of most of the things you need to know.

# Notice the hard-coded paths in here.  It's likely that you'll want
# to change these to put the code into the right place in your own
# build directory.
# ( TODO: Hard-coded paths are bad coding style.  I should fix this when I get the chance. )
# Where the generated header and source files are written, and the fully
# qualified name of the generated class.
# NOTE(review): the "./" paths are presumably resolved relative to the
# directory datatool is run from -- confirm before relying on it.
output_header_file "./autogenerated_extended_cleanup.hpp"
output_source_file "./autogenerated_extended_cleanup.cpp"
output_class_name ncbi::objects::CAutogeneratedExtendedCleanup

# The auto-generated class holds a reference (not a pointer) to the
# CNewCleanup_imp object that contains all the cleanup functions.  Every "use"
# rule in this file names its function through this member, i.e.
# m_NewCleanup.<function>.
member { CNewCleanup_imp & m_NewCleanup }

# Root types.  Each line pairs an ASN.1 type (first column) with a name
# (second column).
# NOTE(review): the name is presumably the generated entry-point method that
# starts cleanup at an object of that type -- see autogenerated_cleanup.txt
# to confirm.
root Seq-entry  ExtendedCleanupSeqEntry
root Seq-submit ExtendedCleanupSeqSubmit
root Seq-annot  ExtendedCleanupSeqAnnot
root Bioseq     ExtendedCleanupBioseq
root Bioseq-set ExtendedCleanupBioseqSet
root Seq-feat   ExtendedCleanupSeqFeat

# Includes for the generated files.  You can use angle brackets or
# double-quotes.
# NOTE(review): header_include entries presumably end up in the generated
# .hpp and source_include entries in the generated .cpp -- confirm.
header_include "newcleanupp.hpp"
# Yes, you need the following line (the generated header itself) because the
# auto-generator doesn't necessarily know exactly how you want to include
# the .hpp file.
source_include "autogenerated_extended_cleanup.hpp"
source_include "cleanup_utils.hpp"
source_include "autogenerated_cleanup_extra.hpp"
source_include <objects/misc/sequence_macros.hpp>

# Some fields are deprecated and their accessors may be private, throw an
# exception, or be otherwise unusable.  Such fields are listed here.
# All entries currently listed are members of Variation-ref.
# NOTE(review): presumably the generator does not emit code that touches the
# fields listed here -- confirm against autogenerated_cleanup.txt.

deprecated {
    Variation-ref.population-data ,
    Variation-ref.validated ,
    Variation-ref.clinical-test ,
    Variation-ref.allele-origin ,
    Variation-ref.allele-state ,
    Variation-ref.allele-frequency ,
    Variation-ref.is-ancestral-allele ,
    Variation-ref.pub ,
    Variation-ref.location ,
    Variation-ref.ext-locs ,
    Variation-ref.ext
}

# Don't forget: order matters!  Do not reorder the "use" rules below.

# Some string cleanups that Basic cleanup does not do but Extended does.
# The same function is applied to each of the string fields listed.
# NOTE(review): this list ends with a trailing comma after Pubdesc.comment,
# unlike every other list in this file -- confirm the parser accepts it.
use m_NewCleanup.x_TrimInternalSemicolonsMarkChanged {
    OrgName.attrib ,
    OrgName.lineage ,
    GB-block.origin ,
    Gene-ref.maploc ,
    Gene-ref.locus-tag ,
    Seq-feat.title ,
    Imp-feat.key ,
    Imp-feat.loc ,
    Pubdesc.comment ,
}

# Rules for Bioseq-set, Seq-entry, Bioseq (and their descr fields) and
# Prot-ref.
# NOTE(review): "POST" (first used here on x_SortSeqDescs) presumably means
# the function runs after the object's children have been processed rather
# than before -- see autogenerated_cleanup.txt to confirm.
use m_NewCleanup.x_BioseqSetEC
{
    Bioseq-set
}

use m_NewCleanup.x_SortSeqDescs {
    POST Seq-entry
}

use m_NewCleanup.x_MergeDupBioSources {
    Bioseq.descr ,
    Bioseq-set.descr
}

use m_NewCleanup.x_RemoveDupBioSource {
    Bioseq ,
    Bioseq-set
}

use m_NewCleanup.x_FixStructuredCommentKeywords {
    Bioseq
}

# Two rules target Prot-ref; x_RemoveProtDescThatDupsProtName is listed
# before ProtRefEC, and order matters in this file.
use m_NewCleanup.x_RemoveProtDescThatDupsProtName
{
    Prot-ref
}

use m_NewCleanup.ProtRefEC 
{
    Prot-ref
}

# Rules that target Seq-feat or one of its fields.
use m_NewCleanup.x_BondEC { Seq-feat }
 
use m_NewCleanup.x_tRNAEC { Seq-feat }

# NOTE(review): the "A AND B" target form presumably passes both the gene
# (Seq-feat.data.gene) and its enclosing Seq-feat to the function -- see
# autogenerated_cleanup.txt to confirm.
use m_NewCleanup.x_RemoveRedundantComment
{
    Seq-feat.data.gene AND Seq-feat
}

use m_NewCleanup.x_ExceptTextEC { Seq-feat.except-text }

# Descriptor rules.  Except for the first x_CleanupGenbankBlock rule, each of
# these targets the descr field of both Bioseq and Bioseq-set.
use m_NewCleanup.x_RemoveEmptyUserObject
{
    Bioseq.descr,
    Bioseq-set.descr
}

use m_NewCleanup.KeepLatestDateDesc
{
    Bioseq.descr,
    Bioseq-set.descr
}

# NOTE(review): x_CleanupGenbankBlock is registered twice -- first on the
# Bioseq / Bioseq-set objects themselves, then on their descr fields.
# Presumably the function is overloaded for both kinds of argument; confirm
# that the double registration is intentional.
use m_NewCleanup.x_CleanupGenbankBlock { Bioseq, Bioseq-set }

use m_NewCleanup.x_CleanupGenbankBlock
{
    Bioseq.descr,
    Bioseq-set.descr
}

use m_NewCleanup.x_RemoveOldDescriptors
{
    Bioseq.descr,
    Bioseq-set.descr
}

use m_NewCleanup.x_RemoveDupPubs
{
    Bioseq.descr,
    Bioseq-set.descr
}

use m_NewCleanup.x_RemoveEmptyDescriptors
{
    Bioseq.descr,
    Bioseq-set.descr
}

# Rules that target Seq-feat, Bioseq, Bioseq-set, Seqdesc and BioSource.
# Three of the Bioseq rules (ResynchPeptidePartials, RemoveBadProteinTitle,
# AddProteinTitles) are marked POST; the others are not.
use m_NewCleanup.CdRegionEC { Seq-feat }
use m_NewCleanup.x_ExtendProteinFeatureOnProteinSeq { Bioseq }
use m_NewCleanup.MoveDbxrefs { Seq-feat }
use m_NewCleanup.MoveStandardName { Seq-feat }

use m_NewCleanup.CreatePubFromFeat { Seq-feat }

use m_NewCleanup.MoveCitationQuals { Bioseq }
use m_NewCleanup.CreateMissingMolInfo { Bioseq }

use m_NewCleanup.x_ExtendSingleGeneOnMrna { Bioseq }

use m_NewCleanup.ResynchProteinPartials { Seq-feat }
use m_NewCleanup.ResynchPeptidePartials { POST Bioseq }
use m_NewCleanup.RemoveBadProteinTitle { POST Bioseq }

use m_NewCleanup.AddProteinTitles { POST Bioseq }

use m_NewCleanup.x_RemoveUnseenTitles { Bioseq, Bioseq-set }

use m_NewCleanup.x_MoveSeqdescOrgToSourceOrg { Seqdesc }
use m_NewCleanup.x_MoveSeqfeatOrgToSourceOrg { Seq-feat }

use m_NewCleanup.BioSourceEC { BioSource }

# Rules that target Seq-annot, Bioseq and Bioseq-set.  None of them is
# marked POST.
use m_NewCleanup.x_RemoveEmptyFeatures
{
    Seq-annot
}

use m_NewCleanup.x_MoveCDSFromNucAnnotToSetAnnot { Bioseq-set }

use m_NewCleanup.x_RescueMolInfo { Bioseq }

use m_NewCleanup.x_RemoveOldFeatures { Bioseq }

use m_NewCleanup.x_RemoveEmptyFeatureTables
{
    Bioseq,
    Bioseq-set
}

use m_NewCleanup.x_MergeAdjacentFeatureTables
{
    Bioseq,
    Bioseq-set
}

use  m_NewCleanup.x_MovePopPhyMutPub
{
    Bioseq-set
}

# Remove empty descr sets after the other cleaning steps are done.
# These rules are deliberately last in the file and marked POST.
use m_NewCleanup.x_ClearEmptyDescr {
    POST Bioseq-set,
    POST Bioseq
}

# Convert a set Seq-entry that contains only one sequence into a sequence
# Seq-entry.
use m_NewCleanup.x_SingleSeqSetToSeq { POST Bioseq-set }

