NORDCAN - incidences and numbers

Below are the components of an R script that extracts age- and gender-specific rates and proportions from the Nordic cancer databases (NORDCAN), broken down by year and geography.

Link to NORDCAN website: >click here<

library(XML)

#Source
#http://www-dep.iarc.fr/NORDCAN/DK/glossary.htm#ASR

#Table mapping ICD codes and cancer types

# ICD-10 Label
# C00-14\C10.1 Lip, oral cavity and pharynx
# C00.0-2,C00.5-9 Lip
# C00.3-4,C02-04,C05.0,C06 Oral cavity
# C01,C05.1-9,C09,C10.0,C10.2-9 Oropharynx
# C07-08 Salivary glands
# C11 Nasopharynx
# C12-13 Hypopharynx
# C14 Pharynx, ill-defined
# C15 Oesophagus
# C16 Stomach
# C17 Small intestine
# C18 Colon
# C18-21 Colorectal
# C19-21 Rectum and anus
# C22 Liver
# C23-24 Gallbladder
# C25 Pancreas
# C26,C39,C76-80, C97,D47 Unknown and ill-defined
# C30-31 Nose, sinuses
# C32+C10.1 Larynx
# C33-34 Lung
# C37,C38.0-3,C38.8,C45.1-9,C46.2-9,C47-48,C74,C75.0,C75.4-9,C88,D46 Other specified cancers
# C38.4+C45.0 Pleura
# C40-41 Bone
# C43 Melanoma of skin
# C44+C46.0 Skin, non-melanoma
# C49+C46.1 Soft tissues
# C50 Breast
# C51-52,C57.7-9 Other female genital organs
# C53 Cervix uteri
# C54 Corpus uteri
# C55+C58 Uterus, other
# C56,C57.0-4 Ovary etc.
# C60+C63 Penis etc.
# C61 Prostate
# C62 Testis
# C64 Kidney
# C65-68+D09.0-1+ D30.1-9+D41.1-9 Bladder etc.
# C69 Eye
# C70-72+C75.1-3+D32-33+D35.2-4,D42-43,D44.3-5 Brain, central nervous system
# C73 Thyroid
# C81 Hodgkin lymphoma
# C82-85,C96 Non-Hodgkin lymphoma
# C90 Multiple myeloma
# C91-95 Leukaemia
# C91.0 Acute lymphatic leukaemia
# C91.1 Chronic lymphatic leukaemia
# C91.2-9 Other and unspecified lymphatic leukaemia
# C92.0+C93.0+C94.0+C94.2+C94.4-5 Acute myeloid leukaemia
# C92.1+C93.1+C94.1 Chronic myeloid leukaemia
# C92.2-9+C93.2-9+C94.3+C94.7 Other and unspecified myeloid leukaemia
# C95 Leukaemia, cell unspecified
# CXX.X+ D09.0-1+D30.1-9+D35.2-4+D41.1-9+D32-33+D42-43+D44.3-5+D46-47 All sites
# CXX.X\(C44+C46.0)+D09.0-1+D30.1-9+D35.2-4+D41.1-9+D32-33+D42-43+D44.3-5+D46-47 All sites but non-melanoma skin cancer
# CXX.X\(C44+C46.0+C50+C61)+D09.0-1+D30.1-9+D35.2-4+D41.1-9+D32-33+D42-43+D44.3-5+D46-47 All sites but non-melanoma skin, breast and prostate cancer

#Coding for age specific numbers and rates (total, 5 year intervals from 0-80, 85+)
#Table names
#"Cancer" "Total"  "0-"     "5-"     "10-"    "15-"    "20-"    "25-"    "30-"    "35-"    "40-"    "45-"    "50-"    "55-"   "60-"    "65-"    "70-"    "75-"    "80-"    "85+"    "CR"     "ASR(W)" "ASR(E)" "ASR(N)"

#Registry (geography - see the registry codes listed further down)

#Sort (0 by cancer, 1 by ICD code)

#Sex (0 both, 1 males, 2 females)

#Type (0 incidence, 1 mortality)

#Stat (0 numbers, 1 rate /100.000)

#Period (years, depends on geography)

# Extract the cancer incidence table, national level (Denmark, registry 208).
# Query parameters used here: sort=0 (by cancer), sex=2 (females),
# type=0 (incidence), stat=0 (numbers), period=1999.
incidence <- "http://www-dep.iarc.fr/NORDCAN/english/Table4r.asp?registry=208&sort=0&sex=2&type=0&stat=0&period=1999&submit=Execute"
# which = 2 picks the second HTML table found on the returned page;
# TRUE/FALSE are spelled out because T/F are ordinary, reassignable variables.
incidence.table <- readHTMLTable(
  incidence,
  header = TRUE,
  which = 2,
  stringsAsFactors = FALSE
)

# Extract the cancer mortality table, national level (Denmark, registry 208).
# Same query as above except type=1 (mortality).
mortality <- "http://www-dep.iarc.fr/NORDCAN/english/Table4r.asp?registry=208&sort=0&sex=2&type=1&stat=0&period=1999&submit=Execute"
mortality.table <- readHTMLTable(
  mortality,
  header = TRUE,
  which = 2,
  stringsAsFactors = FALSE
)

#Extract cancer prevalence (National level, age-intervals)
#Cancer encoding (danish): 
#Akut lymfatisk leukæmi, 420
#Akut myeloid leukæmi, 450
#Alle kræftformer, 510
#Alle kræftformer undtagen anden hud, 520
#Alle kræftformer undtagen anden hud, bryst og prostata, 530
#Anden hud (ikke modermærke), 320
#Anden og uspecificeret lymfatisk leukæmi, 440
#Anden og uspecificeret myeloid leukæmi, 470
#Andre specificerede, 490
#Bindevæv, 370
#Blære og andre urinveje, 300
#Bryst, 200
#Bugspytkirtel, 150
#Endetarm og anus, 120
#Galdeblære og galdeveje, 140
#Hjerne og centralnervesystem, 340
#Hodgkins lymfom, 390
#Hypopharynx, 60
#Knogle, 360
#Kronisk lymfatisk leukæmi, 430
#Kronisk myeloid leukæmi, 460
#Leukæmi, 410
#Leukæmi, uspecificerede celler, 480
#Lever, 130
#Livmoder, 222
#Livmoder uden specifikation 232
#Livmoderhals, 212
#Lunge (inkl. luftrør), 180
#Lungehinde, 190
#Læbe, 10
#Læbe, mundhule og svælg, 540
#Mave, 90
#Modermærkekræft, 310
#Mundhule, 20
#Myelomatose, 400
#Nasopharynx, 50
#Non- Hodgkin lymfom, 380
#Nyre, 290
#Næse og bihuler, 160
#Oropharynx, 40
#Penis og andre mandlige kønsorganer, 281
#Pharynx, dårligt defineret, 70
#Prostata, 261
#Skjoldbruskkirtel, 350
#Spiserør, 80
#Spytkirtel, 30
#Strube, 170
#Testikel, 271
#Tyk- og endetarm, 550
#Tyktarm, 110
#Tyndtarm, 100
#Ukendte og dårligt definerede, 500
#Æggestok, æggeleder mv., 242
#Øje, 330
#Øvrige kvindelige kønsorganer, 252
#
# Prevalence query: cancer=510, sex=1, registry=208 (Denmark),
# years 1963-2014, stat=2, age groups 1 to 18.
prevalence <- "http://www-dep.iarc.fr/NORDCAN/english/table11.asp?cancer=510&sex=1&registry=208&sYear=1963&eYear=2014&stat=2&age_from=1&age_to=18&submit=Execute"
# Parameter notes:
#   cancer - numeric cancer code (see the code list above)
#   sYear  - start year, same availability as incidence/mortality
#   eYear  - end year (2014 was the limit as of 11 June 2017)
#   stat   - 0; 2 = raw number / proportion per 100.000;
#            3 = raw number / proportion W; 4 = raw number / proportion E;
#            5 = raw number / proportion N
# which = 2 picks the second HTML table on the page; skip.rows = 1 drops its
# first row. TRUE/FALSE are spelled out because T/F can be reassigned.
prevalence.table <- readHTMLTable(
  prevalence,
  header = TRUE,
  which = 2,
  stringsAsFactors = FALSE,
  skip.rows = 1
)
# NOTE(review): the statement that renamed the columns of prevalence.table was
# corrupted when this script was published (its character vector was swallowed
# as HTML markup) and was not valid R. The only surviving fragments are:
#   ear, font, number, otal, prop, year
# which presumably stem from labels such as "Year", "Total", "number" and
# "prop" - TODO restore the full vector from the original script, e.g.
#   names(prevalence.table) <- c(...)
# Until then the header-derived column names are kept.

# Cancer survival (national level, year intervals).
# Query: registry=208 (Denmark), sex=1, time=1, sort=0.
survival <- "http://www-dep.iarc.fr/NORDCAN/english/table23.asp?registry=208&sex=1&time=1&sort=0&submit=Execute"
# which = 2 picks the second HTML table on the page; skip.rows = 1 drops its
# first row. TRUE/FALSE are spelled out because T/F can be reassigned.
survival.table <- readHTMLTable(
  survival,
  header = TRUE,
  which = 2,
  stringsAsFactors = FALSE,
  skip.rows = 1
)

# NOTE(review): the beginning of the original column-name vector was corrupted
# when this script was published (surviving fragments: ancer, font, number),
# which left the statement unparseable. Only the trailing labels below
# survived intact. Because they closed the original vector they belong to the
# LAST columns of the table, so they are applied from the right; the leading
# columns keep their header-derived names.
# TODO: restore the lost leading names (presumably "Cancer" and earlier
# periods) from the original script.
survival_period_names <- c(
  "1975-1979 (number)", "1975-1979 (CI)", "1980-1984 (number)", "1980-1984 (CI)",
  "1985-1989 (number)", "1985-1989 (CI)", "1990-1994 (number)", "1990-1994 (CI)",
  "1995-1999 (number)", "1995-1999 (CI)", "2000-2004 (number)", "2000-2004 (CI)",
  "2005-2009 (number)", "2005-2009 (CI)", "2010-2014 (number)", "2010-2014 (CI)"
)
survival_n_cols <- ncol(survival.table)
survival_n_names <- length(survival_period_names)
if (!is.null(survival_n_cols) && survival_n_cols >= survival_n_names) {
  survival_tail <- seq(survival_n_cols - survival_n_names + 1L, survival_n_cols)
  names(survival.table)[survival_tail] <- survival_period_names
} else {
  # Do not guess: a table narrower than the label set means the page layout
  # differs from what the original script expected.
  warning(
    "survival.table has fewer columns than the known period labels; ",
    "column names left unchanged",
    call. = FALSE
  )
}

# Population numbers per gender (national or regional level, five-year age
# intervals and total). Query: registry=208 (Denmark), period=2014, grid=1.
pop <- "http://www-dep.iarc.fr/NORDCAN/English/graph5.asp?registry=208&period=2014&grid=1&submit=Execute"
# which = 2 picks the second HTML table on the page; skip.rows = 1 drops its
# first row. TRUE/FALSE are spelled out because T/F can be reassigned.
pop.table <- readHTMLTable(
  pop,
  header = TRUE,
  which = 2,
  stringsAsFactors = FALSE,
  skip.rows = 1
)

#National data Denmark (registry 208)
#Regional information Denmark (registry 2080 to 2084)

#Scandinavia (Norden registry 0, years 1960 ->)

#Faroe Islands (registry 234, years 1960 ->)

#Greenland (registry 304, years 1968 ->)

#Iceland (registry 352, years 1955 ->)

#Sweden (registry 752, years 1960 ->)

#Norway (registry 578, years 1953 ->)

#Finland (registry 246, years 1953 ->)

Comments

Popular Posts